diff --git a/DESCRIPTION b/DESCRIPTION index 31d13a9..5055a80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: medoutcon Title: Efficient Natural and Interventional Causal Mediation Analysis -Version: 0.2.2 +Version: 0.2.3 Authors@R: c( person("Nima", "Hejazi", email = "nh@nimahejazi.org", role = c("aut", "cre", "cph"), diff --git a/LICENSE b/LICENSE index fc423be..0f8023e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2020-2022 +YEAR: 2020-2024 COPYRIGHT HOLDER: Nima S. Hejazi diff --git a/NEWS.md b/NEWS.md index 38e6044..dc96875 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +# medoutcon 0.2.3 + +* Added a new named argument `cv_strat` to `est_onestep()` and `est_tml()` + and to the `estimator_args` list-argument in `medoutcon()`, which allows for + stratified folds to be generated for cross-fitting (by passing these to the + `strata_ids` argument of `make_folds()` from the `origami` package). This is + also triggered by an override in `est_onestep()` and `est_tml()` when the + proportion of detected cases is less than 0.1, a heuristic for rare outcomes. +* Increased the default number of folds for cross-fitting from 5 to 10, setting + `cv_folds = 10L` in named arguments to `est_onestep()` and `est_tml()` and to + the `estimator_args` list-argument in `medoutcon()`. +* Changed default propensity score truncation bounds specified in `g_bounds` to + `c(0.005, 0.995)` from `c(0.01, 0.99)` (in v0.2.2), based on sanity checks and + manual experimentation. +* Wrapped instances of `sl3_Task()` in which `outcome_type = "continuous"` is + specified in `suppressWarnings()` to sink warnings when the outcome variable + for a given nuisance estimation task fails `sl3`'s check for continuous-ness. 
+ # medoutcon 0.2.2 * Change iterative targeting procedures in `est_tml()` to use `glm2::glm2` diff --git a/R/estimators.R b/R/estimators.R index 6cb49a6..d854261 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -81,7 +81,7 @@ cv_eif <- function(fold, effect_type = c("interventional", "natural"), w_names, m_names, - g_bounds = c(0.01, 0.99)) { + g_bounds = c(0.005, 0.995)) { # make training and validation data train_data <- origami::training(data_in) valid_data <- origami::validation(data_in) @@ -160,9 +160,11 @@ cv_eif <- function(fold, g_star <- g_out$treat_est_valid$treat_pred_A_star[valid_data$R == 1] h_prime <- h_out$treat_est_valid$treat_pred_A_prime g_prime <- g_out$treat_est_valid$treat_pred_A_prime[valid_data$R == 1] - q_prime_Z_one <- q_out$moc_est_valid_Z_one$moc_pred_A_prime[valid_data$R == 1] + q_prime_Z_one <- + q_out$moc_est_valid_Z_one$moc_pred_A_prime[valid_data$R == 1] r_prime_Z_one <- r_out$moc_est_valid_Z_one$moc_pred_A_prime - q_prime_Z_natural <- q_out$moc_est_valid_Z_natural$moc_pred_A_prime[valid_data$R == 1] + q_prime_Z_natural <- + q_out$moc_est_valid_Z_natural$moc_pred_A_prime[valid_data$R == 1] r_prime_Z_natural <- r_out$moc_est_valid_Z_natural$moc_pred_A_prime # need pseudo-outcome regressions with intervention set to a contrast @@ -208,12 +210,14 @@ cv_eif <- function(fold, # predict u(z, a', w) using intervened data with treatment set A = a' # NOTE: here, obs_weights should not include two_phase_weights (?) 
- u_task_valid_z_interv <- sl3::sl3_Task$new( - data = valid_data_z_interv, - weights = "obs_weights", - covariates = c("Z", "A", w_names), - outcome = "U_pseudo", - outcome_type = "continuous" + suppressWarnings( + u_task_valid_z_interv <- sl3::sl3_Task$new( + data = valid_data_z_interv, + weights = "obs_weights", + covariates = c("Z", "A", w_names), + outcome = "U_pseudo", + outcome_type = "continuous" + ) ) # return partial pseudo-outcome for v nuisance regression @@ -352,7 +356,7 @@ two_phase_eif <- function(R, # for each index in R with R == 0, add a zero at the same index in eif new_eif <- rep(NA, length(R)) eif_idx <- 1 - for (idx in seq_len(length(R))) { + for (idx in seq_along(R)) { if (R[idx] == 1) { new_eif[idx] <- eif[eif_idx] eif_idx <- eif_idx + 1 @@ -443,6 +447,16 @@ two_phase_eif <- function(R, #' conditions on the one-step estimator to be relaxed. For compatibility with #' \code{\link[origami]{make_folds}}, this value specified must be greater #' than or equal to 2; the default is to create 5 folds. +#' @param cv_strat A \code{logical} atomic vector indicating whether V-fold +#' cross-validation should stratify the folds based on the outcome variable. +#' If \code{TRUE}, the folds are stratified by passing the outcome variable to +#' the \code{strata_ids} argument of \code{\link[origami]{make_folds}}. While +#' the default is \code{FALSE}, an override is triggered when the incidence of +#' the binary outcome variable falls below the tolerance in \code{strat_pmin}. +#' @param strat_pmin A \code{numeric} atomic vector indicating a tolerance for +#' the minimum proportion of cases (for a binary outcome variable) below which +#' stratified V-fold cross-validation is invoked if \code{cv_strat} is set to +#' \code{TRUE} (default is \code{FALSE}). The default tolerance is 0.1. 
#' #' @importFrom assertthat assert_that #' @importFrom stats var weighted.mean @@ -462,18 +476,35 @@ est_onestep <- function(data, w_names, m_names, y_bounds, - g_bounds = c(0.01, 0.99), + g_bounds = c(0.005, 0.995), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L) { + cv_folds = 10L, + cv_strat = FALSE, + strat_pmin = 0.1) { # make sure that more than one fold is specified assertthat::assert_that(cv_folds > 1L) # create cross-validation folds - folds <- origami::make_folds(data, - fold_fun = origami::folds_vfold, - V = cv_folds - ) + if (cv_strat && data[, mean(Y) <= strat_pmin]) { + # check that outcome is binary for stratified V-fold cross-validation + assertthat::assert_that(data[, all(unique(Y) %in% c(0, 1))]) + + # if outcome is binary and rare, use stratified V-fold cross-validation + folds <- origami::make_folds( + data, + fold_fun = origami::folds_vfold, + V = cv_folds, + strata_ids = data$Y + ) + } else { + # just use standard V-fold cross-validation + folds <- origami::make_folds( + data, + fold_fun = origami::folds_vfold, + V = cv_folds + ) + } # estimate the EIF on a per-fold basis cv_eif_results <- origami::cross_validate( @@ -599,6 +630,16 @@ est_onestep <- function(data, #' conditions on the TML estimator to be relaxed. Note: for compatibility with #' \code{\link[origami]{make_folds}}, this value must be greater than or #' equal to 2; the default is to create 10 folds. +#' @param cv_strat A \code{logical} atomic vector indicating whether V-fold +#' cross-validation should stratify the folds based on the outcome variable. +#' If \code{TRUE}, the folds are stratified by passing the outcome variable to +#' the \code{strata_ids} argument of \code{\link[origami]{make_folds}}. While +#' the default is \code{FALSE}, an override is triggered when the incidence of +#' the binary outcome variable falls below the tolerance in \code{strat_pmin}. 
+#' @param strat_pmin A \code{numeric} atomic vector indicating a tolerance for +#' the minimum proportion of cases (for a binary outcome variable) below which +#' stratified V-fold cross-validation is invoked if \code{cv_strat} is set to +#' \code{TRUE} (default is \code{FALSE}). The default tolerance is 0.1. #' @param max_iter A \code{numeric} integer giving the maximum number of steps #' to be taken for the iterative procedure to construct a TML estimator. #' @param tiltmod_tol A \code{numeric} indicating the maximum step size to be @@ -626,20 +667,37 @@ est_tml <- function(data, w_names, m_names, y_bounds, - g_bounds = c(0.01, 0.99), + g_bounds = c(0.005, 0.995), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L, - max_iter = 5L, + cv_folds = 10L, + cv_strat = FALSE, + strat_pmin = 0.1, + max_iter = 10L, tiltmod_tol = 5) { # make sure that more than one fold is specified assertthat::assert_that(cv_folds > 1L) # create cross-validation folds - folds <- origami::make_folds(data, - fold_fun = origami::folds_vfold, - V = cv_folds - ) + if (cv_strat && data[, mean(Y) <= strat_pmin]) { + # check that outcome is binary for stratified V-fold cross-validation + assertthat::assert_that(data[, all(unique(Y) %in% c(0, 1))]) + + # if outcome is binary and rare, use stratified V-fold cross-validation + folds <- origami::make_folds( + data, + fold_fun = origami::folds_vfold, + V = cv_folds, + strata_ids = data$Y + ) + } else { + # just use standard V-fold cross-validation + folds <- origami::make_folds( + data, + fold_fun = origami::folds_vfold, + V = cv_folds + ) + } # perform the cv_eif procedure on a per-fold basis cv_eif_results <- origami::cross_validate( diff --git a/R/fit_mechanisms.R b/R/fit_mechanisms.R index f6ba2b8..8239092 100644 --- a/R/fit_mechanisms.R +++ b/R/fit_mechanisms.R @@ -589,7 +589,8 @@ fit_nuisance_u <- function(train_data, g_star <- g_out$treat_est_train$treat_pred_A_star[train_data$R == 1] h_prime <- 
h_out$treat_est_train$treat_pred_A_prime g_prime <- g_out$treat_est_train$treat_pred_A_prime[train_data$R == 1] - q_prime_Z_natural <- q_out$moc_est_train_Z_natural$moc_pred_A_prime[train_data$R == 1] + q_prime_Z_natural <- + q_out$moc_est_train_Z_natural$moc_pred_A_prime[train_data$R == 1] r_prime_Z_natural <- r_out$moc_est_train_Z_natural$moc_pred_A_prime # remove observations that were not sampled in second stage @@ -618,12 +619,14 @@ fit_nuisance_u <- function(train_data, w_names, "A", "Z", "U_pseudo", "obs_weights" )) - u_task_train <- sl3::sl3_Task$new( - data = u_data_train, - weights = "obs_weights", - covariates = c("Z", "A", w_names), - outcome = "U_pseudo", - outcome_type = "continuous" + suppressWarnings( + u_task_train <- sl3::sl3_Task$new( + data = u_data_train, + weights = "obs_weights", + covariates = c("Z", "A", w_names), + outcome = "U_pseudo", + outcome_type = "continuous" + ) ) ## fit model for nuisance parameter regression on training data @@ -640,12 +643,14 @@ fit_nuisance_u <- function(train_data, w_names, "A", "Z", "U_pseudo", "obs_weights" )) - u_task_valid <- sl3::sl3_Task$new( - data = u_data_valid, - weights = "obs_weights", - covariates = c("Z", "A", w_names), - outcome = "U_pseudo", - outcome_type = "continuous" + suppressWarnings( + u_task_valid <- sl3::sl3_Task$new( + data = u_data_valid, + weights = "obs_weights", + covariates = c("Z", "A", w_names), + outcome = "U_pseudo", + outcome_type = "continuous" + ) ) ## predict from nuisance parameter regression on validation and training data @@ -702,8 +707,10 @@ fit_nuisance_v <- function(train_data, m_names, w_names) { ## extract nuisance estimates necessary for this routrine - q_train_prime_Z_one <- q_out$moc_est_train_Z_one$moc_pred_A_prime[train_data$R == 1] - q_valid_prime_Z_one <- q_out$moc_est_valid_Z_one$moc_pred_A_prime[valid_data$R == 1] + q_train_prime_Z_one <- + q_out$moc_est_train_Z_one$moc_pred_A_prime[train_data$R == 1] + q_valid_prime_Z_one <- + 
q_out$moc_est_valid_Z_one$moc_pred_A_prime[valid_data$R == 1] # remove observations that were not sampled in second stage train_data <- train_data[R == 1, ] @@ -799,24 +806,28 @@ fit_nuisance_v <- function(train_data, ## build regression tasks for training and validation sets train_data[, V_pseudo := v_pseudo_train] - v_task_train <- sl3::sl3_Task$new( - data = train_data, - weights = "obs_weights", # NOTE: should not include two_phase_weights - covariates = c("A", w_names), - outcome = "V_pseudo", - outcome_type = "continuous" + suppressWarnings( + v_task_train <- sl3::sl3_Task$new( + data = train_data, + weights = "obs_weights", # NOTE: should not include two_phase_weights + covariates = c("A", w_names), + outcome = "V_pseudo", + outcome_type = "continuous" + ) ) # NOTE: independent implementation from ID sets A to a* as done below valid_data[, `:=`( V_pseudo = v_pseudo_valid, A = contrast[2] )] - v_task_valid <- sl3::sl3_Task$new( - data = valid_data, - weights = "obs_weights", # NOTE: should not include two_phase_weights - covariates = c("A", w_names), - outcome = "V_pseudo", - outcome_type = "continuous" + suppressWarnings( + v_task_valid <- sl3::sl3_Task$new( + data = valid_data, + weights = "obs_weights", # NOTE: should not include two_phase_weights + covariates = c("A", w_names), + outcome = "V_pseudo", + outcome_type = "continuous" + ) ) ## fit regression model for v on training task, get predictions on validation @@ -911,8 +922,10 @@ fit_nuisance_d <- function(train_data, g_prime <- g_out$treat_est_train$treat_pred_A_prime[train_data$R == 1] u_prime <- u_out$u_train_pred v_star <- v_out$v_train_pred - q_prime_Z_one <- q_out$moc_est_train_Z_one$moc_pred_A_prime[train_data$R == 1] - q_prime_Z_natural <- q_out$moc_est_train_Z_natural$moc_pred_A_prime[train_data$R == 1] + q_prime_Z_one <- + q_out$moc_est_train_Z_one$moc_pred_A_prime[train_data$R == 1] + q_prime_Z_natural <- + q_out$moc_est_train_Z_natural$moc_pred_A_prime[train_data$R == 1] r_prime_Z_natural 
<- r_out$moc_est_train_Z_natural$moc_pred_A_prime # NOTE: assuming Z in {0,1}; other cases not supported yet @@ -926,12 +939,14 @@ fit_nuisance_d <- function(train_data, )] # predict u(z, a', w) using intervened data with treatment set A = a' - u_task_train_z_interv <- sl3::sl3_Task$new( - data = train_data_z_interv, - weights = "obs_weights", # NOTE: should not include two_phase_weights - covariates = c("Z", "A", w_names), - outcome = "U_pseudo", - outcome_type = "continuous" + suppressWarnings( + u_task_train_z_interv <- sl3::sl3_Task$new( + data = train_data_z_interv, + weights = "obs_weights", # NOTE: should not include two_phase_weights + covariates = c("Z", "A", w_names), + outcome = "U_pseudo", + outcome_type = "continuous" + ) ) # return partial pseudo-outcome for v nuisance regression @@ -966,12 +981,14 @@ fit_nuisance_d <- function(train_data, # generate the sl3 task # NOTE: Purposefully not adding two-phase sampling weights - d_task_train <- sl3::sl3_Task$new( - data = eif_data_train, - weights = "obs_weights", # NOTE: should not include two_phase_weights - covariates = c(w_names, "A", "Z", "Y"), - outcome = "eif", - outcome_type = "continuous" + suppressWarnings( + d_task_train <- sl3::sl3_Task$new( + data = eif_data_train, + weights = "obs_weights", # NOTE: should not include two_phase_weights + covariates = c(w_names, "A", "Z", "Y"), + outcome = "eif", + outcome_type = "continuous" + ) ) ## fit model for nuisance parameter regression on training data diff --git a/R/medoutcon.R b/R/medoutcon.R index a5a970e..4c43df6 100644 --- a/R/medoutcon.R +++ b/R/medoutcon.R @@ -24,9 +24,9 @@ #' weights corresponding to the inverse probability of the mediator being #' measured. Defaults to a vector of ones. #' @param effect A \code{character} indicating whether to compute the direct or -#' the indirect effect as discussed in . -#' This is ignored when the argument \code{contrast} is provided. By default, -#' the direct effect is estimated. 
+#' the indirect effects of . This is +#' ignored when the argument \code{contrast} is provided. By default, the +#' direct effect is estimated. #' @param contrast A \code{numeric} double indicating the two values of the #' intervention \code{A} to be compared. The default value of \code{NULL} has #' no effect, as the value of the argument \code{effect} is instead used to @@ -69,16 +69,16 @@ #' contrast-specific parameter) to be computed. Both an efficient one-step #' estimator using cross-fitting and a cross-validated targeted minimum loss #' estimator (TMLE) are available. The default is the TML estimator. -#' @param estimator_args A \code{list} of extra arguments to be passed (via +#' @param estimator_args A \code{list} of additional arguments passed (via #' \code{...}) to the function call for the specified estimator. The default #' is chosen so as to allow the number of folds used to compute the one-step -#' or TML estimators to be easily adjusted. In the case of the TML estimator, -#' the number of update (fluctuation) iterations is limited, and a tolerance -#' is included for the updates introduced by tilting (fluctuation) models. +#' or TML estimators to be adjusted and for stratified cross-validation to be +#' used in cases of rare outcomes. In the case of the TML estimator, the +#' number of update (fluctuation) iterations is limited, and a tolerance is +#' included for the updates introduced by tilting (fluctuation) models. #' @param g_bounds A \code{numeric} vector containing two values, the first #' being the minimum allowable estimated propensity score value and the #' second being the maximum allowable for estimated propensity scores. -#' Defaults to \code{c(0.001, 0.999)}. 
#' #' @importFrom data.table as.data.table setnames set #' @importFrom sl3 Lrnr_glm_fast Lrnr_hal9001 @@ -139,17 +139,12 @@ medoutcon <- function(W, d_learners = sl3::Lrnr_glm_fast$new(), estimator = c("tmle", "onestep"), estimator_args = list( - cv_folds = 5L, max_iter = 5L, - tiltmod_tol = 5 + cv_folds = 10L, cv_strat = FALSE, strat_pmin = 0.1, + max_iter = 10L, tiltmod_tol = 5 ), - g_bounds = c(0.01, 0.99)) { + g_bounds = c(0.005, 0.995)) { # set defaults estimator <- match.arg(estimator) - estimator_args <- unlist(estimator_args, recursive = FALSE) - est_args_os <- estimator_args[names(estimator_args) %in% - names(formals(est_onestep))] - est_args_tmle <- estimator_args[names(estimator_args) %in% - names(formals(est_tml))] # set constant Z for estimation of the natural (in)direct effects if (is.null(Z)) { @@ -203,8 +198,12 @@ medoutcon <- function(W, est_params <- lapply(contrast_grid, function(contrast) { if (estimator == "onestep") { + # set arguments to pass to one-step workhorse function + estimator_args <- estimator_args[names(estimator_args) %in% + names(formals(est_onestep))] + # EFFICIENT ONE-STEP ESTIMATOR - onestep_est_args <- list( + est_onestep_args <- list( data = data, contrast = contrast, g_learners = g_learners, @@ -222,13 +221,17 @@ medoutcon <- function(W, svy_weights = svy_weights, g_bounds = g_bounds ) - onestep_est_args <- unlist(list(onestep_est_args, est_args_os), + est_onestep_args <- unlist(list(est_onestep_args, estimator_args), recursive = FALSE ) - est_out <- do.call(est_onestep, onestep_est_args) + est_out <- do.call(est_onestep, est_onestep_args) } else if (estimator == "tmle") { + # set arguments to pass to TMLE workhorse function + estimator_args <- estimator_args[names(estimator_args) %in% + names(formals(est_tml))] + # TARGETED MINIMUM LOSS ESTIMATOR - tmle_est_args <- list( + est_tml_args <- list( data = data, contrast = contrast, g_learners = g_learners, @@ -246,10 +249,10 @@ medoutcon <- function(W, svy_weights = 
svy_weights, g_bounds = g_bounds ) - tmle_est_args <- unlist(list(tmle_est_args, est_args_tmle), + est_tml_args <- unlist(list(est_tml_args, estimator_args), recursive = FALSE ) - est_out <- do.call(est_tml, tmle_est_args) + est_out <- do.call(est_tml, est_tml_args) } # lazily create output as classed list diff --git a/README.Rmd b/README.Rmd index 2bf4c68..ba467eb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,7 +17,7 @@ knitr::opts_chunk$set( # R/`medoutcon` -[![R-CMD-check](https://github.com/nhejazi/medoutcon/workflows/R-CMD-check/badge.svg)](https://github.com/nhejazi/medoutcon/actions) +[![R-CMD-check](https://github.com/nhejazi/medoutcon/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/nhejazi/medoutcon/actions/workflows/R-CMD-check.yml) [![Coverage Status](https://img.shields.io/codecov/c/github/nhejazi/medoutcon/master.svg)](https://codecov.io/github/nhejazi/medoutcon?branch=master) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![MIT license](http://img.shields.io/badge/license-MIT-brightgreen.svg)](http://opensource.org/licenses/MIT) @@ -74,11 +74,11 @@ To illustrate how `medoutcon` may be used to estimate stochastic interventional mediator(s) (`M`) and a mediator-outcome confounder (`Z`), consider the following example: -```{r example, warning=FALSE, message=FALSE} +```{r example, warning=FALSE} library(data.table) library(tidyverse) library(medoutcon) -set.seed(1584) +set.seed(02138) # produces a simple data set based on ca causal model with mediation make_example_data <- function(n_obs = 1000) { @@ -109,7 +109,7 @@ make_example_data <- function(n_obs = 1000) { } # set seed and simulate example data -example_data <- make_example_data() +example_data <- make_example_data(n_obs = 5000L) w_names <- str_subset(colnames(example_data), "W") m_names <- str_subset(colnames(example_data), "M") 
@@ -198,24 +198,24 @@ After using the `medoutcon` R package, please cite the following: author={Hejazi, Nima S and D{\'\i}az, Iv{\'a}n and Rudolph, Kara E}, title = {{medoutcon}: Efficient natural and interventional causal mediation analysis}, - year = {2022}, + year = {2024}, doi = {10.5281/zenodo.5809519}, url = {https://github.com/nhejazi/medoutcon}, - note = {R package version 0.1.6} + note = {R package version 0.2.3} } --- ## License -© 2020-2022 [Nima S. Hejazi](https://nimahejazi.org) +© 2020-2024 [Nima S. Hejazi](https://nimahejazi.org) The contents of this repository are distributed under the MIT license. See below for details: ``` MIT License -Copyright (c) 2020-2022 Nima S. Hejazi +Copyright (c) 2020-2024 Nima S. Hejazi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -240,3 +240,4 @@ SOFTWARE. ## References + diff --git a/README.md b/README.md index 360c6fa..7d35986 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ -[![R-CMD-check](https://github.com/nhejazi/medoutcon/workflows/R-CMD-check/badge.svg)](https://github.com/nhejazi/medoutcon/actions) +[![R-CMD-check](https://github.com/nhejazi/medoutcon/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/nhejazi/medoutcon/actions/workflows/R-CMD-check.yml) [![Coverage Status](https://img.shields.io/codecov/c/github/nhejazi/medoutcon/master.svg)](https://codecov.io/github/nhejazi/medoutcon?branch=master) [![Project Status: Active – The project has reached a stable, usable @@ -36,19 +36,18 @@ only). In the presence of an intermediate mediator-outcome confounder $Z$, itself affected by the treatment $A$, these correspond to the *interventional* (in)direct effects described by Dı́az et al. (2020), though similar (yet less general) effect definitions -and/or estimation strategies have appeared in VanderWeele, Vansteelandt, -and Robins (2014), Rudolph et al. 
(2017), Zheng and van der Laan (2017), -and Benkeser and Ran (2021). When no intermediate confounders are -present, these effect definitions simplify to the well-studied *natural* -(in)direct effects, and our estimators are analogs of those formulated -by Zheng and van der Laan (2012). Both an efficient one-step -bias-corrected estimator with cross-fitting (Pfanzagl and Wefelmeyer -1985; Zheng and van der Laan 2011; Chernozhukov et al. 2018) and a -cross-validated targeted minimum loss estimator (TMLE) (van der Laan and -Rose 2011; Zheng and van der Laan 2011) are made available. `medoutcon` -integrates with the [`sl3` R package](https://github.com/tlverse/sl3) -(Coyle et al. 2021) to leverage statistical machine learning in the -estimation procedure. +and/or estimation strategies have appeared in VanderWeele, Vansteelandt, and Robins (2014), +Rudolph et al. (2017), Zheng and van der Laan (2017), and Benkeser and +Ran (2021). When no intermediate confounders are present, these effect +definitions simplify to the well-studied *natural* (in)direct effects, +and our estimators are analogs of those formulated by Zheng and van der +Laan (2012). Both an efficient one-step bias-corrected estimator with +cross-fitting (Pfanzagl and Wefelmeyer 1985; Zheng and van der Laan +2011; Chernozhukov et al. 2018) and a cross-validated targeted minimum +loss estimator (TMLE) (van der Laan and Rose 2011; Zheng and van der +Laan 2011) are made available. `medoutcon` integrates with the [`sl3` R +package](https://github.com/tlverse/sl3) (Coyle et al. 2021) to leverage +statistical machine learning in the estimation procedure. 
------------------------------------------------------------------------ @@ -73,21 +72,34 @@ confounder (`Z`), consider the following example: ``` r library(data.table) library(tidyverse) -#> ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.2 ── -#> ✔ ggplot2 3.3.6 ✔ purrr 0.3.4 -#> ✔ tibble 3.1.8 ✔ dplyr 1.0.10 -#> ✔ tidyr 1.2.1 ✔ stringr 1.4.1 -#> ✔ readr 2.1.2 ✔ forcats 0.5.2 -#> ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ── -#> ✖ dplyr::between() masks data.table::between() -#> ✖ dplyr::filter() masks stats::filter() -#> ✖ dplyr::first() masks data.table::first() -#> ✖ dplyr::lag() masks stats::lag() -#> ✖ dplyr::last() masks data.table::last() -#> ✖ purrr::transpose() masks data.table::transpose() +#> ── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ── +#> ✔ dplyr 1.1.3 ✔ readr 2.1.4 +#> ✔ forcats 1.0.0 ✔ stringr 1.5.0 +#> ✔ ggplot2 3.4.4 ✔ tibble 3.2.1 +#> ✔ lubridate 1.9.3 ✔ tidyr 1.3.0 +#> ✔ purrr 1.0.2 +#> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ── +#> ✖ dplyr::between() masks data.table::between() +#> ✖ dplyr::filter() masks stats::filter() +#> ✖ dplyr::first() masks data.table::first() +#> ✖ lubridate::hour() masks data.table::hour() +#> ✖ lubridate::isoweek() masks data.table::isoweek() +#> ✖ dplyr::lag() masks stats::lag() +#> ✖ dplyr::last() masks data.table::last() +#> ✖ lubridate::mday() masks data.table::mday() +#> ✖ lubridate::minute() masks data.table::minute() +#> ✖ lubridate::month() masks data.table::month() +#> ✖ lubridate::quarter() masks data.table::quarter() +#> ✖ lubridate::second() masks data.table::second() +#> ✖ purrr::transpose() masks data.table::transpose() +#> 
✖ lubridate::wday() masks data.table::wday() +#> ✖ lubridate::week() masks data.table::week() +#> ✖ lubridate::yday() masks data.table::yday() +#> ✖ lubridate::year() masks data.table::year() +#> ℹ Use the conflicted package () to force all conflicts to become errors library(medoutcon) -#> medoutcon v0.1.6: Efficient Natural and Interventional Causal Mediation Analysis -set.seed(1584) +#> medoutcon v0.2.3: Efficient Natural and Interventional Causal Mediation Analysis +set.seed(02138) # produces a simple data set based on ca causal model with mediation make_example_data <- function(n_obs = 1000) { @@ -118,49 +130,53 @@ make_example_data <- function(n_obs = 1000) { } # set seed and simulate example data -example_data <- make_example_data() +example_data <- make_example_data(n_obs = 5000L) w_names <- str_subset(colnames(example_data), "W") m_names <- str_subset(colnames(example_data), "M") # quick look at the data head(example_data) #> W_1 W_2 W_3 A Z M Y -#> 1: 1 0 1 0 0 0 1 -#> 2: 0 1 0 0 0 1 0 -#> 3: 1 1 1 1 0 1 1 -#> 4: 0 1 1 0 0 1 0 -#> 5: 0 0 0 0 0 1 1 -#> 6: 1 0 1 1 0 1 0 +#> 1: 1 0 0 0 0 1 0 +#> 2: 0 0 0 0 0 0 1 +#> 3: 1 0 1 1 1 1 0 +#> 4: 1 0 1 1 0 1 1 +#> 5: 1 0 1 0 1 1 1 +#> 6: 1 0 0 0 0 1 0 # compute one-step estimate of the interventional direct effect -os_de <- medoutcon(W = example_data[, ..w_names], - A = example_data$A, - Z = example_data$Z, - M = example_data[, ..m_names], - Y = example_data$Y, - effect = "direct", - estimator = "onestep") +os_de <- medoutcon( + W = example_data[, ..w_names], + A = example_data$A, + Z = example_data$Z, + M = example_data[, ..m_names], + Y = example_data$Y, + effect = "direct", + estimator = "onestep" +) os_de #> Interventional Direct Effect #> Estimator: onestep -#> Estimate: -0.065 -#> Std. Error: 0.054 -#> 95% CI: [-0.17, 0.041] +#> Estimate: -0.101 +#> Std. 
Error: 0.028 +#> 95% CI: [-0.156, -0.045] # compute targeted minimum loss estimate of the interventional direct effect -tmle_de <- medoutcon(W = example_data[, ..w_names], - A = example_data$A, - Z = example_data$Z, - M = example_data[, ..m_names], - Y = example_data$Y, - effect = "direct", - estimator = "tmle") +tmle_de <- medoutcon( + W = example_data[, ..w_names], + A = example_data$A, + Z = example_data$Z, + M = example_data[, ..m_names], + Y = example_data$Y, + effect = "direct", + estimator = "tmle" +) tmle_de #> Interventional Direct Effect #> Estimator: tmle -#> Estimate: -0.06 -#> Std. Error: 0.058 -#> 95% CI: [-0.173, 0.053] +#> Estimate: -0.103 +#> Std. Error: 0.029 +#> 95% CI: [-0.16, -0.046] ``` For details on how to use data adaptive regression (machine learning) @@ -220,24 +236,24 @@ After using the `medoutcon` R package, please cite the following: author={Hejazi, Nima S and D{\'\i}az, Iv{\'a}n and Rudolph, Kara E}, title = {{medoutcon}: Efficient natural and interventional causal mediation analysis}, - year = {2022}, + year = {2024}, doi = {10.5281/zenodo.5809519}, url = {https://github.com/nhejazi/medoutcon}, - note = {R package version 0.1.6} + note = {R package version 0.2.3} } ------------------------------------------------------------------------ ## License -© 2020-2022 [Nima S. Hejazi](https://nimahejazi.org) +© 2020-2024 [Nima S. Hejazi](https://nimahejazi.org) The contents of this repository are distributed under the MIT license. See below for details: MIT License - Copyright (c) 2020-2022 Nima S. Hejazi + Copyright (c) 2020-2024 Nima S. Hejazi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -261,7 +277,8 @@ See below for details: ## References -
+
@@ -284,8 +301,8 @@ Parameters.” *The Econometrics Journal* 21 (1).
Coyle, Jeremy R, Nima S Hejazi, Ivana Malenica, Rachael V Phillips, and -Oleg Sofrygin. 2021. *`sl3`: Modern Machine Learning Pipelines for Super -Learning* (version 1.4.4). . +Oleg Sofrygin. 2021. “`sl3`: Modern Machine Learning Pipelines for Super +Learning.” .
@@ -323,15 +340,6 @@ Business Media.
-
- -VanderWeele, Tyler J, Stijn Vansteelandt, and James M Robins. 2014. -“Effect Decomposition in the Presence of an Exposure-Induced -Mediator-Outcome Confounder.” *Epidemiology* 25 (2): 300. -. - -
-
Zheng, Wenjing, and Mark J van der Laan. 2011. “Cross-Validated Targeted diff --git a/docs/404.html b/docs/404.html index 64c9b87..573edae 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ medoutcon - 0.2.0 + 0.2.3
@@ -100,7 +100,7 @@

Page not found (404)

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 7f7a227..b01126d 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3
@@ -114,7 +114,7 @@

Pull requests -

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 381fb1c..c58adec 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -56,7 +56,7 @@

License

-
YEAR: 2020-2022
+
YEAR: 2020-2024
 COPYRIGHT HOLDER: Nima S. Hejazi
 
@@ -75,7 +75,7 @@

License

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 26f1f61..b10d36b 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -79,7 +79,7 @@

MIT License

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/index.html b/docs/articles/index.html index f4ff169..2920a16 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -72,7 +72,7 @@

All vignettes

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/articles/intro_medoutcon.html b/docs/articles/intro_medoutcon.html index 2ce69dc..8373de6 100644 --- a/docs/articles/intro_medoutcon.html +++ b/docs/articles/intro_medoutcon.html @@ -33,7 +33,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -84,7 +84,7 @@

Nima Hejazi, Iván Díaz, and Kara Rudolph

-

2022-10-12

+

2024-03-17

Source: vignettes/intro_medoutcon.Rmd @@ -306,8 +306,8 @@

Setting up the data exampley <- rbinom(n_obs, 1, plogis(1 / (rowSums(w) - z + a + m))) ## construct output - dat <- as.data.table(cbind(w = w, a = a, z = z, m = m, y = y)) - setnames(dat, c(w_names, "A", "Z", m_names, "Y")) + dat <- as.data.table(cbind(w = w, a = a, z = z, m = m, y = y)) + setnames(dat, c(w_names, "A", "Z", m_names, "Y")) return(dat) } @@ -392,8 +392,10 @@

Ensemble learning of nuisance f hal_bounded_lrnr <- Pipeline$new(hal_gaussian_lrnr, bound_lrnr) # create learner library and instantiate super learner ensemble -lrnr_lib <- Stack$new(mean_lrnr, fglm_lrnr, enet_lrnr, lasso_lrnr, - rf_lrnr, hal_bounded_lrnr) +lrnr_lib <- Stack$new( + mean_lrnr, fglm_lrnr, enet_lrnr, lasso_lrnr, + rf_lrnr, hal_bounded_lrnr +) sl_lrnr <- Lrnr_sl$new(learners = lrnr_lib, metalearner = Lrnr_nnls$new())

While we recommend the use of a Super Learner ensemble model like the one constructed above in practice, such a library will be too @@ -443,58 +445,62 @@

Estimating the direct effect
 # compute one-step estimate of the interventional direct effect
-os_de <- medoutcon(W = example_data[, ..w_names],
-                   A = example_data$A,
-                   Z = example_data$Z,
-                   M = example_data[, ..m_names],
-                   Y = example_data$Y,
-                   g_learners = sl_lrnr,
-                   h_learners = sl_lrnr,
-                   b_learners = sl_lrnr,
-                   q_learners = sl_lrnr,
-                   r_learners = sl_lrnr,
-                   effect = "direct",
-                   estimator = "onestep",
-                   estimator_args = list(cv_folds = 2))
+os_de <- medoutcon(
+  W = example_data[, ..w_names],
+  A = example_data$A,
+  Z = example_data$Z,
+  M = example_data[, ..m_names],
+  Y = example_data$Y,
+  g_learners = sl_lrnr,
+  h_learners = sl_lrnr,
+  b_learners = sl_lrnr,
+  q_learners = sl_lrnr,
+  r_learners = sl_lrnr,
+  effect = "direct",
+  estimator = "onestep",
+  estimator_args = list(cv_folds = 2)
+)
 summary(os_de)
## # A tibble: 1 × 7
-##    lwr_ci param_est upr_ci var_est  eif_mean estimator param                
-##     <dbl>     <dbl>  <dbl>   <dbl>     <dbl> <chr>     <chr>                
-## 1 -0.0935    0.0147  0.123 0.00305 -3.53e-17 onestep   direct_interventional
+## lwr_ci param_est upr_ci var_est eif_mean estimator param +## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> +## 1 -0.0862 0.0293 0.145 0.00347 3.38e-17 onestep direct_interventional

From the output of the summary method, we note that the one-step estimate of the interventional direct effect \(\hat{\theta}_{\text{os}}^{\text{DE}}\) is -0.015, with 95% confidence interval [-0.094, 0.123].

+0.029, with 95% confidence interval [-0.086, 0.145].

Next, let’s compare the one-step estimate to the TML estimate. Analogous to the case of the one-step estimator, the TML estimator can be evaluated via a single call to the medoutcon function:

 # compute targeted minimum loss estimate of the interventional direct effect
-tmle_de <- medoutcon(W = example_data[, ..w_names],
-                     A = example_data$A,
-                     Z = example_data$Z,
-                     M = example_data[, ..m_names],
-                     Y = example_data$Y,
-                     g_learners = sl_lrnr,
-                     h_learners = sl_lrnr,
-                     b_learners = sl_lrnr,
-                     q_learners = sl_lrnr,
-                     r_learners = sl_lrnr,
-                     effect = "direct",
-                     estimator = "tmle",
-                     estimator_args = list(cv_folds = 2, max_iter = 5))
+tmle_de <- medoutcon(
+  W = example_data[, ..w_names],
+  A = example_data$A,
+  Z = example_data$Z,
+  M = example_data[, ..m_names],
+  Y = example_data$Y,
+  g_learners = sl_lrnr,
+  h_learners = sl_lrnr,
+  b_learners = sl_lrnr,
+  q_learners = sl_lrnr,
+  r_learners = sl_lrnr,
+  effect = "direct",
+  estimator = "tmle",
+  estimator_args = list(cv_folds = 2, max_iter = 5)
+)
 summary(tmle_de)
## # A tibble: 1 × 7
 ##    lwr_ci param_est upr_ci var_est eif_mean estimator param                
 ##     <dbl>     <dbl>  <dbl>   <dbl>    <dbl> <chr>     <chr>                
-## 1 -0.0757    0.0247  0.125 0.00262 -0.00234 tmle      direct_interventional
+## 1 -0.0941 0.0197 0.134 0.00337 0.00354 tmle direct_interventional

From the output of the summary method, we note that the TML estimate of the interventional direct effect \(\hat{\theta}_{\text{tmle}}^{\text{DE}}\) is -0.025, with 95% confidence interval [-0.076, 0.125]. Here, we recall -that the TML estimator generally exhibits better finite-sample -performance than the one-step estimator (van der -Laan and Rose 2011, 2018), so the TML estimate is likely to be -more reliable in our modest sample size of \(n +0.02, with 95% confidence interval [-0.094, 0.134]. Here, we recall that +the TML estimator generally exhibits better finite-sample performance +than the one-step estimator (van der Laan and +Rose 2011, 2018), so the TML estimate is likely to be more +reliable in our modest sample size of \(n =\) 500.

@@ -525,59 +531,63 @@

Estimating the indirect effect
 # compute one-step estimate of the interventional indirect effect
-os_ie <- medoutcon(W = example_data[, ..w_names],
-                   A = example_data$A,
-                   Z = example_data$Z,
-                   M = example_data[, ..m_names],
-                   Y = example_data$Y,
-                   g_learners = sl_lrnr,
-                   h_learners = sl_lrnr,
-                   b_learners = sl_lrnr,
-                   q_learners = sl_lrnr,
-                   r_learners = sl_lrnr,
-                   effect = "indirect",
-                   estimator = "onestep")
+os_ie <- medoutcon(
+  W = example_data[, ..w_names],
+  A = example_data$A,
+  Z = example_data$Z,
+  M = example_data[, ..m_names],
+  Y = example_data$Y,
+  g_learners = sl_lrnr,
+  h_learners = sl_lrnr,
+  b_learners = sl_lrnr,
+  q_learners = sl_lrnr,
+  r_learners = sl_lrnr,
+  effect = "indirect",
+  estimator = "onestep"
+)
 summary(os_ie)

## # A tibble: 1 × 7
 ##   lwr_ci param_est  upr_ci var_est  eif_mean estimator param                  
 ##    <dbl>     <dbl>   <dbl>   <dbl>     <dbl> <chr>     <chr>                  
-## 1 -0.241    -0.161 -0.0813 0.00167 -3.12e-17 onestep   indirect_interventional
+## 1 -0.206 -0.143 -0.0802 0.00103 -5.42e-17 onestep indirect_interventional

From the output of the summary method, we note that the one-step estimate of the interventional indirect effect \(\hat{\theta}_{\text{os}}^{\text{IE}}\) is --0.161, with 95% confidence interval [-0.241, -0.081].

+-0.143, with 95% confidence interval [-0.206, -0.08].

As before, let’s compare the one-step estimate to the TML estimate. Analogous to the case of the one-step estimator, the TML estimator can be evaluated via a single call to the medoutcon function, as demonstrated below

 # compute targeted minimum loss estimate of the interventional indirect effect
-tmle_ie <- medoutcon(W = example_data[, ..w_names],
-                     A = example_data$A,
-                     Z = example_data$Z,
-                     M = example_data[, ..m_names],
-                     Y = example_data$Y,
-                     g_learners = sl_lrnr,
-                     h_learners = sl_lrnr,
-                     b_learners = sl_lrnr,
-                     q_learners = sl_lrnr,
-                     r_learners = sl_lrnr,
-                     effect = "indirect",
-                     estimator = "tmle")
+tmle_ie <- medoutcon(
+  W = example_data[, ..w_names],
+  A = example_data$A,
+  Z = example_data$Z,
+  M = example_data[, ..m_names],
+  Y = example_data$Y,
+  g_learners = sl_lrnr,
+  h_learners = sl_lrnr,
+  b_learners = sl_lrnr,
+  q_learners = sl_lrnr,
+  r_learners = sl_lrnr,
+  effect = "indirect",
+  estimator = "tmle"
+)
 summary(tmle_ie)
## # A tibble: 1 × 7
 ##   lwr_ci param_est  upr_ci var_est eif_mean estimator param                  
 ##    <dbl>     <dbl>   <dbl>   <dbl>    <dbl> <chr>     <chr>                  
-## 1 -0.204    -0.136 -0.0671 0.00122 0.000383 tmle      indirect_interventional
+## 1 -0.197 -0.121 -0.0451 0.00151 -0.00834 tmle indirect_interventional

From the output of the summary method, we note that the TML estimate of the interventional indirect effect \(\hat{\theta}_{\text{tmle}}^{\text{IE}}\) is --0.136, with 95% confidence interval [-0.204, -0.067]. As before, the +-0.121, with 95% confidence interval [-0.197, -0.045]. As before, the TML estimator provides better finite-sample performance than the one-step estimator, so it may be preferred in this example.

References

-
+
Chernozhukov, Victor, Denis Chetverikov, Mert Demirer, Esther Duflo, Christian Hansen, Whitney Newey, and James Robins. 2018. @@ -649,7 +659,7 @@

References

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/authors.html b/docs/authors.html index 4bf2674..2a6388e 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3
@@ -87,15 +87,15 @@

Citation

-

Hejazi NS, Díaz I, Rudolph KE (2022). +

Hejazi NS, Díaz I, Rudolph KE (2024). medoutcon: Efficient causal mediation analysis for the natural and interventional effects. -doi:10.5281/zenodo.5809519, R package version 0.2.0, https://github.com/nhejazi/medoutcon. +doi:10.5281/zenodo.5809519, R package version 0.2.3, https://github.com/nhejazi/medoutcon.

@Manual{,
   title = {{medoutcon}: Efficient causal mediation analysis for the natural and interventional effects},
   author = {Nima S Hejazi and Iván Díaz and Kara E Rudolph},
-  year = {2022},
-  note = {R package version 0.2.0},
+  year = {2024},
+  note = {R package version 0.2.3},
   doi = {10.5281/zenodo.5809519},
   url = {https://github.com/nhejazi/medoutcon},
 }
@@ -137,7 +137,7 @@

Citation

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/index.html b/docs/index.html index 63f9d21..d1a5cd9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -44,7 +44,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -102,7 +102,7 @@

What’s medoutcon?

-

The medoutcon R package provides facilities for efficient estimation of path-specific (in)direct effects that measure the impact of a treatment variable A on an outcome variable Y, through a direct path (through A only) and an indirect path (through a set of mediators M only). In the presence of an intermediate mediator-outcome confounder Z, itself affected by the treatment A, these correspond to the interventional (in)direct effects described by Dı́az et al. (2020), though similar (yet less general) effect definitions and/or estimation strategies have appeared in VanderWeele, Vansteelandt, and Robins (2014), Rudolph et al. (2017), Zheng and van der Laan (2017), and Benkeser and Ran (2021). When no intermediate confounders are present, these effect definitions simplify to the well-studied natural (in)direct effects, and our estimators are analogs of those formulated by Zheng and van der Laan (2012). Both an efficient one-step bias-corrected estimator with cross-fitting (Pfanzagl and Wefelmeyer 1985; Zheng and van der Laan 2011; Chernozhukov et al. 2018) and a cross-validated targeted minimum loss estimator (TMLE) (van der Laan and Rose 2011; Zheng and van der Laan 2011) are made available. medoutcon integrates with the sl3 R package (Coyle et al. 2021) to leverage statistical machine learning in the estimation procedure.

+

The medoutcon R package provides facilities for efficient estimation of path-specific (in)direct effects that measure the impact of a treatment variable A on an outcome variable Y, through a direct path (through A only) and an indirect path (through a set of mediators M only). In the presence of an intermediate mediator-outcome confounder Z, itself affected by the treatment A, these correspond to the interventional (in)direct effects described by Díaz et al. (2020), though similar (yet less general) effect definitions and/or estimation strategies have appeared in VanderWeele, Vansteelandt, and Robins (2014), Rudolph et al. (2017), Zheng and van der Laan (2017), and Benkeser and Ran (2021). When no intermediate confounders are present, these effect definitions simplify to the well-studied natural (in)direct effects, and our estimators are analogs of those formulated by Zheng and van der Laan (2012). Both an efficient one-step bias-corrected estimator with cross-fitting (Pfanzagl and Wefelmeyer 1985; Zheng and van der Laan 2011; Chernozhukov et al. 2018) and a cross-validated targeted minimum loss estimator (TMLE) (van der Laan and Rose 2011; Zheng and van der Laan 2011) are made available. medoutcon integrates with the sl3 R package (Coyle et al. 2021) to leverage statistical machine learning in the estimation procedure.


@@ -120,21 +120,34 @@

Example
 library(data.table)
 library(tidyverse)
-#> ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.2 ──
-#> ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
-#> ✔ tibble  3.1.8      ✔ dplyr   1.0.10
-#> ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
-#> ✔ readr   2.1.2      ✔ forcats 0.5.2 
-#> ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
-#> ✖ dplyr::between()   masks data.table::between()
-#> ✖ dplyr::filter()    masks stats::filter()
-#> ✖ dplyr::first()     masks data.table::first()
-#> ✖ dplyr::lag()       masks stats::lag()
-#> ✖ dplyr::last()      masks data.table::last()
-#> ✖ purrr::transpose() masks data.table::transpose()
+#> ── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
+#> ✔ dplyr     1.1.3     ✔ readr     2.1.4
+#> ✔ forcats   1.0.0     ✔ stringr   1.5.0
+#> ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
+#> ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
+#> ✔ purrr     1.0.2     
+#> ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
+#> ✖ dplyr::between()     masks data.table::between()
+#> ✖ dplyr::filter()      masks stats::filter()
+#> ✖ dplyr::first()       masks data.table::first()
+#> ✖ lubridate::hour()    masks data.table::hour()
+#> ✖ lubridate::isoweek() masks data.table::isoweek()
+#> ✖ dplyr::lag()         masks stats::lag()
+#> ✖ dplyr::last()        masks data.table::last()
+#> ✖ lubridate::mday()    masks data.table::mday()
+#> ✖ lubridate::minute()  masks data.table::minute()
+#> ✖ lubridate::month()   masks data.table::month()
+#> ✖ lubridate::quarter() masks data.table::quarter()
+#> ✖ lubridate::second()  masks data.table::second()
+#> ✖ purrr::transpose()   masks data.table::transpose()
+#> ✖ lubridate::wday()    masks data.table::wday()
+#> ✖ lubridate::week()    masks data.table::week()
+#> ✖ lubridate::yday()    masks data.table::yday()
+#> ✖ lubridate::year()    masks data.table::year()
+#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
 library(medoutcon)
-#> medoutcon v0.1.6: Efficient Natural and Interventional Causal Mediation Analysis
-set.seed(1584)
+#> medoutcon v0.2.3: Efficient Natural and Interventional Causal Mediation Analysis
+set.seed(02138)
 
 # produces a simple data set based on a causal model with mediation
 make_example_data <- function(n_obs = 1000) {
@@ -159,55 +172,59 @@ 

Example y <- rbinom(n_obs, 1, plogis(1 / (rowSums(w) - z + a + m))) ## construct output - dat <- as.data.table(cbind(w = w, a = a, z = z, m = m, y = y)) - setnames(dat, c(w_names, "A", "Z", m_names, "Y")) + dat <- as.data.table(cbind(w = w, a = a, z = z, m = m, y = y)) + setnames(dat, c(w_names, "A", "Z", m_names, "Y")) return(dat) } # set seed and simulate example data -example_data <- make_example_data() +example_data <- make_example_data(n_obs = 5000L) w_names <- str_subset(colnames(example_data), "W") m_names <- str_subset(colnames(example_data), "M") # quick look at the data head(example_data) #> W_1 W_2 W_3 A Z M Y -#> 1: 1 0 1 0 0 0 1 -#> 2: 0 1 0 0 0 1 0 -#> 3: 1 1 1 1 0 1 1 -#> 4: 0 1 1 0 0 1 0 -#> 5: 0 0 0 0 0 1 1 -#> 6: 1 0 1 1 0 1 0 +#> 1: 1 0 0 0 0 1 0 +#> 2: 0 0 0 0 0 0 1 +#> 3: 1 0 1 1 1 1 0 +#> 4: 1 0 1 1 0 1 1 +#> 5: 1 0 1 0 1 1 1 +#> 6: 1 0 0 0 0 1 0 # compute one-step estimate of the interventional direct effect -os_de <- medoutcon(W = example_data[, ..w_names], - A = example_data$A, - Z = example_data$Z, - M = example_data[, ..m_names], - Y = example_data$Y, - effect = "direct", - estimator = "onestep") +os_de <- medoutcon( + W = example_data[, ..w_names], + A = example_data$A, + Z = example_data$Z, + M = example_data[, ..m_names], + Y = example_data$Y, + effect = "direct", + estimator = "onestep" +) os_de #> Interventional Direct Effect #> Estimator: onestep -#> Estimate: -0.065 -#> Std. Error: 0.054 -#> 95% CI: [-0.17, 0.041] +#> Estimate: -0.101 +#> Std. 
Error: 0.028 +#> 95% CI: [-0.156, -0.045] # compute targeted minimum loss estimate of the interventional direct effect -tmle_de <- medoutcon(W = example_data[, ..w_names], - A = example_data$A, - Z = example_data$Z, - M = example_data[, ..m_names], - Y = example_data$Y, - effect = "direct", - estimator = "tmle") +tmle_de <- medoutcon( + W = example_data[, ..w_names], + A = example_data$A, + Z = example_data$Z, + M = example_data[, ..m_names], + Y = example_data$Y, + effect = "direct", + estimator = "tmle" +) tmle_de #> Interventional Direct Effect #> Estimator: tmle -#> Estimate: -0.06 -#> Std. Error: 0.058 -#> 95% CI: [-0.173, 0.053]

+#> Estimate: -0.103 +#> Std. Error: 0.029 +#> 95% CI: [-0.16, -0.046]

For details on how to use data adaptive regression (machine learning) techniques in the estimation of nuisance parameters, consider consulting the vignette that accompanies the package.


@@ -227,76 +244,76 @@

ContributionsCitation

After using the medoutcon R package, please cite the following:

-
    @article{diaz2020nonparametric,
-      title={Non-parametric efficient causal mediation with intermediate
-        confounders},
-      author={D{\'\i}az, Iv{\'a}n and Hejazi, Nima S and Rudolph, Kara E
-        and {van der Laan}, Mark J},
-      year={2020},
-      url = {https://arxiv.org/abs/1912.09936},
-      doi = {10.1093/biomet/asaa085},
-      journal={Biometrika},
-      volume = {108},
-      number = {3},
-      pages = {627--641},
-      publisher={Oxford University Press}
-    }
-
-    @article{hejazi2022medoutcon-joss,
-      author = {Hejazi, Nima S and Rudolph, Kara E and D{\'\i}az,
-        Iv{\'a}n},
-      title = {{medoutcon}: Nonparametric efficient causal mediation
-        analysis with machine learning in {R}},
-      year = {2022},
-      doi = {10.21105/joss.03979},
-      url = {https://doi.org/10.21105/joss.03979},
-      journal = {Journal of Open Source Software},
-      publisher = {The Open Journal}
-    }
-
-    @software{hejazi2022medoutcon-rpkg,
-      author={Hejazi, Nima S and D{\'\i}az, Iv{\'a}n and Rudolph, Kara E},
-      title = {{medoutcon}: Efficient natural and interventional causal
-        mediation analysis},
-      year  = {2022},
-      doi = {10.5281/zenodo.5809519},
-      url = {https://github.com/nhejazi/medoutcon},
-      note = {R package version 0.1.6}
-    }
+
    @article{diaz2020nonparametric,
+      title={Non-parametric efficient causal mediation with intermediate
+        confounders},
+      author={D{\'\i}az, Iv{\'a}n and Hejazi, Nima S and Rudolph, Kara E
+        and {van der Laan}, Mark J},
+      year={2020},
+      url = {https://arxiv.org/abs/1912.09936},
+      doi = {10.1093/biomet/asaa085},
+      journal={Biometrika},
+      volume = {108},
+      number = {3},
+      pages = {627--641},
+      publisher={Oxford University Press}
+    }
+
+    @article{hejazi2022medoutcon-joss,
+      author = {Hejazi, Nima S and Rudolph, Kara E and D{\'\i}az,
+        Iv{\'a}n},
+      title = {{medoutcon}: Nonparametric efficient causal mediation
+        analysis with machine learning in {R}},
+      year = {2022},
+      doi = {10.21105/joss.03979},
+      url = {https://doi.org/10.21105/joss.03979},
+      journal = {Journal of Open Source Software},
+      publisher = {The Open Journal}
+    }
+
+    @software{hejazi2022medoutcon-rpkg,
+      author={Hejazi, Nima S and D{\'\i}az, Iv{\'a}n and Rudolph, Kara E},
+      title = {{medoutcon}: Efficient natural and interventional causal
+        mediation analysis},
+      year  = {2024},
+      doi = {10.5281/zenodo.5809519},
+      url = {https://github.com/nhejazi/medoutcon},
+      note = {R package version 0.2.3}
+    }

License

-

© 2020-2022 Nima S. Hejazi

+

© 2020-2024 Nima S. Hejazi

The contents of this repository are distributed under the MIT license. See below for details:

-
MIT License
-
-Copyright (c) 2020-2022 Nima S. Hejazi
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+
MIT License
+
+Copyright (c) 2020-2024 Nima S. Hejazi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

References

-
+
Benkeser, David, and Jialu Ran. 2021. “Nonparametric Inference for Interventional Effects with Multiple Mediators.” Journal of Causal Inference. https://doi.org/10.1515/jci-2020-0018.
@@ -304,7 +321,7 @@

Referenceshttps://doi.org/10.1111/ectj.12097.

-Coyle, Jeremy R, Nima S Hejazi, Ivana Malenica, Rachael V Phillips, and Oleg Sofrygin. 2021. sl3: Modern Machine Learning Pipelines for Super Learning (version 1.4.4). https://doi.org/10.5281/zenodo.1342293. +Coyle, Jeremy R, Nima S Hejazi, Ivana Malenica, Rachael V Phillips, and Oleg Sofrygin. 2021. “sl3: Modern Machine Learning Pipelines for Super Learning.” https://doi.org/10.5281/zenodo.1342293.
Dı́az, Iván, Nima S Hejazi, Kara E Rudolph, and Mark J van der Laan. 2020. “Non-Parametric Efficient Causal Mediation with Intermediate Confounders.” Biometrika 108 (3): 627–41. https://doi.org/10.1093/biomet/asaa085. @@ -318,9 +335,6 @@

References van der Laan, Mark J, and Sherri Rose. 2011. Targeted Learning: Causal Inference for Observational and Experimental Data. Springer Science & Business Media.

-
-VanderWeele, Tyler J, Stijn Vansteelandt, and James M Robins. 2014. “Effect Decomposition in the Presence of an Exposure-Induced Mediator-Outcome Confounder.” Epidemiology 25 (2): 300. https://doi.org/10.1097/ede.0000000000000034. -
Zheng, Wenjing, and Mark J van der Laan. 2011. “Cross-Validated Targeted Minimum-Loss-Based Estimation.” In Targeted Learning: Causal Inference for Observational and Experimental Data, 459–74. Springer. https://doi.org/10.1007/978-1-4419-9782-1_27.
@@ -379,7 +393,7 @@

Developers

Dev status

    -
  • R-CMD-check
  • +
  • R-CMD-check
  • Coverage Status
  • Project Status: Active – The project has reached a stable, usable state and is being actively developed.
  • MIT license
  • @@ -399,7 +413,7 @@

    Dev status

    -

    Site built with pkgdown 2.0.6.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/news/index.html b/docs/news/index.html index 945b4ae..c06a457 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3
@@ -57,6 +57,22 @@

Changelog

Source: NEWS.md
+
+ +
  • Added a new named argument cv_stratify to est_onestep() and est_tml() and to the estimator_args list-argument in medoutcon(), which allows for stratified folds to be generated for cross-fitting (by passing these to the strata_ids argument of make_folds() from the origami package). This is also triggered by an override in est_onestep() and est_tml() when the proportion of detected cases is less than 0.1, a heuristic for rare outcomes.
  • +
  • Increased the default number of folds for cross-fitting from 5 to 10, setting cv_folds = 10L in named arguments to est_onestep() and est_tml() and to the estimator_args list-argument in medoutcon().
  • +
  • Changed default propensity score truncation bounds specified in g_bounds to c(0.005, 0.995) from c(0.01, 0.99) (in v0.2.2), based on sanity checks and manual experimentation.
  • +
  • Wrapped instances of sl3_Task() in which outcome_type = "continuous" is specified in suppressWarnings() to sink warnings when the outcome variable for a given nuisance estimation task fails sl3’s check for continuous-ness.
  • +
+
+ +
+
+ +
  • Fixes bug in weighted TMLEs introduced during prior update to est_tml().
  • +
  • Added support for a semiparametric correction for outcome-dependent two-phase sampling designs with known or estimated sampling weights.
  • @@ -87,7 +103,7 @@
-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 201c2d6..6ebfdd4 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,9 +1,9 @@ -pandoc: '2.18' -pkgdown: 2.0.6 +pandoc: 3.1.9 +pkgdown: 2.0.7 pkgdown_sha: ~ articles: intro_medoutcon: intro_medoutcon.html -last_built: 2022-10-12T21:35Z +last_built: 2024-03-17T21:39Z urls: reference: https://code.nimahejazi.org/medoutcon/reference article: https://code.nimahejazi.org/medoutcon/articles diff --git a/docs/reference/bound_precision.html b/docs/reference/bound_precision.html index 51f8f9c..7aff264 100644 --- a/docs/reference/bound_precision.html +++ b/docs/reference/bound_precision.html @@ -18,7 +18,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -94,7 +94,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/bound_propensity.html b/docs/reference/bound_propensity.html index f350459..eb9eca3 100644 --- a/docs/reference/bound_propensity.html +++ b/docs/reference/bound_propensity.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -91,7 +91,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/confint.medoutcon.html b/docs/reference/confint.medoutcon.html index 0dde786..e3484fb 100644 --- a/docs/reference/confint.medoutcon.html +++ b/docs/reference/confint.medoutcon.html @@ -1,5 +1,5 @@ -Confidence intervals for interventional mediation effect estimates — confint.medoutcon • medoutconConfidence intervals for natural/interventional (in)direct effect estimates — confint.medoutcon • medoutconEIF for stochastic interventional (in)direct effects — cv_eif • medoutconEIF for natural and interventional (in)direct effects — cv_eif • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,13 +53,13 @@
-

EIF for stochastic interventional (in)direct effects

+

EIF for natural and interventional (in)direct effects

@@ -77,7 +77,8 @@

EIF for stochastic interventional (in)direct effects

d_learners, effect_type = c("interventional", "natural"), w_names, - m_names + m_names, + g_bounds = c(0.005, 0.995) )
@@ -179,6 +180,12 @@

Arguments

correspond to mediators (M). The input for this argument is automatically generated by medoutcon.

+ +
g_bounds
+

A numeric vector containing two values, the +first being the minimum allowable estimated propensity score value and the +second being the maximum allowable estimated propensity score value.

+
@@ -193,7 +200,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/est_onestep.html b/docs/reference/est_onestep.html index cf5b8f5..f1ecabd 100644 --- a/docs/reference/est_onestep.html +++ b/docs/reference/est_onestep.html @@ -1,5 +1,5 @@ -One-step estimator for stochastic interventional (in)direct effects — est_onestep • medoutconOne-step estimator for natural and interventional (in)direct effects — est_onestep • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,13 +53,13 @@
-

One-step estimator for stochastic interventional (in)direct effects

+

One-step estimator for natural and interventional (in)direct effects

@@ -77,9 +77,11 @@

One-step estimator for stochastic interventional (in)direct effects

w_names, m_names, y_bounds, + g_bounds = c(0.005, 0.995), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L + cv_folds = 10L, + cv_stratify = FALSE )
@@ -176,6 +178,12 @@

Arguments

the unit interval.

+
g_bounds
+

A numeric vector containing two values, the +first being the minimum allowable estimated propensity score value and the +second being the maximum allowable estimated propensity score value.

+ +
effect_type

A character indicating whether components of the interventional or natural (in)direct effects are to be estimated. In the @@ -196,6 +204,15 @@

Arguments

make_folds, this value specified must be greater than or equal to 2; the default is to create 10 folds.

+ +
cv_stratify
+

A logical atomic vector indicating whether V-fold +cross-validation should stratify the folds based on the outcome variable. +If TRUE, the folds are stratified by passing the outcome variable to +the strata_ids argument of make_folds. While +the default is FALSE, an override is triggered when the incidence of +the binary outcome variable falls below 0.1.

+
@@ -210,7 +227,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/est_plugin.html b/docs/reference/est_plugin.html index 5953561..5c9acb6 100644 --- a/docs/reference/est_plugin.html +++ b/docs/reference/est_plugin.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -92,7 +92,7 @@

Value

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/est_tml.html b/docs/reference/est_tml.html index 6f5caef..610bc32 100644 --- a/docs/reference/est_tml.html +++ b/docs/reference/est_tml.html @@ -1,5 +1,5 @@ -TML estimator for stochastic interventional (in)direct effects — est_tml • medoutconTML estimator for natural and interventional (in)direct effects — est_tml • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,13 +53,13 @@
-

TML estimator for stochastic interventional (in)direct effects

+

TML estimator for natural and interventional (in)direct effects

@@ -77,11 +77,13 @@

TML estimator for stochastic interventional (in)direct effects

w_names, m_names, y_bounds, + g_bounds = c(0.005, 0.95), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L, - max_iter = 5L, - tiltmod_tol = 10 + cv_folds = 10L, + cv_stratify = FALSE, + max_iter = 10L, + tiltmod_tol = 5 )
@@ -178,6 +180,12 @@

Arguments

the unit interval.

+
g_bounds
+

A numeric vector containing two values, the +first being the minimum allowable estimated propensity score value and the +second being the maximum allowable estimated propensity score value.

+ +
effect_type

A character indicating whether components of the interventional or natural (in)direct effects are to be estimated. In the @@ -199,6 +207,15 @@

Arguments

equal to 2; the default is to create 10 folds.

+
cv_stratify
+

A logical atomic vector indicating whether V-fold +cross-validation should stratify the folds based on the outcome variable. +If TRUE, the folds are stratified by passing the outcome variable to +the strata_ids argument of make_folds. While +the default is FALSE, an override is triggered when the incidence of +the binary outcome variable falls below 0.1.

+ +
max_iter

A numeric integer giving the maximum number of steps to be taken for the iterative procedure to construct a TML estimator.

@@ -223,7 +240,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_moc_mech.html b/docs/reference/fit_moc_mech.html index e5be0f1..f72a84d 100644 --- a/docs/reference/fit_moc_mech.html +++ b/docs/reference/fit_moc_mech.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -136,7 +136,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_nuisance_d.html b/docs/reference/fit_nuisance_d.html index 065cb71..f808f73 100644 --- a/docs/reference/fit_nuisance_d.html +++ b/docs/reference/fit_nuisance_d.html @@ -20,7 +20,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -178,7 +178,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_nuisance_u.html b/docs/reference/fit_nuisance_u.html index ff425ca..fd4def0 100644 --- a/docs/reference/fit_nuisance_u.html +++ b/docs/reference/fit_nuisance_u.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -144,7 +144,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_nuisance_v.html b/docs/reference/fit_nuisance_v.html index 5d32a30..d6ef99e 100644 --- a/docs/reference/fit_nuisance_v.html +++ b/docs/reference/fit_nuisance_v.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -139,7 +139,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_out_mech.html b/docs/reference/fit_out_mech.html index 387f0a3..55264cf 100644 --- a/docs/reference/fit_out_mech.html +++ b/docs/reference/fit_out_mech.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -126,7 +126,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/fit_treat_mech.html b/docs/reference/fit_treat_mech.html index e47dd03..b82822f 100644 --- a/docs/reference/fit_treat_mech.html +++ b/docs/reference/fit_treat_mech.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -70,7 +70,8 @@

Fit propensity scores for treatment contrasts

learners, m_names, w_names, - type = c("g", "h") + type = c("g", "h"), + bounds = c(0.01, 0.99) ) @@ -99,8 +100,8 @@

Arguments

learners

Stack, or other learner class (inheriting from Lrnr_base), containing a set of learners from -sl3, to be used in fitting a propensity score models, i.e., g := -P(A = 1 | W) and h := P(A = 1 | M, W).

+sl3, to be used in fitting propensity score models, i.e., g := P(A += 1 | W) and h := P(A = 1 | M, W).

m_names
@@ -117,8 +118,14 @@

Arguments

type

A character indicating which of the treatment mechanism -variants to estimate. Option "g" corresponds to the propensity score -g(A|W) while option "h" conditions on the mediators h(A|M,W).

+variants to estimate. Option "g" is the propensity score g(A|W) +while option "h" is a re-parameterized mediator density h(A|M,W).

+ + +
bounds
+

A numeric vector containing two values, the first being +the minimum allowable estimated propensity score value and the second +being the maximum allowable estimated propensity score value.

@@ -134,7 +141,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/index.html b/docs/reference/index.html index 4747fb8..3c20fd8 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -63,7 +63,7 @@

All functions

confint(<medoutcon>)

-

Confidence intervals for interventional mediation effect estimates

+

Confidence intervals for natural/interventional (in)direct effect estimates

fit_moc_mech()

@@ -87,15 +87,15 @@

All functions

medoutcon()

-

Efficient estimation of stochastic interventional (in)direct effects

+

Efficient estimation of natural and interventional (in)direct effects

print(<medoutcon>)

-

Print method for interventional mediation effect estimate objects

+

Print method for natural/interventional (in)direct effect estimate objects

summary(<medoutcon>)

-

Summary for interventional mediation effect estimate objects

+

Summary for natural/interventional (in)direct effect estimate objects

diff --git a/docs/reference/medoutcon.html b/docs/reference/medoutcon.html index d1c63fc..6f1d4c0 100644 --- a/docs/reference/medoutcon.html +++ b/docs/reference/medoutcon.html @@ -1,5 +1,5 @@ -Efficient estimation of stochastic interventional (in)direct effects — medoutcon • medoutconEfficient estimation of natural and interventional (in)direct effects — medoutcon • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,13 +53,13 @@
-

Efficient estimation of stochastic interventional (in)direct effects

+

Efficient estimation of natural and interventional (in)direct effects

@@ -73,7 +73,7 @@

Efficient estimation of stochastic interventional (in)direct effects

obs_weights = rep(1, length(Y)), svy_weights = NULL, two_phase_weights = rep(1, length(Y)), - effect = c("direct", "indirect"), + effect = c("direct", "indirect", "pm"), contrast = NULL, g_learners = sl3::Lrnr_glm_fast$new(), h_learners = sl3::Lrnr_glm_fast$new(), @@ -84,7 +84,9 @@

Efficient estimation of stochastic interventional (in)direct effects

v_learners = sl3::Lrnr_hal9001$new(), d_learners = sl3::Lrnr_glm_fast$new(), estimator = c("tmle", "onestep"), - estimator_args = list(cv_folds = 5L, max_iter = 5L, tiltmod_tol = 5) + estimator_args = list(cv_folds = 10L, cv_stratify = FALSE, max_iter = 10L, tiltmod_tol + = 5), + g_bounds = c(0.005, 0.995) )
@@ -120,7 +122,7 @@

Arguments

R

A logical vector indicating whether a sampled observation's mediator was measured via a two-phase sampling design. Defaults to a -vector of ones, implying that two-phase sampling was not performed.

+vector of ones, indicating that two-phase sampling was not performed.

obs_weights
@@ -220,12 +222,19 @@

Arguments

estimator_args
-

A list of extra arguments to be passed (via +

A list of additional arguments passed (via ...) to the function call for the specified estimator. The default -is chosen so as to allow the number of folds used in computing the one-step -or TML estimators to be easily adjusted. In the case of the TML estimator, -the number of update (fluctuation) iterations is limited, and a tolerance -is included for the updates introduced by the tilting (fluctuation) models.

+is chosen so as to allow the number of folds used to compute the one-step +or TML estimators to be adjusted and for stratified cross-validation to be +used in cases of rare outcomes. In the case of the TML estimator, the +number of update (fluctuation) iterations is limited, and a tolerance is +included for the updates introduced by tilting (fluctuation) models.

+ + +
g_bounds
+

A numeric vector containing two values, the first +being the minimum allowable estimated propensity score value and the +second being the maximum allowable for estimated propensity scores.

@@ -276,7 +285,7 @@

Examples

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/print.medoutcon.html b/docs/reference/print.medoutcon.html index 376b9c8..5f4fecf 100644 --- a/docs/reference/print.medoutcon.html +++ b/docs/reference/print.medoutcon.html @@ -1,5 +1,5 @@ -Print method for interventional mediation effect estimate objects — print.medoutcon • medoutconPrint method for natural/interventional (in)direct effect estimate objects — print.medoutcon • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,7 +53,7 @@
@@ -90,7 +90,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/scale_from_unit.html b/docs/reference/scale_from_unit.html index e391e31..9d880ed 100644 --- a/docs/reference/scale_from_unit.html +++ b/docs/reference/scale_from_unit.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -94,7 +94,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/scale_to_unit.html b/docs/reference/scale_to_unit.html index 543c134..b9f7bfa 100644 --- a/docs/reference/scale_to_unit.html +++ b/docs/reference/scale_to_unit.html @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -86,7 +86,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/summary.medoutcon.html b/docs/reference/summary.medoutcon.html index 0a41e10..2e73a72 100644 --- a/docs/reference/summary.medoutcon.html +++ b/docs/reference/summary.medoutcon.html @@ -1,5 +1,5 @@ -Summary for interventional mediation effect estimate objects — summary.medoutcon • medoutconSummary for natural/interventional (in)direct effect estimate objects — summary.medoutcon • medoutcon @@ -17,7 +17,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -53,7 +53,7 @@
@@ -96,7 +96,7 @@

Arguments

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/two_phase_eif.html b/docs/reference/two_phase_eif.html index 14ea6b0..908c4cb 100644 --- a/docs/reference/two_phase_eif.html +++ b/docs/reference/two_phase_eif.html @@ -18,7 +18,7 @@ medoutcon - 0.2.0 + 0.2.3 @@ -116,7 +116,7 @@

Value

-

Site built with pkgdown 2.0.6.

+

Site built with pkgdown 2.0.7.

diff --git a/man/cv_eif.Rd b/man/cv_eif.Rd index 2c64c56..f436d99 100644 --- a/man/cv_eif.Rd +++ b/man/cv_eif.Rd @@ -19,7 +19,7 @@ cv_eif( effect_type = c("interventional", "natural"), w_names, m_names, - g_bounds = c(0.01, 0.99) + g_bounds = c(0.005, 0.995) ) } \arguments{ diff --git a/man/est_onestep.Rd b/man/est_onestep.Rd index 9b45377..2ed4d53 100644 --- a/man/est_onestep.Rd +++ b/man/est_onestep.Rd @@ -18,10 +18,12 @@ est_onestep( w_names, m_names, y_bounds, - g_bounds = c(0.01, 0.99), + g_bounds = c(0.005, 0.995), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L + cv_folds = 10L, + cv_strat = FALSE, + strat_pmin = 0.1 ) } \arguments{ @@ -108,6 +110,18 @@ be created for cross-validation. Use of cross-validation allows for entropy conditions on the one-step estimator to be relaxed. For compatibility with \code{\link[origami]{make_folds}}, this value specified must be greater than or equal to 2; the default is to create 5 folds.} + +\item{cv_strat}{A \code{logical} atomic vector indicating whether V-fold +cross-validation should stratify the folds based on the outcome variable. +If \code{TRUE}, the folds are stratified by passing the outcome variable to +the \code{strata_ids} argument of \code{\link[origami]{make_folds}}. While +the default is \code{FALSE}, an override is triggered when the incidence of +the binary outcome variable falls below the tolerance in \code{strat_pmin}.} + +\item{strat_pmin}{A \code{numeric} atomic vector indicating a tolerance for +the minimum proportion of cases (for a binary outcome variable) below which +stratified V-fold cross-validation is invoked if \code{cv_strat} is set to +\code{TRUE} (default is \code{FALSE}). 
The default tolerance is 0.1.} } \description{ One-step estimator for natural and interventional (in)direct effects diff --git a/man/est_tml.Rd b/man/est_tml.Rd index dd406ef..05cfda4 100644 --- a/man/est_tml.Rd +++ b/man/est_tml.Rd @@ -18,11 +18,13 @@ est_tml( w_names, m_names, y_bounds, - g_bounds = c(0.01, 0.99), + g_bounds = c(0.005, 0.95), effect_type = c("interventional", "natural"), svy_weights = NULL, - cv_folds = 5L, - max_iter = 5L, + cv_folds = 10L, + cv_strat = FALSE, + strat_pmin = 0.1, + max_iter = 10L, tiltmod_tol = 5 ) } @@ -111,6 +113,18 @@ conditions on the TML estimator to be relaxed. Note: for compatibility with \code{\link[origami]{make_folds}}, this value must be greater than or equal to 2; the default is to create 10 folds.} +\item{cv_strat}{A \code{logical} atomic vector indicating whether V-fold +cross-validation should stratify the folds based on the outcome variable. +If \code{TRUE}, the folds are stratified by passing the outcome variable to +the \code{strata_ids} argument of \code{\link[origami]{make_folds}}. While +the default is \code{FALSE}, an override is triggered when the incidence of +the binary outcome variable falls below the tolerance in \code{strat_pmin}.} + +\item{strat_pmin}{A \code{numeric} atomic vector indicating a tolerance for +the minimum proportion of cases (for a binary outcome variable) below which +stratified V-fold cross-validation is invoked if \code{cv_strat} is set to +\code{TRUE} (default is \code{FALSE}). 
The default tolerance is 0.1.} + \item{max_iter}{A \code{numeric} integer giving the maximum number of steps to be taken for the iterative procedure to construct a TML estimator.} diff --git a/man/medoutcon.Rd b/man/medoutcon.Rd index e601466..3130d70 100644 --- a/man/medoutcon.Rd +++ b/man/medoutcon.Rd @@ -25,8 +25,9 @@ medoutcon( v_learners = sl3::Lrnr_hal9001$new(), d_learners = sl3::Lrnr_glm_fast$new(), estimator = c("tmle", "onestep"), - estimator_args = list(cv_folds = 5L, max_iter = 5L, tiltmod_tol = 5), - g_bounds = c(0.01, 0.99) + estimator_args = list(cv_folds = 10L, cv_strat = FALSE, strat_pmin = 0.1, max_iter = + 10L, tiltmod_tol = 5), + g_bounds = c(0.005, 0.995) ) } \arguments{ @@ -63,9 +64,9 @@ weights corresponding to the inverse probability of the mediator being measured. Defaults to a vector of ones.} \item{effect}{A \code{character} indicating whether to compute the direct or -the indirect effect as discussed in . -This is ignored when the argument \code{contrast} is provided. By default, -the direct effect is estimated.} +the indirect effects of . This is +ignored when the argument \code{contrast} is provided. By default, the +direct effect is estimated.} \item{contrast}{A \code{numeric} double indicating the two values of the intervention \code{A} to be compared. The default value of \code{NULL} has @@ -119,17 +120,17 @@ contrast-specific parameter) to be computed. Both an efficient one-step estimator using cross-fitting and a cross-validated targeted minimum loss estimator (TMLE) are available. The default is the TML estimator.} -\item{estimator_args}{A \code{list} of extra arguments to be passed (via +\item{estimator_args}{A \code{list} of additional arguments passed (via \code{...}) to the function call for the specified estimator. The default is chosen so as to allow the number of folds used to compute the one-step -or TML estimators to be easily adjusted. 
In the case of the TML estimator, -the number of update (fluctuation) iterations is limited, and a tolerance -is included for the updates introduced by tilting (fluctuation) models.} +or TML estimators to be adjusted and for stratified cross-validation to be +used in cases of rare outcomes. In the case of the TML estimator, the +number of update (fluctuation) iterations is limited, and a tolerance is +included for the updates introduced by tilting (fluctuation) models.} \item{g_bounds}{A \code{numeric} vector containing two values, the first being the minimum allowable estimated propensity score value and the -second being the maximum allowable for estimated propensity scores. -Defaults to \code{c(0.001, 0.999)}.} +second being the maximum allowable for estimated propensity scores.} } \description{ Efficient estimation of natural and interventional (in)direct effects