Merge pull request #8 from CalebRollins1/master

Merge AutoDML implementation of @CalebRollins1
MartinSpindler · Dec 29, 2021 · 123b2e0 · 123b2e0
2 parents 5155710 + 96fff58
commit 123b2e0
Show file tree

Hide file tree

Showing 52 changed files with 2,991 additions and 1,848 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -64,12 +64,14 @@ S3method(summary,tsls)
 S3method(tsls,default)
 S3method(tsls,formula)
 export(LassoShooting.fit)
+export(RMD_stable)
 export(lambdaCalculation)
 export(p_adjust)
 export(print_coef)
 export(rlasso)
 export(rlassoATE)
 export(rlassoATET)
+export(rlassoAutoDML)
 export(rlassoEffect)
 export(rlassoEffects)
 export(rlassoIV)

diff --git a/R/LassoShooting.fit.R b/R/LassoShooting.fit.R
@@ -27,14 +27,18 @@
 #' @export
 
 
-LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000, 
-                                                           optTol = 10^(-5), zeroThreshold = 10^(-6)), XX = NULL, Xy = NULL, beta.start = NULL) {
+LassoShooting.fit <- function(x, y, lambda, control = list(
+                                maxIter = 1000,
+                                optTol = 10^(-5), zeroThreshold = 10^(-6)
+                              ), XX = NULL, Xy = NULL, beta.start = NULL) {
   n <- dim(x)[1]
   p <- dim(x)[2]
-  if (is.null(XX)) 
+  if (is.null(XX)) {
     (XX <- crossprod(x))
-  if (is.null(Xy)) 
+  }
+  if (is.null(Xy)) {
     (Xy <- crossprod(x, y))
+  }
   # Start from the LS solution for beta if no beta.start is provided
   if (is.null(beta.start)) {
     # Ridge start beta <- MASS::ginv(XX + diag(as.vector(lambda), p) %*%
@@ -49,7 +53,7 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000,
   m <- 1
   XX2 <- XX * 2
   Xy2 <- Xy * 2
-  
+
   while (m < control$maxIter) {
     beta_old <- beta
     for (j in 1:p) {
@@ -59,13 +63,16 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000,
         beta[j] <- 0
         next
       }
-
-      if (S0 > lambda[j]) 
-        beta[j] <- (lambda[j] - S0)/XX2[j, j]
-      if (S0 < -1 * lambda[j]) 
-        beta[j] <- (-1 * lambda[j] - S0)/XX2[j, j]
-      if (abs(S0) <= lambda[j]) 
+
+      if (S0 > lambda[j]) {
+        beta[j] <- (lambda[j] - S0) / XX2[j, j]
+      }
+      if (S0 < -1 * lambda[j]) {
+        beta[j] <- (-1 * lambda[j] - S0) / XX2[j, j]
+      }
+      if (abs(S0) <= lambda[j]) {
         beta[j] <- 0
+      }
     }
     # Update
     wp <- cbind(wp, beta)
@@ -78,4 +85,4 @@ LassoShooting.fit <- function(x, y, lambda, control = list(maxIter = 1000,
   w <- beta
   w[abs(w) < control$zeroThreshold] <- 0
   return(list(coefficients = w, coef.list = wp, num.it = m))
-}
+}
diff --git a/R/help_functions.R b/R/help_functions.R
diff --git a/R/p_adjust.R b/R/p_adjust.R
@@ -1,65 +1,65 @@
-#'Multiple Testing Adjustment of p-values for S3 objects \code{rlassoEffects}
-#'and \code{lm}
+#' Multiple Testing Adjustment of p-values for S3 objects \code{rlassoEffects}
+#' and \code{lm}
 #'
-#'Multiple hypotheses testing adjustment of p-values from a high-dimensional
-#'linear model.
+#' Multiple hypotheses testing adjustment of p-values from a high-dimensional
+#' linear model.
 #'
-#'Multiple testing adjustment is performed for S3 objects of class
-#'\code{rlassoEffects} and \code{lm}. Implemented methods for multiple testing
-#'adjustment are Romano-Wolf stepdown '\code{RW}' (default) and the adjustment
-#'methods available in the \code{p.adjust} function of the \code{stats} package,
-#'including the Bonferroni, Bonferroni-Holm, and Benjamini-Hochberg corrections,
-#'see \code{\link{p.adjust.methods}}.
+#' Multiple testing adjustment is performed for S3 objects of class
+#' \code{rlassoEffects} and \code{lm}. Implemented methods for multiple testing
+#' adjustment are Romano-Wolf stepdown '\code{RW}' (default) and the adjustment
+#' methods available in the \code{p.adjust} function of the \code{stats} package,
+#' including the Bonferroni, Bonferroni-Holm, and Benjamini-Hochberg corrections,
+#' see \code{\link{p.adjust.methods}}.
 #'
-#'Objects of class \code{rlassoEffects} are constructed by
-#'\code{\link{rlassoEffects}}.
+#' Objects of class \code{rlassoEffects} are constructed by
+#' \code{\link{rlassoEffects}}.
 #'
-#'@param x an object of S3 class \code{rlassoEffects} or \code{lm}.
-#'@param method the method of p-value adjustment for multiple testing.
+#' @param x an object of S3 class \code{rlassoEffects} or \code{lm}.
+#' @param method the method of p-value adjustment for multiple testing.
 #'  Romano-Wolf stepdown ('\code{RW}') is chosen by default.
-#'@param test.index vector of integers, logicals or variables names indicating
+#' @param test.index vector of integers, logicals or variables names indicating
 #'  the position of coefficients (integer case), logical vector of length of the
 #'  coefficients (TRUE or FALSE) or the coefficient names of x which should be
 #'  tested simultaneously (only for S3 class \code{lm}). If missing, all
 #'  coefficients are considered.
-#'@param B number of bootstrap repetitions (default 1000).
-#'@param ... further arguments passed on to methods.
-#'@rdname p_adjust
-#'@aliases p_adjust.rlassoEffects p_adjust.lm
-#'@return A matrix with the estimated coefficients and the p-values that are
+#' @param B number of bootstrap repetitions (default 1000).
+#' @param ... further arguments passed on to methods.
+#' @rdname p_adjust
+#' @aliases p_adjust.rlassoEffects p_adjust.lm
+#' @return A matrix with the estimated coefficients and the p-values that are
 #'  adjusted according to the specified method.
-#'@references J.P. Romano, M. Wolf (2005). Exact and approximate stepdown
+#' @references J.P. Romano, M. Wolf (2005). Exact and approximate stepdown
 #'  methods for multiple hypothesis testing. Journal of the American Statistical
 #'  Association, 100(469), 94-108.
-#'@references J.P. Romano, M. Wolf (2016). Efficient computation of adjusted
+#' @references J.P. Romano, M. Wolf (2016). Efficient computation of adjusted
 #'  p-values for resampling-based stepdown multiple testing. Statistics and
 #'  Probability Letters, (113), 38-40.
-#'@references A. Belloni, V. Chernozhukov, K. Kato (2015). Uniform
+#' @references A. Belloni, V. Chernozhukov, K. Kato (2015). Uniform
 #'  post-selection inference for least absolute deviation regression and other
 #'  Z-estimation problems. Biometrika, 102(1), 77-94.
 #'
 #'
 #' @examples
-#' library(hdm);
+#' library(hdm)
 #' set.seed(1)
-#' n = 100 #sample size
-#' p = 25 # number of variables
-#' s = 3 # nubmer of non-zero variables
-#' X = matrix(rnorm(n*p), ncol=p)
-#' colnames(X) <- paste("X", 1:p, sep="")
-#' beta = c(rep(3,s), rep(0,p-s))
-#' y = 1 + X%*%beta + rnorm(n)
-#' data = data.frame(cbind(y,X))
+#' n <- 100 # sample size
+#' p <- 25 # number of variables
+#' s <- 3 # nubmer of non-zero variables
+#' X <- matrix(rnorm(n * p), ncol = p)
+#' colnames(X) <- paste("X", 1:p, sep = "")
+#' beta <- c(rep(3, s), rep(0, p - s))
+#' y <- 1 + X %*% beta + rnorm(n)
+#' data <- data.frame(cbind(y, X))
 #' colnames(data)[1] <- "y"
-#' lasso.effect = rlassoEffects(X, y, index=c(1:20))
-#' pvals.lasso.effect = p_adjust(lasso.effect, method = "RW", B = 1000)
-#' ols = lm(y ~ -1 + X, data)
-#' pvals.ols = p_adjust(ols, method = "RW", B = 1000)
-#' pvals.ols = p_adjust(ols, method = "RW", B = 1000, test.index = c(1,2,5))
-#' pvals.ols = p_adjust(ols, method = "RW", B = 1000, test.index = c(rep(TRUE, 5), rep(FALSE, p-5)))
+#' lasso.effect <- rlassoEffects(X, y, index = c(1:20))
+#' pvals.lasso.effect <- p_adjust(lasso.effect, method = "RW", B = 1000)
+#' ols <- lm(y ~ -1 + X, data)
+#' pvals.ols <- p_adjust(ols, method = "RW", B = 1000)
+#' pvals.ols <- p_adjust(ols, method = "RW", B = 1000, test.index = c(1, 2, 5))
+#' pvals.ols <- p_adjust(ols, method = "RW", B = 1000, test.index = c(rep(TRUE, 5), rep(FALSE, p - 5)))
 #' @export
 
-p_adjust = function(x, ...){
+p_adjust <- function(x, ...) {
   UseMethod("p_adjust")
 }
 
@@ -89,41 +89,41 @@ p_adjust.rlassoEffects <- function(x, method = "RW", B = 1000, ...) {
     Omegahat <- matrix(NA, ncol = k, nrow = k)
     for (j in 1:k) {
       for (l in 1:k) {
-        Omegahat[j, l] = Omegahat[l, j] = 1/(Ev2[j] * Ev2[l]) * mean(ev[, j] * ev[, l])
+        Omegahat[j, l] <- Omegahat[l, j] <- 1 / (Ev2[j] * Ev2[l]) * mean(ev[, j] * ev[, l])
       }
     }
     se <- sqrt(diag(Omegahat))
 
     Beta_i <- matrix(NA, ncol = k, nrow = B)
     for (i in 1:B) {
-      Beta_i[i, ] <- MASS::mvrnorm(mu = rep(0, k), Sigma = Omegahat/n)
+      Beta_i[i, ] <- MASS::mvrnorm(mu = rep(0, k), Sigma = Omegahat / n)
     }
 
-    tstats <- cf/se
+    tstats <- cf / se
     stepdown.index <- order(abs(tstats), decreasing = TRUE)
     ro <- order(stepdown.index)
 
     for (s in 1:k) {
       if (s == 1) {
-        sim <- apply(Beta_i, 1, function(z) max(abs(z)/se))
-        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s])))/B)
+        sim <- apply(Beta_i, 1, function(z) max(abs(z) / se))
+        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B)
       }
       if (s > 1) {
-        sim <- apply(Beta_i[, -stepdown.index[1:(s - 1)], drop = F], 1, function(z) max(abs(z)/se[-stepdown.index[1:(s - 1)]]))
-        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s])))/B)
+        sim <- apply(Beta_i[, -stepdown.index[1:(s - 1)], drop = F], 1, function(z) max(abs(z) / se[-stepdown.index[1:(s - 1)]]))
+        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B)
       }
     }
 
-      for (j in 1:k) {
-        if (j == 1) {
-          corr.padj[j] <- pinit[j]
-        }
+    for (j in 1:k) {
+      if (j == 1) {
+        corr.padj[j] <- pinit[j]
+      }
 
-        if (j > 1) {
-          corr.padj[j] <- max(pinit[j], corr.padj[j - 1])
-        }
+      if (j > 1) {
+        corr.padj[j] <- max(pinit[j], corr.padj[j - 1])
       }
-      pval <- corr.padj[ro]
+    }
+    pval <- corr.padj[ro]
   }
 
   res <- as.matrix(cbind(cf, pval))
@@ -135,13 +135,13 @@ p_adjust.rlassoEffects <- function(x, method = "RW", B = 1000, ...) {
 
 #' @describeIn p_adjust \code{\link[stats]{lm}}.
 #' @export
-p_adjust.lm = function(x, method = "RW", B = 1000, test.index = NULL, ...) {
+p_adjust.lm <- function(x, method = "RW", B = 1000, test.index = NULL, ...) {
   checkmate::checkClass(x, "lm")
   checkmate::checkChoice(method, c("RW", stats::p.adjust.methods))
   checkmate::assert(checkmate::checkNull(test.index), checkmate::checkLogical(test.index), checkmate::checkNumeric(test.index), checkmate::checkCharacter(test.index))
 
-  cf = coef(x)
-  pnames = names(cf)
+  cf <- coef(x)
+  pnames <- names(cf)
 
   if (is.null(test.index)) {
     k <- length(coef(x))
@@ -152,7 +152,6 @@ p_adjust.lm = function(x, method = "RW", B = 1000, test.index = NULL, ...) {
       stopifnot(length(test.index) == length(coef(x)))
       index <- which(test.index == T)
     } else {
-
       if (is.numeric(test.index)) {
         index <- as.integer(test.index)
         stopifnot(all(test.index <= length(coef(x))) && length(test.index) <= length(coef(x)))
@@ -187,12 +186,12 @@ p_adjust.lm = function(x, method = "RW", B = 1000, test.index = NULL, ...) {
 
     for (s in 1:k) {
       if (s == 1) {
-        sim <- apply(Beta_i, 1, function(z) max(abs(z)/se))
-        pinit[s] <- pmin(1,  (sum(sim >= abs(tstats[stepdown.index][s])))/B)
+        sim <- apply(Beta_i, 1, function(z) max(abs(z) / se))
+        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B)
       }
-      if (s > 1 ) {
-        sim <- apply(Beta_i[, -stepdown.index[1:(s - 1)], drop = FALSE], 1, function(z) max(abs(z)/se[-stepdown.index[1:(s - 1)]]))
-        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s])))/B)
+      if (s > 1) {
+        sim <- apply(Beta_i[, -stepdown.index[1:(s - 1)], drop = FALSE], 1, function(z) max(abs(z) / se[-stepdown.index[1:(s - 1)]]))
+        pinit[s] <- pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B)
       }
     }
 
@@ -214,4 +213,3 @@ p_adjust.lm = function(x, method = "RW", B = 1000, test.index = NULL, ...) {
 
   return(res)
 }
-