DeclareDesign · lukesonnet · Feb 28, 2019 · Jan 24, 2019 · Jan 24, 2019 · Feb 3, 2019
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -21,5 +21,6 @@ update_repo.R
 ^tests/testthat/test-zzzbroom\.R$
 ^estimatr_.*\.tar\.gz$
 ^tests/testthat/test-texreg\.R$
+^tests/testthat/test-gtsummary\.R$
 ^cran-comments\.md$
 
diff --git a/.travis.yml b/.travis.yml
@@ -26,10 +26,12 @@ matrix:
     - os: osx
       r: release
       if: branch = master
+      brew_packages: libgit2
 
     - os: osx
       r: 3.4
       if: branch = master
+      brew_packages: libgit2
 
 env:
   global:
@@ -47,6 +49,8 @@ addons:
 r_github_packages:
 - DeclareDesign/DDtools
 - ropensci/git2r
+- rstudio/gt
+- vincentarelbundock/gtsummary
 
 after_success:
 - Rscript -e DDtools::after_build

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: estimatr
 Type: Package
 Title: Fast Estimators for Design-Based Inference
-Version: 0.14
-Date: 2018-10-29
+Version: 0.15
+Date: 2019-02-27
 Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@ucla.edu", role = c("aut", "cre")),
              person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")),
              person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,10 @@ S3method(confint,difference_in_means)
 S3method(confint,horvitz_thompson)
 S3method(confint,iv_robust)
 S3method(confint,lm_robust)
+S3method(glance,difference_in_means)
+S3method(glance,horvitz_thompson)
+S3method(glance,iv_robust)
+S3method(glance,lm_robust)
 S3method(nobs,iv_robust)
 S3method(nobs,lm_robust)
 S3method(nobs,summary.lm_robust)
@@ -33,6 +37,7 @@ export(difference_in_means)
 export(extract.iv_robust)
 export(extract.lm_robust)
 export(gen_pr_matrix_cluster)
+export(glance)
 export(horvitz_thompson)
 export(iv_robust)
 export(lm_lin)
@@ -43,6 +48,7 @@ export(starprep)
 export(tidy)
 importFrom(Formula,as.Formula)
 importFrom(Rcpp,evalCpp)
+importFrom(generics,glance)
 importFrom(generics,tidy)
 importFrom(methods,className)
 importFrom(methods,isGeneric)
@@ -66,20 +72,25 @@ importFrom(stats,df.residual)
 importFrom(stats,fitted.values)
 importFrom(stats,formula)
 importFrom(stats,lm)
+importFrom(stats,lm.fit)
 importFrom(stats,model.extract)
 importFrom(stats,model.frame)
 importFrom(stats,model.frame.default)
 importFrom(stats,model.matrix)
 importFrom(stats,model.matrix.default)
 importFrom(stats,model.matrix.lm)
 importFrom(stats,model.response)
+importFrom(stats,na.omit)
 importFrom(stats,na.pass)
 importFrom(stats,nobs)
+importFrom(stats,pchisq)
 importFrom(stats,pf)
+importFrom(stats,printCoefmat)
 importFrom(stats,pt)
 importFrom(stats,qt)
 importFrom(stats,reformulate)
 importFrom(stats,resid)
+importFrom(stats,residuals)
 importFrom(stats,sd)
 importFrom(stats,setNames)
 importFrom(stats,terms)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,9 @@
-# estimatr 0.14.0
+# estimatr 0.15.0 (GitHub)
+
+* Add `diagnostics` to `iv_robust()`
+* Add `glance()` methods for all estimators
+
+# estimatr 0.14.0 (CRAN)
 
 * Removes `broom` hack for `tidy` method and instead relies on importing `generics`
 

diff --git a/R/S3_glance.R b/R/S3_glance.R
@@ -0,0 +1,182 @@
+# Helpers to retrieve optional components of an estimator result `x`: retrieve_value()
+# gives x[[what]] or NA_real_; retrieve_fstatistic() gives a data.frame(statistic, p.value).
+# Presence is tested via names(x), since exists(what, x) coerces `x` to an environment.
+retrieve_value <- function(x, what) if (what %in% names(x)) x[[what]] else NA_real_
+retrieve_fstatistic <- function(x) {
+  if (!("fstatistic" %in% names(x))) {
+    return(data.frame(statistic = NA_real_, p.value = NA_real_))
+  }
+  # unname() so element names (e.g. "value") cannot become the result's row names
+  fstat <- unname(x[["fstatistic"]])
+  data.frame(statistic = fstat[1], p.value = pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE))
+}
+
+#' @importFrom generics glance
+#' @export
+generics::glance
+
+#' Glance at an estimatr object
+#' @name estimatr_glancers
+#' @templateVar class lm_robust
+#' @return For \code{glance.lm_robust}, a data.frame with columns:
+#'   \item{r.squared}{the \eqn{R^2},
+#'   \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*}
+#'   is the mean of \eqn{y[i]} if there is an intercept and zero otherwise,
+#'   and \eqn{e[i]} is the ith residual.}
+#'   \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}}
+#'   \item{statistic}{the value of the F-statistic, or \code{NA} if the fit has no \code{fstatistic}}
+#'   \item{p.value}{p-value from the F test, or \code{NA} if the fit has no \code{fstatistic}}
+#'   \item{df.residual}{residual degrees of freedom}
+#'   \item{N}{the number of observations used}
+#'   \item{se_type}{the standard error type specified by the user}
+#'
+#' @param x An object returned by one of the estimators
+#' @param ... extra arguments (not used)
+#'
+#' @export
+#' @family estimatr glancers
+#' @seealso [generics::glance()], [estimatr::lm_robust()], [estimatr::lm_lin()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()]
+#' @md
+glance.lm_robust <- function(x, ...) {
+  # A one-row summary is only produced for single-outcome fits
+  if (length(x[["outcome"]]) > 1) {
+    stop("Cannot use `glance` on linear models with multiple responses.")
+  }
+
+  ret <- cbind(
+    data.frame(
+      r.squared = x[["r.squared"]],
+      adj.r.squared = x[["adj.r.squared"]]
+    ),
+    retrieve_fstatistic(x),
+    data.frame(
+      df.residual = x[["df"]][1],
+      N = as.integer(x[["N"]]),
+      se_type = x[["se_type"]],
+      stringsAsFactors = FALSE
+    )
+  )
+
+  rownames(ret) <- NULL
+
+  ret
+}
+
+#' @name estimatr_glancers
+#' @templateVar class iv_robust
+#' @return For \code{glance.iv_robust}, a data.frame with columns:
+#'   \item{r.squared}{The \eqn{R^2} of the second stage regression}
+#'   \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}}
+#'   \item{df.residual}{residual degrees of freedom}
+#'   \item{N}{the number of observations used}
+#'   \item{se_type}{the standard error type specified by the user}
+#'   \item{statistic}{the value of the F-statistic}
+#'   \item{p.value}{p-value from the F test}
+#'   \item{statistic.weakinst}{the value of the first stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable}
+#'   \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable}
+#'   \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic, with robust standard errors, we employ the regression based test}
+#'   \item{p.value.endogeneity}{p-value from the F-test for endogeneity}
+#'   \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification}
+#'   \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification}
+#'
+#' @inheritParams glance.lm_robust
+#'
+#' @export
+#' @family estimatr glancers
+#' @md
+glance.iv_robust <- function(x, ...) {
+  if (length(x[["outcome"]]) > 1) {
+    stop("Cannot use `glance` on linear models with multiple responses.")
+  }
+  # NOTE(review): print_summary_lm_like() tests `diagnostic_endogeneity_test`; confirm the names read below
+  ret <- cbind(
+    data.frame(
+      r.squared = x[["r.squared"]],
+      adj.r.squared = x[["adj.r.squared"]],
+      df.residual = x[["df.residual"]],
+      N = as.integer(x[["N"]]),
+      se_type = x[["se_type"]],
+      stringsAsFactors = FALSE
+    ),
+    retrieve_fstatistic(x),
+    if (exists("diagnostic_firststage_fstatistic", x) && length(x[["diagnostic_firststage_fstatistic"]]) == 4) {
+      data.frame(
+        statistic.weakinst = x[["diagnostic_firststage_fstatistic"]]["value"],
+        p.value.weakinst = x[["diagnostic_firststage_fstatistic"]]["p.value"]
+      )
+    } else {
+      data.frame(statistic.weakinst = NA_real_, p.value.weakinst = NA_real_)
+    },
+    if (exists("diagnostic_endogeneity_fstatistic", x)) {
+      data.frame(
+        statistic.endogeneity = x[["diagnostic_endogeneity_fstatistic"]]["value"],
+        p.value.endogeneity = x[["diagnostic_endogeneity_fstatistic"]]["p.value"]
+      )
+    } else {
+      data.frame(statistic.endogeneity = NA_real_, p.value.endogeneity = NA_real_)
+    },
+    if (exists("diagnostic_overid_fstatistic", x)) {
+      data.frame(
+        statistic.overid = x[["diagnostic_overid_fstatistic"]]["value"],
+        p.value.overid = x[["diagnostic_overid_fstatistic"]]["p.value"]
+      )
+    } else {
+      data.frame(statistic.overid = NA_real_, p.value.overid = NA_real_)
+    }
+  )
+  # Named elements (e.g. "value") would otherwise surface as the row name, unlike glance.lm_robust
+  rownames(ret) <- NULL
+  ret
+}
+
+#' @name estimatr_glancers
+#' @templateVar class difference_in_means
+#' @return For \code{glance.difference_in_means}, a data.frame with columns:
+#'   \item{design}{the design used, and therefore the estimator used}
+#'   \item{df}{the degrees of freedom}
+#'   \item{N}{the number of observations used, as an integer}
+#'   \item{N_blocks}{the number of blocks, or \code{NA} if the fit has no \code{N_blocks} element}
+#'   \item{N_clusters}{the number of clusters, or \code{NA} if the fit has no \code{N_clusters} element}
+#'   \item{condition2}{the second, "treatment", condition}
+#'   \item{condition1}{the first, "control", condition}
+#'
+#' @inheritParams glance.lm_robust
+#'
+#' @export
+#' @family estimatr glancers
+#' @md
+glance.difference_in_means <- function(x, ...) {
+  data.frame(
+    design = x[["design"]],
+    df = x[["df"]],
+    N = as.integer(x[["N"]]),
+    N_blocks = retrieve_value(x, "N_blocks"),
+    N_clusters = retrieve_value(x, "N_clusters"),
+    condition2 = x[["condition2"]],
+    condition1 = x[["condition1"]],
+    stringsAsFactors = FALSE
+  )
+}
+
+#' @name estimatr_glancers
+#' @templateVar class horvitz_thompson
+#' @return For \code{glance.horvitz_thompson}, a data.frame with columns:
+#'   \item{N}{the number of observations used, as an integer}
+#'   \item{se_type}{the type of standard error estimator used}
+#'   \item{condition2}{the second, "treatment", condition}
+#'   \item{condition1}{the first, "control", condition}
+#'
+#' @inheritParams glance.lm_robust
+#'
+#' @export
+#' @family estimatr glancers
+#' @md
+glance.horvitz_thompson <- function(x, ...) {
+  data.frame(
+    N = as.integer(x[["N"]]),
+    se_type = x[["se_type"]],
+    condition2 = x[["condition2"]],
+    condition1 = x[["condition1"]],
+    stringsAsFactors = FALSE
+  )
+}
diff --git a/R/S3_print.R b/R/S3_print.R
@@ -8,9 +8,7 @@ print.iv_robust <- function(x, ...) {
   print(summarize_tidy(x))
 }
 
-print_summary_lm_like <- function(x,
-                                  digits,
-                                  ...) {
+print_summary_lm_like <- function(x, digits, signif.stars = getOption("show.signif.stars"), ...) {
   cat(
     "\nCall:\n",
     paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
@@ -36,7 +34,7 @@ print_summary_lm_like <- function(x,
 
   print(coef(x), digits = digits)
 
-  fstat <- if (is.numeric(x$fstatistic)) {
+  fstat <- if (is.numeric(x[["fstatistic"]])) {
     paste(
       "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits),
       "on", x$fstatistic[2L], "and", x$fstatistic[3L],
@@ -56,7 +54,7 @@ print_summary_lm_like <- function(x,
     fstat
   )
 
-  if (!is.null(x$proj_fstatistic)) {
+  if (is.numeric(x[["proj_fstatistic"]])) {
     cat(
       "\nMultiple R-squared (proj. model): ",
       formatC(x$proj_r.squared, digits = digits),
@@ -76,21 +74,37 @@ print_summary_lm_like <- function(x,
   }
   cat("\n")
 
+  if (is.numeric(x[["diagnostic_endogeneity_test"]])) {
+    cat("\nDiagnostics:\n")
+    printCoefmat(
+      build_ivreg_diagnostics_mat(x),
+      cs.ind = 1L:2L,
+      tst.ind = 3L,
+      has.Pvalue = TRUE,
+      P.values = TRUE,
+      digits = digits,
+      signif.stars = signif.stars,
+      na.print = "NA",
+      ...
+    )
+  }
   invisible(x)
 }
 
 #' @export
 print.summary.lm_robust <- function(x,
                                     digits = max(3L, getOption("digits") - 3L),
+                                    signif.stars = getOption("show.signif.stars"),
                                     ...) {
-  print_summary_lm_like(x, digits, ...)
+  print_summary_lm_like(x, digits, signif.stars, ...)  # forward it; it was accepted but silently dropped
 }
 
 #' @export
 print.summary.iv_robust <- function(x,
                                     digits = max(3L, getOption("digits") - 3L),
+                                    signif.stars = getOption("show.signif.stars"),
                                     ...) {
-  print_summary_lm_like(x, digits, ...)
+  print_summary_lm_like(x, digits, signif.stars, ...)
 }
 
 #' @export