Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add glance methods and IV diagnostics #285

Merged
merged 30 commits into from
Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
de8d78f
remove unnecessary items in summary list when no FEs
lukesonnet Jan 24, 2019
b9e7f0b
Add glance for lm_robust and some simple tests #282
lukesonnet Jan 24, 2019
78f61ad
Add diagnostics
lukesonnet Feb 3, 2019
70e673b
add glances and iv diagnostics
lukesonnet Feb 4, 2019
ec97123
proper weak instrument fstats; refactor fstats
lukesonnet Feb 6, 2019
cf70978
debug diagnostics; tests for simple HC0 and classical SEs working
lukesonnet Feb 6, 2019
79af875
Change default diagnostic to FALSE
lukesonnet Feb 6, 2019
37ba390
remove residuals and fix tests
lukesonnet Feb 7, 2019
1cbf941
fix overid test with robust ses; clean up iv diagnostic code
lukesonnet Feb 18, 2019
ecc0dad
progress on stata tests
lukesonnet Feb 18, 2019
b1a20d4
fix robust endogeneity test
lukesonnet Feb 19, 2019
534a3ac
set up diagnostics tests
lukesonnet Feb 19, 2019
e5bb199
finish basic testing
lukesonnet Feb 19, 2019
480a86b
broken paths
lukesonnet Feb 19, 2019
8cf3b13
Fix glance and tests
lukesonnet Feb 19, 2019
c029e17
glance.iv_robust test
lukesonnet Feb 19, 2019
0b25b17
HT glance
lukesonnet Feb 20, 2019
d8b0b06
fix testing
lukesonnet Feb 20, 2019
58847e1
print iv diagnostics if they exist
lukesonnet Feb 20, 2019
e5dde45
Finalize tests and exceptions for diagnostics
lukesonnet Feb 25, 2019
ee9da78
Add weights to docs
lukesonnet Feb 25, 2019
76724bc
Document diagnostic return value
lukesonnet Feb 25, 2019
95a3a66
Clean up glance
lukesonnet Feb 26, 2019
ea6c791
Clean up glance
lukesonnet Feb 26, 2019
afb9711
Clean up print
lukesonnet Feb 26, 2019
1d76100
Clean up dim
lukesonnet Feb 26, 2019
219d290
Add libgit2
lukesonnet Feb 26, 2019
b75b86e
fix exports
lukesonnet Feb 26, 2019
6fbc9c4
update description and news
lukesonnet Feb 27, 2019
2748c69
edit travis badge
lukesonnet Feb 27, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ update_repo.R
^tests/testthat/test-zzzbroom\.R$
^estimatr_.*\.tar\.gz$
^tests/testthat/test-texreg\.R$
^tests/testthat/test-gtsummary\.R$
^cran-comments\.md$

4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ matrix:
- os: osx
r: release
if: branch = master
brew_packages: libgit2

- os: osx
r: 3.4
if: branch = master
brew_packages: libgit2

env:
global:
Expand All @@ -47,6 +49,8 @@ addons:
r_github_packages:
- DeclareDesign/DDtools
- ropensci/git2r
- rstudio/gt
- vincentarelbundock/gtsummary

after_success:
- Rscript -e DDtools::after_build
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: estimatr
Type: Package
Title: Fast Estimators for Design-Based Inference
Version: 0.14
Date: 2018-10-29
Version: 0.15
Date: 2019-02-27
Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@ucla.edu", role = c("aut", "cre")),
person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")),
person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")),
Expand Down
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ S3method(confint,difference_in_means)
S3method(confint,horvitz_thompson)
S3method(confint,iv_robust)
S3method(confint,lm_robust)
S3method(glance,difference_in_means)
S3method(glance,horvitz_thompson)
S3method(glance,iv_robust)
S3method(glance,lm_robust)
S3method(nobs,iv_robust)
S3method(nobs,lm_robust)
S3method(nobs,summary.lm_robust)
Expand Down Expand Up @@ -33,6 +37,7 @@ export(difference_in_means)
export(extract.iv_robust)
export(extract.lm_robust)
export(gen_pr_matrix_cluster)
export(glance)
export(horvitz_thompson)
export(iv_robust)
export(lm_lin)
Expand All @@ -43,6 +48,7 @@ export(starprep)
export(tidy)
importFrom(Formula,as.Formula)
importFrom(Rcpp,evalCpp)
importFrom(generics,glance)
importFrom(generics,tidy)
importFrom(methods,className)
importFrom(methods,isGeneric)
Expand All @@ -66,20 +72,25 @@ importFrom(stats,df.residual)
importFrom(stats,fitted.values)
importFrom(stats,formula)
importFrom(stats,lm)
importFrom(stats,lm.fit)
importFrom(stats,model.extract)
importFrom(stats,model.frame)
importFrom(stats,model.frame.default)
importFrom(stats,model.matrix)
importFrom(stats,model.matrix.default)
importFrom(stats,model.matrix.lm)
importFrom(stats,model.response)
importFrom(stats,na.omit)
importFrom(stats,na.pass)
importFrom(stats,nobs)
importFrom(stats,pchisq)
importFrom(stats,pf)
importFrom(stats,printCoefmat)
importFrom(stats,pt)
importFrom(stats,qt)
importFrom(stats,reformulate)
importFrom(stats,resid)
importFrom(stats,residuals)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,terms)
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# estimatr 0.14.0
# estimatr 0.15.0 (GitHub)

* Add `diagnostics` to `iv_robust()`
* Add `glance()` methods for all estimators

# estimatr 0.14.0 (CRAN)

* Removes `broom` hack for `tidy` method and instead relies on importing `generics`

Expand Down
182 changes: 182 additions & 0 deletions R/S3_glance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Helpers to retrieve values
retrieve_value <- function(x, what) if(exists(what, x)) x[[what]] else NA_real_
retrieve_fstatistic <- function(x) {
if (exists("fstatistic", x)) {
data.frame(
statistic = x[["fstatistic"]][1],
p.value = pf(x[["fstatistic"]][1], x[["fstatistic"]][2], x[["fstatistic"]][3], lower.tail = FALSE)
)
} else {
data.frame(statistic = NA_real_, p.value = NA_real_)
}
}

#' @importFrom generics glance
#' @export
generics::glance

#' Glance at an estimatr object
#' @name estimatr_glancers
#' @templateVar class lm_robust
#' @return For \code{glance.lm_robust}, a data.frame with columns:
#' \item{r.squared}{the \eqn{R^2},
#' \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*}
#' is the mean of \eqn{y[i]} if there is an intercept and zero otherwise,
#' and \eqn{e[i]} is the ith residual.}
#' \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}}
#' \item{se_type}{the standard error type specified by the user}
#' \item{statistic}{the value of the F-statistic}
#' \item{p.value}{p-value from the F test}
#' \item{df.residual}{residual degrees of freedom}
#' \item{N}{the number of observations used}
#'
#' @param x An object returned by one of the estimators
#' @param ... extra arguments (not used)
#'
#' @export
#' @family estimatr glancers
#' @seealso [generics::glance()], [estimatr::lm_robust()], [estimatr::lm_lin()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()]
#' @md
glance.lm_robust <- function(x, ...) {

if (length(x[["outcome"]]) > 1) {
stop("Cannot use `glance` on linear models with multiple responses.")
}

ret <- cbind(
data.frame(
r.squared = x[["r.squared"]],
adj.r.squared = x[["adj.r.squared"]]
),
retrieve_fstatistic(x),
data.frame(
df.residual = x[["df"]][1],
N = as.integer(x[["N"]]),
se_type = x[["se_type"]],
stringsAsFactors = FALSE
)
)

rownames(ret) <- NULL

ret
}

#' @name estimatr_glancers
#' @templateVar class iv_robust
#' @return For \code{glance.iv_robust}, a data.frame with columns:
#' \item{r.squared}{The \eqn{R^2} of the second stage regression}
#' \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}}
#' \item{df.residual}{residual degrees of freedom}
#' \item{N}{the number of observations used}
#' \item{se_type}{the standard error type specified by the user}
#' \item{statistic}{the value of the F-statistic}
#' \item{p.value}{p-value from the F test}
#' \item{statistic.weakinst}{the value of the first stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable}
#' \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable}
#' \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic, with robust standard errors, we employ the regression based test}
#' \item{p.value.endogeneity}{p-value from the F-test for endogeneity}
#' \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification}
#' \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification}
#'
#' @inheritParams glance.lm_robust
#'
#' @export
#' @family estimatr glancers
#' @md
glance.iv_robust <- function(x, ...) {

if (length(x[["outcome"]]) > 1) {
stop("Cannot use `glance` on linear models with multiple responses.")
}

ret <- cbind(
data.frame(
r.squared = x[["r.squared"]],
adj.r.squared = x[["adj.r.squared"]],
df.residual = x[["df.residual"]],
N = as.integer(x[["N"]]),
se_type = x[["se_type"]],
stringsAsFactors = FALSE
),
retrieve_fstatistic(x),
if (exists("diagnostic_firststage_fstatistic", x) && length(x[["diagnostic_firststage_fstatistic"]] == 4)) {
data.frame(
statistic.weakinst = x[["diagnostic_firststage_fstatistic"]]["value"],
p.value.weakinst = x[["diagnostic_firststage_fstatistic"]]["p.value"]
)
} else {
data.frame(statistic.weakinst = NA_real_, p.value.weakinst = NA_real_)
},
if (exists("diagnostic_endogeneity_fstatistic", x)) {
data.frame(
statistic.endogeneity = x[["diagnostic_endogeneity_fstatistic"]]["value"],
p.value.endogeneity = x[["diagnostic_endogeneity_fstatistic"]]["p.value"]
)
} else {
data.frame(statistic.endogeneity = NA_real_, p.value.endogeneity = NA_real_)
},
if (exists("diagnostic_overid_fstatistic", x)) {
data.frame(
statistic.overid = x[["diagnostic_overid_fstatistic"]]["value"],
p.value.overid = x[["diagnostic_overid_fstatistic"]]["p.value"]
)
} else {
data.frame(statistic.overid = NA_real_, p.value.overid = NA_real_)
}
)

ret
}

#' @name estimatr_glancers
#' @templateVar class difference_in_means
#' @return For \code{glance.difference_in_means}, a data.frame with columns:
#' \item{design}{the design used, and therefore the estimator used}
#' \item{df}{the degrees of freedom}
#' \item{N}{the number of observations used}
#' \item{N_blocks}{the number of blocks, if used}
#' \item{N_clusters}{the number of clusters, if used}
#' \item{condition2}{the second, "treatment", condition}
#' \item{condition1}{the first, "control", condition}
#'
#' @inheritParams glance.lm_robust
#'
#' @export
#' @family estimatr glancers
#' @md
glance.difference_in_means <- function(x, ...) {
data.frame(
design = x[["design"]],
df = x[["df"]],
N = as.integer(x[["N"]]),
N_blocks = retrieve_value(x, "N_blocks"),
N_clusters = retrieve_value(x, "N_clusters"),
condition2 = x[["condition2"]],
condition1 = x[["condition1"]],
stringsAsFactors = FALSE
)
}

#' @name estimatr_glancers
#' @templateVar class horvitz_thompson
#' @return For \code{glance.horvitz_thompson}, a data.frame with columns:
#' \item{N}{the number of observations used}
#' \item{se_type}{the type of standard error estimator used}
#' \item{condition2}{the second, "treatment", condition}
#' \item{condition1}{the first, "control", condition}
#'
#' @inheritParams glance.lm_robust
#'
#' @export
#' @family estimatr glancers
#' @md
glance.horvitz_thompson <- function(x, ...) {
data.frame(
N = as.integer(x[["N"]]),
se_type = x[["se_type"]],
condition2 = x[["condition2"]],
condition1 = x[["condition1"]],
stringsAsFactors = FALSE
)
}
26 changes: 20 additions & 6 deletions R/S3_print.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ print.iv_robust <- function(x, ...) {
print(summarize_tidy(x))
}

print_summary_lm_like <- function(x,
digits,
...) {
print_summary_lm_like <- function(x, digits, signif.stars = getOption("show.signif.stars"), ...) {
cat(
"\nCall:\n",
paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
Expand All @@ -36,7 +34,7 @@ print_summary_lm_like <- function(x,

print(coef(x), digits = digits)

fstat <- if (is.numeric(x$fstatistic)) {
fstat <- if (is.numeric(x[["fstatistic"]])) {
paste(
"\nF-statistic:", formatC(x$fstatistic[1L], digits = digits),
"on", x$fstatistic[2L], "and", x$fstatistic[3L],
Expand All @@ -56,7 +54,7 @@ print_summary_lm_like <- function(x,
fstat
)

if (!is.null(x$proj_fstatistic)) {
if (is.numeric(x[["proj_fstatistic"]])) {
cat(
"\nMultiple R-squared (proj. model): ",
formatC(x$proj_r.squared, digits = digits),
Expand All @@ -76,21 +74,37 @@ print_summary_lm_like <- function(x,
}
cat("\n")

if (is.numeric(x[["diagnostic_endogeneity_test"]])) {
cat("\nDiagnostics:\n")
printCoefmat(
build_ivreg_diagnostics_mat(x),
cs.ind = 1L:2L,
tst.ind = 3L,
has.Pvalue = TRUE,
P.values = TRUE,
digits = digits,
signif.stars = signif.stars,
na.print = "NA",
...
)
}
invisible(x)
}

#' @export
print.summary.lm_robust <- function(x,
digits = max(3L, getOption("digits") - 3L),
signif.stars = getOption("show.signif.stars"),
...) {
print_summary_lm_like(x, digits, ...)
}

#' @export
print.summary.iv_robust <- function(x,
digits = max(3L, getOption("digits") - 3L),
signif.stars = getOption("show.signif.stars"),
...) {
print_summary_lm_like(x, digits, ...)
print_summary_lm_like(x, digits, signif.stars, ...)
}

#' @export
Expand Down
Loading