Merge pull request #216 from q-w-a/se_subscripts

Response to Feature Request: Adding error subscripts option
datalorax · Mar 14, 2022 · 6e71b47 · 6e71b47
2 parents c7e2189 + 19aab24
commit 6e71b47
Show file tree

Hide file tree

Showing 9 changed files with 441 additions and 6 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -45,7 +45,12 @@ Authors@R:
              family = "Patil",
              role = "ctb",
              email = "patilindrajeet.science@gmail.com",
-             comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")))
+             comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
+      person(given = "Quinn",
+             family = "White",
+             role = "ctb",
+             email = "quinnarlise@gmail.com",
+             comment = c(ORCID = "https://orcid.org/0000-0001-5399-0237")))
 Description: The goal of 'equatiomatic' is to reduce the pain
     associated with writing 'LaTeX' formulas from fitted models. The
     primary function of the package, extract_eq(), takes a fitted model

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,7 @@
 * Bug fix: Prior versions did not escape characters in multilevel models when
   declaring the grouping factor (e.g., `for census_division l = 1` is now 
   rendered as `for census\_division l = 1`).
+* Feature addition: added `se_subscripts` argument, which allows the standard error for each coefficient to be included in parentheses below the coefficient when `se_subscripts = TRUE`. This is supported for `lm` and `glm` models.
 
 # equatiomatic 0.3.0
 * Export new `renderEq()` and `eqOutput()` functions for working with equatiomatic with shiny.

diff --git a/R/extract_eq.R b/R/extract_eq.R
@@ -72,7 +72,7 @@
 #'   estimates be included in the equation instead of math symbols?
 #' @param coef_digits Integer, defaults to 2. The number of decimal places to
 #'   round to when displaying model estimates.
-#' @param fix_signs Logical, defaults to \code{FALSE}. If disabled,
+#' @param fix_signs Logical, defaults to \code{TRUE}. If disabled,
 #'   coefficient estimates that are negative are preceded with a "+" (e.g.
 #'   `5(x) + -3(z)`). If enabled, the "+ -" is replaced with a "-" (e.g.
 #'   `5(x) - 3(z)`).
@@ -90,6 +90,9 @@
 #'   effects model (e.g., \code{lme4::lmer()}), should the variances and
 #'   co-variances be returned? If \code{FALSE} (the default) standard deviations
 #'   and correlations are returned instead.
+#' @param se_subscripts Logical, defaults to \code{FALSE}. If \code{TRUE}, the
+#'   standard error of each coefficient is shown in parentheses below it.
+#'   Currently supported for \code{lm} and \code{glm} models only.
 #' @param ... Additional arguments (for future development; not currently used).
 #' @export
 #'
@@ -159,7 +162,8 @@ extract_eq <- function(model, intercept = "alpha", greek = "beta",
                        operator_location = "end", align_env = "aligned",
                        use_coefs = FALSE, coef_digits = 2,
                        fix_signs = TRUE, font_size = NULL,
-                       mean_separate, return_variances = FALSE, ...) {
+                       mean_separate, return_variances = FALSE,
+                       se_subscripts = FALSE, ...) {
   UseMethod("extract_eq", model)
 }
 
@@ -179,21 +183,26 @@ extract_eq.default <- function(model, intercept = "alpha", greek = "beta",
                                operator_location = "end", align_env = "aligned",
                                use_coefs = FALSE, coef_digits = 2,
                                fix_signs = TRUE, font_size = NULL,
-                               mean_separate, return_variances = FALSE, ...) {
+                               mean_separate, return_variances = FALSE, 
+                               se_subscripts = FALSE, ...) {
   if (index_factors & use_coefs) {
     stop("Coefficient estimates cannot be returned when factors are indexed.")
   }
 
   lhs <- extract_lhs(model, ital_vars, show_distribution, use_coefs, 
                      swap_var_names, var_colors)
   rhs <- extract_rhs(model, index_factors)
-
+  
   eq_raw <- create_eq(
     model, lhs, rhs, ital_vars, use_coefs, coef_digits,
     fix_signs, intercept, greek, 
     greek_colors, subscript_colors, var_colors, var_subscript_colors, raw_tex,
     index_factors, swap_var_names, swap_subscript_names
   )
+
+  if (se_subscripts) {
+    eq_raw$rhs[[1]] <- add_se(eq_raw$rhs[[1]], model)
+  }
 
   if (wrap) {
     if (operator_location == "start") {
@@ -244,6 +253,10 @@ extract_eq.default <- function(model, intercept = "alpha", greek = "beta",
   if (use_coefs && fix_signs) {
     eq <- lapply(eq, fix_coef_signs)
   }
+
+  if (use_coefs && fix_signs && se_subscripts) {
+    eq <- lapply(eq, fix_coef_signs_se)
+  }
 
   if (length(eq) > 1) {
     eq <- paste(eq, collapse = " \\\\\n")
@@ -290,6 +303,8 @@ extract_eq.default <- function(model, intercept = "alpha", greek = "beta",
   return(eq)
 }
 
+
+
 # These args still need to be incorporated
 # intercept, greek, raw_tex
 # I haven't incorporated wrap yet either and we should think about if we want to
@@ -501,3 +516,39 @@ extract_eq.forecast_ARIMA <- function(model, intercept = "alpha", greek = "beta"
   # Explicit return
   return(eq)
 }
+
+#' Add Standard Errors Below Coefficients
+#'
+#' Wraps each coefficient term in a LaTeX \code{underset} expression so that
+#' its standard error, rounded to three decimal places, is typeset in
+#' parentheses beneath the term. Standard errors are read from the
+#' \code{"Std. Error"} column of \code{summary(model)$coefficients}.
+#'
+#' @param coef character vector of right-hand-side terms (from output of the
+#'   function create_eq). It must contain either exactly one term per
+#'   standard error, or one additional trailing term (such as the error
+#'   term), which is returned unchanged.
+#' @param model a fitted model
+#'
+#' @return a character vector the same length as \code{coef}, with the
+#'   standard error added beneath each coefficient term
+add_se <- function(coef, model) {
+  coef_table <- summary(model)$coefficients
+  if (!"Std. Error" %in% colnames(coef_table)) {
+    stop(
+      "Standard errors could not be extracted from this model. ",
+      "`se_subscripts` is supported for lm and glm models."
+    )
+  }
+
+  # Rounding/formatting is intentionally unchanged (trailing zeros dropped).
+  errors <- as.character(round(coef_table[, "Std. Error"], 3))
+
+  # Terms and standard errors are matched by position, so any length mismatch
+  # other than a single trailing term would make paste0() silently recycle
+  # and attach the wrong standard error to a term.
+  n_unmatched <- length(coef) - length(errors)
+  if (!n_unmatched %in% c(0, 1)) {
+    stop(
+      "The number of terms (", length(coef), ") does not match the number ",
+      "of standard errors (", length(errors), "); cannot add `se_subscripts`."
+    )
+  }
+
+  # paste0() turns zero-length input into "", which would yield a bogus
+  # "\underset{()}{}" term when the model has no coefficients.
+  if (length(errors) == 0) {
+    return(coef)
+  }
+
+  has_se <- seq_along(errors)
+  with_se <- paste0("\\underset{(", errors, ")}{", coef[has_se], "}")
+  c(with_se, coef[-has_se])
+}
+
+
+
+#' Fixes the Signs When Using se_subscripts Argument
+#'
+#' Splits the equation on \code{" + "} and, for every term after the first,
+#' moves a leading minus sign out of the coefficient and into the joining
+#' operator, so that \code{+ \\underset{(se)}{-3(x)}} becomes
+#' \code{- \\underset{(se)}{3(x)}}. Only a minus sign that directly follows
+#' the opening of the \code{underset} body is treated as the coefficient's
+#' sign; hyphens elsewhere in a term (e.g. in a variable or level name) are
+#' left untouched. The first component (left-hand side and first term) is
+#' never modified.
+#'
+#' @param equation list that contains the equation
+#'
+#' @return a list containing the equation with fixed signs
+fix_coef_signs_se <- function(equation) {
+  components <- unlist(strsplit(equation, " + ", fixed = TRUE))
+
+  # With a single component there is no operator to fix; indexing with
+  # 2:length(components) would count backwards (2:1) and corrupt the output.
+  if (length(components) < 2) {
+    return(list(equation))
+  }
+
+  terms <- components[-1]
+
+  # Matches "\underset{(<se>)}{-" and captures everything except the minus.
+  neg_coef <- "(\\\\underset\\{\\([^)]*\\)\\}\\{)-"
+  is_negative <- grepl(neg_coef, terms)
+  operators <- ifelse(is_negative, " - ", " + ")
+  terms <- sub(neg_coef, "\\1", terms)
+
+  terms <- paste0(operators, terms, collapse = "")
+  list(paste(components[1], terms))
+}
diff --git a/man/add_se.Rd b/man/add_se.Rd
diff --git a/man/equatiomatic-package.Rd b/man/equatiomatic-package.Rd
diff --git a/man/extract_eq.Rd b/man/extract_eq.Rd
diff --git a/man/fix_coef_signs_se.Rd b/man/fix_coef_signs_se.Rd
diff --git a/tests/testthat/_snaps/se-subscripts.md b/tests/testthat/_snaps/se-subscripts.md
@@ -0,0 +1,143 @@
+# se_subscripts argument works with numeric coefficients
+
+    $$
+    \operatorname{\widehat{disp}} = \underset{(116.748)}{67.8}  + \underset{(9.962)}{45.56(\operatorname{cyl})} - \underset{(2.952)}{5.92(\operatorname{mpg})}
+    $$
+
+---
+
+    $$
+    E( \operatorname{disp} ) = \underset{(116.748)}{\alpha} + \underset{(9.962)}{\beta_{1}(\operatorname{cyl})} + \underset{(2.952)}{\beta_{2}(\operatorname{mpg})}
+    $$
+
+---
+
+    $$
+    \log\left[ \frac { P( \operatorname{am} = \operatorname{1} ) }{ 1 - P( \operatorname{am} = \operatorname{1} ) } \right] = \underset{(12.194)}{\alpha} + \underset{(0.239)}{\beta_{1}(\operatorname{mpg})} + \underset{(2.547)}{\beta_{2}(\operatorname{wt})}
+    $$
+
+# se_subscripts argument works with wrapping
+
+    $$
+    \begin{aligned}
+    \operatorname{\widehat{disp}} &= \underset{(119.073)}{60.8}\  + \\
+    &\quad \underset{(11.801)}{42.46(\operatorname{cyl})}\ - \\
+    &\quad \underset{(3.073)}{5.56(\operatorname{mpg})}\ + \\
+    &\quad \underset{(0.255)}{0.13(\operatorname{hp})}
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \widehat{E( \operatorname{disp} )} &= \underset{(116.748)}{67.8}\  + \\
+    &\quad \underset{(9.962)}{45.56(\operatorname{cyl})}\ - \\
+    &\quad \underset{(2.952)}{5.92(\operatorname{mpg})}
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \widehat{E( \operatorname{disp} )} &= \underset{(116.748)}{67.8}\  + \\
+    &\quad \underset{(9.962)}{45.56(\operatorname{cyl})}\ - \\
+    &\quad \underset{(2.952)}{5.92(\operatorname{mpg})}
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \log\left[ \frac { \widehat{P( \operatorname{am} = \operatorname{1} )} }{ 1 - \widehat{P( \operatorname{am} = \operatorname{1} )} } \right] &= \underset{(12.194)}{25.89}\  - \\
+    &\quad \underset{(0.239)}{0.32(\operatorname{mpg})}\ - \\
+    &\quad \underset{(2.547)}{6.42(\operatorname{wt})}
+    \end{aligned}
+    $$
+
+# se_subscripts argument works with Greek letter terms
+
+    $$
+    \operatorname{disp} = \underset{(119.073)}{\alpha} + \underset{(11.801)}{\beta_{1}(\operatorname{cyl})} + \underset{(3.073)}{\beta_{2}(\operatorname{mpg})} + \underset{(0.255)}{\beta_{3}(\operatorname{hp})} + \epsilon
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \operatorname{disp} &= \underset{(119.073)}{\alpha}\ + \\
+    &\quad \underset{(11.801)}{\beta_{1}(\operatorname{cyl})}\ + \\
+    &\quad \underset{(3.073)}{\beta_{2}(\operatorname{mpg})}\ + \\
+    &\quad \underset{(0.255)}{\beta_{3}(\operatorname{hp})}\ + \\
+    &\quad \epsilon
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    E( \operatorname{disp} ) &= \underset{(116.748)}{\alpha}\ + \\
+    &\quad \underset{(9.962)}{\beta_{1}(\operatorname{cyl})}\ + \\
+    &\quad \underset{(2.952)}{\beta_{2}(\operatorname{mpg})}
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \log\left[ \frac { P( \operatorname{am} = \operatorname{1} ) }{ 1 - P( \operatorname{am} = \operatorname{1} ) } \right] &= \underset{(12.194)}{\alpha}\ + \\
+    &\quad \underset{(0.239)}{\beta_{1}(\operatorname{mpg})}\ + \\
+    &\quad \underset{(2.547)}{\beta_{2}(\operatorname{wt})}
+    \end{aligned}
+    $$
+
+# se_subscripts argument works with transformed variables
+
+    $$
+    \operatorname{\widehat{disp}} = \underset{(226.662)}{481.42}  + \underset{(10.942)}{37.32(\operatorname{cyl})} - \underset{(59.243)}{165.48(\operatorname{\log(mpg)})} + \underset{(0.241)}{0.05(\operatorname{hp})}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \operatorname{\widehat{disp}} &= \underset{(226.662)}{481.42}\  + \\
+    &\quad \underset{(10.942)}{37.32(\operatorname{cyl})}\ - \\
+    &\quad \underset{(59.243)}{165.48(\operatorname{\log(mpg)})}\ + \\
+    &\quad \underset{(0.241)}{0.05(\operatorname{hp})}
+    \end{aligned}
+    $$
+
+---
+
+    $$
+    \widehat{E( \operatorname{disp} )} = \underset{(226.662)}{481.42}  + \underset{(10.942)}{37.32(\operatorname{cyl})} - \underset{(59.243)}{165.48(\operatorname{\log(mpg)})} + \underset{(0.241)}{0.05(\operatorname{hp})}
+    $$
+
+---
+
+    $$
+    \begin{aligned}
+    \widehat{E( \operatorname{disp} )} &= \underset{(226.662)}{481.42}\  + \\
+    &\quad \underset{(10.942)}{37.32(\operatorname{cyl})}\ - \\
+    &\quad \underset{(59.243)}{165.48(\operatorname{\log(mpg)})}\ + \\
+    &\quad \underset{(0.241)}{0.05(\operatorname{hp})}
+    \end{aligned}
+    $$
+
+# se_arguments works with factor variable
+
+    $$
+    \widehat{E( \operatorname{disp} )} = \underset{(224.642)}{427.66}  + \underset{(12.253)}{21.95(\operatorname{cyl})} - \underset{(61.235)}{120.45(\operatorname{\log(mpg)})} + \underset{(0.32)}{0.38(\operatorname{hp})} - \underset{(26.627)}{58.06(\operatorname{factor(gear)}_{\operatorname{4}})} - \underset{(35.762)}{66.88(\operatorname{factor(gear)}_{\operatorname{5}})}
+    $$
+
+---
+
+    $$
+    E( \operatorname{disp} ) = \underset{(224.642)}{\alpha} + \underset{(12.253)}{\beta_{1}(\operatorname{cyl})} + \underset{(61.235)}{\beta_{2}(\operatorname{\log(mpg)})} + \underset{(0.32)}{\beta_{3}(\operatorname{hp})} + \underset{(26.627)}{\beta_{4}(\operatorname{factor(gear)}_{\operatorname{4}})} + \underset{(35.762)}{\beta_{5}(\operatorname{factor(gear)}_{\operatorname{5}})}
+    $$
+