Skip to content

Commit

Permalink
Add by-arg (#358)
Browse files Browse the repository at this point in the history
* Add by-arg

* ver

* add sanity checks

* comment code

* fix

* comment

* add tests

* news, docs
  • Loading branch information
strengejacke authored Aug 2, 2023
1 parent a328f60 commit bdbf697
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: ggeffects
Type: Package
Encoding: UTF-8
Title: Create Tidy Data Frames of Marginal Effects for 'ggplot' from Model Outputs
Version: 1.2.3.9
Version: 1.2.3.10
Authors@R: c(
person("Daniel", "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", comment = c(ORCID = "0000-0002-8895-3206")),
person("Frederik", "Aust", role = "ctb", comment = c(ORCID = "0000-0003-4900-788X")),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

* `tibbles` are always converted into data frames, to avoid issues.

* `hypothesis_test()` gains a `by` argument, to specify a variable that is used
to group the comparisons or contrasts. This is useful for models with interaction
terms.

## Bug fixes

* Plotting residuals did not work when model object passed to `ggpredict()`
Expand Down
75 changes: 69 additions & 6 deletions R/hypothesis_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#' contrasts or comparisons for the *slopes* of this numeric predictor are
#' computed (possibly grouped by the levels of further categorical focal
#' predictors).
#' @param by Character vector specifying the names of predictors to condition on.
#' Hypothesis test is then carried out for focal terms by each level of `by`
#' variables. This is useful especially for interaction terms, where we want
#' to test the interaction within "groups". `by` is only relevant for
#' categorical predictors.
#' @param scale Character string, indicating the scale on which the contrasts
#' or comparisons are represented. Can be `"response"` (default), which would
#' return contrasts on the response scale (e.g. for logistic regression, as
Expand Down Expand Up @@ -122,6 +127,9 @@
#' # p-value adjustment
#' hypothesis_test(m, c("c161sex", "c172code"), p_adjust = "tukey")
#'
#' # not all comparisons, only by specific group levels
#' hypothesis_test(m, "c172code", by = "c161sex")
#'
#' # specific comparisons
#' hypothesis_test(m, c("c161sex", "c172code"), test = "b2 = b1")
#'
Expand All @@ -139,6 +147,7 @@ hypothesis_test <- function(model, ...) {
#' @export
hypothesis_test.default <- function(model,
terms = NULL,
by = NULL,
test = "pairwise",
equivalence = NULL,
scale = "response",
Expand Down Expand Up @@ -205,7 +214,12 @@ hypothesis_test.default <- function(model,
need_average_predictions <- insight::is_mixed_model(model)
msg_intervals <- FALSE

# we want contrasts or comparisons for these focal predictors...
# by-variables are included in terms
if (!is.null(by)) {
terms <- unique(c(terms, by))
}

# we want contrasts or comparisons for these focal predictors...
focal <- .clean_terms(terms)

# check if we have a mixed model - in this case, we need to ensure that our
Expand All @@ -222,6 +236,27 @@ hypothesis_test.default <- function(model,
}
grid <- data_grid(model, terms, ...)

# check for valid by-variable
if (!is.null(by)) {
# all by-terms need to be in data grid
if (!all(by %in% colnames(grid))) {
insight::format_error(
paste0("Variable(s) `", toString(by[!by %in% colnames(grid)]), "` not found in data grid.")
)
}
# by-terms must be categorical
by_factors <- vapply(grid[by], is.factor, TRUE)
if (!all(by_factors)) {
insight::format_error(
"All variables in `by` must be categorical.",
paste0(
"The following variables in `by` are not categorical: ",
toString(paste0("`", by[!by_factors], "`"))
)
)
}
}

by_arg <- NULL
# for models with ordinal/categorical outcome, we need focal terms and
# response variable as "by" argument
Expand Down Expand Up @@ -580,11 +615,6 @@ hypothesis_test.default <- function(model,
out$conf.high <- .comparisons$conf.high
out$p.value <- .comparisons$p.value

# p-value adjustment?
if (!is.null(p_adjust)) {
out <- .p_adjust(out, p_adjust, .comparisons$statistic, grid, focal, df, verbose)
}

# for pairwise comparisons, we may have comparisons inside one level when we
# have multiple focal terms, like "1-1" and "a-b". In this case, the comparison
# of 1 to 1 ("1-1") is just the contrast for the level "1", we therefore can
Expand All @@ -602,6 +632,37 @@ hypothesis_test.default <- function(model,
}
}

# filter by-variables?
if (!is.null(by)) {
for (by_factor in by) {
# values in "by" are character vectors, which are saved as "level-level".
# we now extract the unique values, and filter the data frame
unique_values <- unique(grid[[by_factor]])
by_levels <- paste0(unique_values, "-", unique_values)
keep_rows <- out[[by_factor]] %in% c(by_levels, unique_values)
# filter final data frame
out <- out[keep_rows, , drop = FALSE]
# but we also need to filter the ".comparisons" data frame
.comparisons <- .comparisons[keep_rows, , drop = FALSE]
# finally, replace "level-level" just by "level"
for (i in seq_along(by_levels)) {
out[[by_factor]] <- gsub(
by_levels[i],
unique_values[i],
out[[by_factor]],
fixed = TRUE
)
}
}
# remove by-terms from focal terms
focal <- focal[!focal %in% by]
}

# p-value adjustment?
if (!is.null(p_adjust)) {
out <- .p_adjust(out, p_adjust, .comparisons$statistic, grid, focal, df, verbose)
}

# add back response levels?
if ("group" %in% colnames(.comparisons)) {
out <- cbind(
Expand Down Expand Up @@ -638,6 +699,7 @@ hypothesis_test.default <- function(model,
#' @rdname hypothesis_test
#' @export
hypothesis_test.ggeffects <- function(model,
by = NULL,
test = "pairwise",
equivalence = NULL,
p_adjust = NULL,
Expand All @@ -655,6 +717,7 @@ hypothesis_test.ggeffects <- function(model,
hypothesis_test.default(
model,
terms = focal,
by = by,
test = test,
equivalence = equivalence,
p_adjust = p_adjust,
Expand Down
8 changes: 8 additions & 0 deletions man/hypothesis_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 38 additions & 3 deletions tests/testthat/test-hypothesis_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ if (suppressWarnings(requiet("testthat") && requiet("ggeffects") && requiet("mar
expect_equal(
out1$p.value,
c(
0.074, 0.3503, 0.5295, 0.7504, 0.5729, 0.009, 0.0258, 0.1497,
0.074, 0.3503, 0.5295, 0.7504, 0.5729, 0.009, 0.0258, 0.1497,
0.2033, 0.8163, 0.2247, 0.137, 0.3676, 0.2538, 0.8215
),
tolerance = 1e-3,
Expand All @@ -47,7 +47,7 @@ if (suppressWarnings(requiet("testthat") && requiet("ggeffects") && requiet("mar
expect_equal(
out2$p.value,
c(
0.4704, 0.9366, 0.9887, 0.9996, 0.9931, 0.0927, 0.2215, 0.6985,
0.4704, 0.9366, 0.9887, 0.9996, 0.9931, 0.0927, 0.2215, 0.6985,
0.7976, 0.9999, 0.8276, 0.6689, 0.9453, 0.862, 0.9999
),
tolerance = 1e-3,
Expand All @@ -69,7 +69,7 @@ if (suppressWarnings(requiet("testthat") && requiet("ggeffects") && requiet("mar

data(iris)
model2 <- lm(Sepal.Width ~ Sepal.Length * Species, data = iris)
test_that("ggpredict, lm", {
test_that("hypothesis_test, interaction", {
out <- hypothesis_test(model2, c("Sepal.Length", "Species"))
expect_identical(colnames(out), c(
"Sepal.Length", "Species", "Contrast", "conf.low", "conf.high",
Expand All @@ -82,6 +82,41 @@ if (suppressWarnings(requiet("testthat") && requiet("ggeffects") && requiet("mar
expect_identical(out$Sepal.Length, c("slope", "slope", "slope"))
})

test_that("hypothesis_test, by-argument", {
skip_if_not_installed("datawizard")
data(efc)
efc$c161sex <- datawizard::to_factor(efc$c161sex)
efc$c172code <- datawizard::to_factor(efc$c172code)

mfilter <- lm(neg_c_7 ~ c161sex * c172code + e42dep + c12hour, data = efc)
prfilter <- ggpredict(mfilter, "c172code")

out <- hypothesis_test(prfilter, by = "c161sex")
expect_identical(nrow(out), 6L)
expect_identical(
out$c172code,
c(
"low level of education-intermediate level of education",
"low level of education-high level of education",
"intermediate level of education-high level of education",
"low level of education-intermediate level of education",
"low level of education-high level of education",
"intermediate level of education-high level of education"
)
)
expect_equal(out$p.value, c(0.3962, 0.6512, 0.7424, 0.9491, 0.0721, 0.0288), tolerance = 1e-3)

out <- hypothesis_test(prfilter, by = "c161sex", p_adjust = "tukey")
expect_equal(out$p.value, c(0.6727, 0.8934, 0.9422, 0.9978, 0.1699, 0.0734), tolerance = 1e-3)

prfilter <- ggpredict(mfilter, c("c172code", "c161sex"))
out <- hypothesis_test(prfilter, p_adjust = "tukey")
out <- out[out$c161sex %in% c("Male-Male", "Female-Female"), , drop = FALSE]
expect_equal(out$p.value, c(0.9581, 0.9976, 0.9995, 1, 0.4657, 0.2432), tolerance = 1e-3)

expect_error(hypothesis_test(mfilter, "c161sex", by = "c12hour"), regex = "categorical")
})

if (suppressWarnings(requiet("lme4"))) {
model3 <- suppressMessages(lmer(outcome ~ groups * episode + sex + (1 | ID), data = d))
test_that("hypothesis_test, categorical, pairwise", {
Expand Down

0 comments on commit bdbf697

Please sign in to comment.