diff --git a/NEWS.md b/NEWS.md index b69d472f44..ff33dc03d2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # ggplot2 (development version) +* Extended `stat_ecdf()` to calculate the cdf from either x or y instead of from x only (@jgjl, #4005). + * Fixed a bug in `labeller()` so that `.default` is passed to `as_labeller()` when labellers are specified by naming faceting variables. (@waltersom, #4031) diff --git a/R/stat-ecdf.r b/R/stat-ecdf.r index a974719dde..8f6654c67e 100644 --- a/R/stat-ecdf.r +++ b/R/stat-ecdf.r @@ -7,6 +7,11 @@ #' The downside is that it requires more training to accurately interpret, #' and the underlying visual tasks are somewhat more challenging. #' +#' The statistic relies on the aesthetics assignment to guess which variable to +#' use as the input and which to use as the output. Either x or y must be provided +#' and one of them must be unused. The ECDF will be calculated on the given aesthetic +#' and will be output on the unused one. +#' #' @inheritParams layer #' @inheritParams geom_point #' @param na.rm If `FALSE` (the default), removes missing values with @@ -17,7 +22,6 @@ #' and (Inf, 1) #' @section Computed variables: #' \describe{ -#' \item{x}{x in data} #' \item{y}{cumulative density corresponding x} #' } #' @export @@ -67,7 +71,24 @@ stat_ecdf <- function(mapping = NULL, data = NULL, #' @usage NULL #' @export StatEcdf <- ggproto("StatEcdf", Stat, - compute_group = function(data, scales, n = NULL, pad = TRUE) { + required_aes = c("x|y"), + + default_aes = aes(y = after_stat(y)), + + setup_params = function(data, params) { + params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE) + + has_x <- !(is.null(data$x) && is.null(params$x)) + has_y <- !(is.null(data$y) && is.null(params$y)) + if (!has_x && !has_y) { + abort("stat_ecdf() requires an x or y aesthetic.") + } + + params + }, + + compute_group = function(data, scales, n = NULL, pad = TRUE, flipped_aes = FALSE) { + data <- 
flip_data(data, flipped_aes) # If n is NULL, use raw values; otherwise interpolate if (is.null(n)) { x <- unique(data$x) @@ -78,13 +99,11 @@ StatEcdf <- ggproto("StatEcdf", Stat, if (pad) { x <- c(-Inf, x, Inf) } - y <- ecdf(data$x)(x) - - new_data_frame(list(x = x, y = y), n = length(x)) - }, - - default_aes = aes(y = after_stat(y)), + data_ecdf <- ecdf(data$x)(x) - required_aes = c("x") + df_ecdf <- new_data_frame(list(x = x, y = data_ecdf), n = length(x)) + df_ecdf$flipped_aes <- flipped_aes + flip_data(df_ecdf, flipped_aes) + } ) diff --git a/man/stat_ecdf.Rd b/man/stat_ecdf.Rd index 12afbfb07e..58423c3d41 100644 --- a/man/stat_ecdf.Rd +++ b/man/stat_ecdf.Rd @@ -76,10 +76,15 @@ tuning parameters and handles both continuous and categorical variables. The downside is that it requires more training to accurately interpret, and the underlying visual tasks are somewhat more challenging. } +\details{ +The statistic relies on the aesthetics assignment to guess which variable to +use as the input and which to use as the output. Either x or y must be provided +and one of them must be unused. The ECDF will be calculated on the given aesthetic +and will be output on the unused one. +} \section{Computed variables}{ \describe{ -\item{x}{x in data} \item{y}{cumulative density corresponding x} } } diff --git a/tests/testthat/test-stat-ecdf.R b/tests/testthat/test-stat-ecdf.R new file mode 100644 index 0000000000..4e56e3625b --- /dev/null +++ b/tests/testthat/test-stat-ecdf.R @@ -0,0 +1,16 @@ +context("stat_ecdf") + +test_that("stat_ecdf works in both directions", { + p <- ggplot(mpg, aes(hwy)) + stat_ecdf() + x <- layer_data(p) + expect_false(x$flipped_aes[1]) + + p <- ggplot(mpg, aes(y = hwy)) + stat_ecdf() + y <- layer_data(p) + expect_true(y$flipped_aes[1]) + + x$flipped_aes <- NULL + y$flipped_aes <- NULL + expect_identical(x, flip_data(y, TRUE)[,names(x)]) +}) +