From a8ba9bd873fa6a3026ac21c37a958bef1b245b8f Mon Sep 17 00:00:00 2001 From: Jeremias Blendin <303581+jgjl@users.noreply.github.com> Date: Sat, 16 May 2020 20:32:48 -0700 Subject: [PATCH 1/6] Update stat_ecdf to work either on the x or the y aesthetic, whatever is provided. --- R/stat-ecdf.r | 39 ++++++++++++++++++++++++--------- tests/testthat/test-stat-ecdf.R | 16 ++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 tests/testthat/test-stat-ecdf.R diff --git a/R/stat-ecdf.r b/R/stat-ecdf.r index ebb4fd3180..72268ad565 100644 --- a/R/stat-ecdf.r +++ b/R/stat-ecdf.r @@ -7,6 +7,11 @@ #' The downside is that it requires more training to accurately interpret, #' and the underlying visual tasks are somewhat more challenging. #' +#' The statistic relies on the aesthetics assignment to guess which variable as +#' input and as output. Either x or y must be provided and one of them must be +#' unused. The ECDF will be caculated on the given aesthetic and will be output +#' on the unused one. 
+#' #' @inheritParams layer #' @inheritParams geom_point #' @param na.rm If `FALSE` (the default), removes missing values with @@ -17,8 +22,7 @@ #' and (Inf, 1) #' @section Computed variables: #' \describe{ -#' \item{x}{x in data} -#' \item{y}{cumulative density corresponding x} +#' \item{ecdf}{cumulative density corresponding to the input variable} #' } #' @export #' @examples @@ -64,7 +68,24 @@ stat_ecdf <- function(mapping = NULL, data = NULL, #' @usage NULL #' @export StatEcdf <- ggproto("StatEcdf", Stat, - compute_group = function(data, scales, n = NULL, pad = TRUE) { + required_aes = c("x|y"), + + default_aes = aes(x = after_stat(ecdf), y = after_stat(ecdf)), + + setup_params = function(data, params) { + params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE) + + has_x <- !(is.null(data$x) && is.null(params$x)) + has_y <- !(is.null(data$y) && is.null(params$y)) + if (!has_x && !has_y) { + abort("stat_ecdf() requires an x or y aesthetic.") + } + + params + }, + + compute_group = function(data, scales, n = NULL, pad = TRUE, flipped_aes = FALSE) { + data <- flip_data(data, flipped_aes) # If n is NULL, use raw values; otherwise interpolate if (is.null(n)) { x <- unique(data$x) @@ -75,13 +96,11 @@ StatEcdf <- ggproto("StatEcdf", Stat, if (pad) { x <- c(-Inf, x, Inf) } - y <- ecdf(data$x)(x) - - new_data_frame(list(x = x, y = y), n = length(x)) - }, - - default_aes = aes(y = after_stat(y)), + data_ecdf <- ecdf(data$x)(x) - required_aes = c("x") + df_ecdf <- new_data_frame(list(x = x, ecdf = data_ecdf), n = length(x)) + df_ecdf$flipped_aes <- flipped_aes + flip_data(df_ecdf, flipped_aes) + } ) diff --git a/tests/testthat/test-stat-ecdf.R b/tests/testthat/test-stat-ecdf.R new file mode 100644 index 0000000000..4e56e3625b --- /dev/null +++ b/tests/testthat/test-stat-ecdf.R @@ -0,0 +1,16 @@ +context("stat_ecdf") + +test_that("stat_ecdf works in both directions", { + p <- ggplot(mpg, aes(hwy)) + stat_ecdf() + x <- 
layer_data(p) + expect_false(x$flipped_aes[1]) + + p <- ggplot(mpg, aes(y = hwy)) + stat_ecdf() + y <- layer_data(p) + expect_true(y$flipped_aes[1]) + + x$flipped_aes <- NULL + y$flipped_aes <- NULL + expect_identical(x, flip_data(y, TRUE)[,names(x)]) +}) + From 12fb205a54e2a6c2ae8d00cdccb97f257ce68fc2 Mon Sep 17 00:00:00 2001 From: Jeremias Blendin <303581+jgjl@users.noreply.github.com> Date: Sun, 2 Aug 2020 21:18:52 -0700 Subject: [PATCH 2/6] Undo breaking change of renaming the stats result and go back to y. --- R/stat-ecdf.r | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/stat-ecdf.r b/R/stat-ecdf.r index 72268ad565..1680bff907 100644 --- a/R/stat-ecdf.r +++ b/R/stat-ecdf.r @@ -22,7 +22,7 @@ #' and (Inf, 1) #' @section Computed variables: #' \describe{ -#' \item{ecdf}{cumulative density corresponding to the input variable} +#' \item{y}{cumulative density corresponding x} #' } #' @export #' @examples @@ -70,7 +70,7 @@ stat_ecdf <- function(mapping = NULL, data = NULL, StatEcdf <- ggproto("StatEcdf", Stat, required_aes = c("x|y"), - default_aes = aes(x = after_stat(ecdf), y = after_stat(ecdf)), + default_aes = aes(y = after_stat(y)), setup_params = function(data, params) { params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE) @@ -98,7 +98,7 @@ StatEcdf <- ggproto("StatEcdf", Stat, } data_ecdf <- ecdf(data$x)(x) - df_ecdf <- new_data_frame(list(x = x, ecdf = data_ecdf), n = length(x)) + df_ecdf <- new_data_frame(list(x = x, y = data_ecdf), n = length(x)) df_ecdf$flipped_aes <- flipped_aes flip_data(df_ecdf, flipped_aes) } From 01281430101485ecc87dcd4afc1de0a48e22d0dc Mon Sep 17 00:00:00 2001 From: Jeremias Blendin <303581+jgjl@users.noreply.github.com> Date: Mon, 3 Aug 2020 21:16:33 -0700 Subject: [PATCH 3/6] Add bullet to NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index b54f6188ee..8232287049 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 
@@ # ggplot2 (development version) +* Extend `stat_ecdf()` to calculate the cdf from either x or y instead of from x only (@jgjl, #4005). + * Fixed a bug in `geom_sf()` that caused problems with legend-type autodetection (@clauswilke, #3963). From a3760560f69c835d617b6cc2ec50ba243ce70386 Mon Sep 17 00:00:00 2001 From: Jeremias Blendin <303581+jgjl@users.noreply.github.com> Date: Mon, 3 Aug 2020 21:19:49 -0700 Subject: [PATCH 4/6] Fix typo in comments. --- R/stat-ecdf.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat-ecdf.r b/R/stat-ecdf.r index 1680bff907..d9224c0e9f 100644 --- a/R/stat-ecdf.r +++ b/R/stat-ecdf.r @@ -9,7 +9,7 @@ #' #' The statistic relies on the aesthetics assignment to guess which variable as #' input and as output. Either x or y must be provided and one of them must be -#' unused. The ECDF will be caculated on the given aesthetic and will be output +#' unused. The ECDF will be calculated on the given aesthetic and will be output #' on the unused one. #' #' @inheritParams layer From 016870bf00871574f9fed16afdb3965f4f2d536d Mon Sep 17 00:00:00 2001 From: Jeremias Blendin <303581+jgjl@users.noreply.github.com> Date: Mon, 3 Aug 2020 21:26:11 -0700 Subject: [PATCH 5/6] Improve comment. --- R/stat-ecdf.r | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/stat-ecdf.r b/R/stat-ecdf.r index d9224c0e9f..4ff24b391c 100644 --- a/R/stat-ecdf.r +++ b/R/stat-ecdf.r @@ -7,10 +7,10 @@ #' The downside is that it requires more training to accurately interpret, #' and the underlying visual tasks are somewhat more challenging. #' -#' The statistic relies on the aesthetics assignment to guess which variable as -#' input and as output. Either x or y must be provided and one of them must be -#' unused. The ECDF will be calculated on the given aesthetic and will be output -#' on the unused one. 
+#' The statistic relies on the aesthetics assignment to guess which variable to +#' use as the input and which to use as the output. Either x or y must be provided +#' and one of them must be unused. The ECDF will be calculated on the given aesthetic +#' and will be output on the unused one. #' #' @inheritParams layer #' @inheritParams geom_point From 1de83516fbd17eb1a297bf3baf893122b7b3233e Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 4 Aug 2020 13:58:13 +0000 Subject: [PATCH 6/6] Document --- man/stat_ecdf.Rd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/man/stat_ecdf.Rd b/man/stat_ecdf.Rd index 12afbfb07e..58423c3d41 100644 --- a/man/stat_ecdf.Rd +++ b/man/stat_ecdf.Rd @@ -76,10 +76,15 @@ tuning parameters and handles both continuous and categorical variables. The downside is that it requires more training to accurately interpret, and the underlying visual tasks are somewhat more challenging. } +\details{ +The statistic relies on the aesthetics assignment to guess which variable to +use as the input and which to use as the output. Either x or y must be provided +and one of them must be unused. The ECDF will be calculated on the given aesthetic +and will be output on the unused one. +} \section{Computed variables}{ \describe{ -\item{x}{x in data} \item{y}{cumulative density corresponding x} } }