From d2fb415c1caf38a7d5985ace451aac4c8c8f31fc Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 10:30:46 +0100 Subject: [PATCH 1/7] draft `stat_connect()` --- R/stat-connect.R | 121 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 R/stat-connect.R diff --git a/R/stat-connect.R b/R/stat-connect.R new file mode 100644 index 0000000000..07e7a5555f --- /dev/null +++ b/R/stat-connect.R @@ -0,0 +1,121 @@ +stat_connect <- function( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + connection = "hv", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE) { + layer( + data = data, + mapping = mapping, + stat = StatConnect, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + connection = connection, + ... + ) + ) +} + +StatConnect <- ggproto( + "StatConnect", Stat, + + required_aes = c("x", "y"), + + setup_params = function(data, params) { + params$flipped_aes <- has_flipped_aes(data, ambiguous = TRUE) + + connection <- params$connection %||% "hv" + + if (is.character(connection)) { + check_string(connection) + connection <- switch( + arg_match0(connection, c("hv", "vh", "mid", "linear")), + hv = matrix(c(1, 1, 0, 1), 2, 2), + vh = matrix(c(0, 0, 0, 1), 2, 2), + mid = matrix(c(0.5, 0.5, 0, 1), 2, 2), + linear = matrix(c(0, 1, 0, 1), 2, 2) + ) + } + + if (!is.matrix(connection) || + !typeof(connection) %in% c("integer", "double") || + !identical(dim(connection)[2], 2L)) { + extra <- "" + if (!is.null(dim(connection)[2])) { + extra <- paste0(" with ", dim(connection)[2], " column(s)") + } + cli::cli_abort( + "{.arg connection} must be a numeric {.cls matrix} with 2 columns, \\ + not {.obj_type_friendly {connection}}{extra}." + ) + } + + if (any(!is.finite(connection))) { + cli::cli_abort( + "{.arg connection} cannot contain missing or other non-finite values." + ) + } + + if (nrow(connection) < 1) { + connection <- NULL + } + + params$connection <- connection + params + }, + + compute_group = function(data, scales, connection = "hv", flipped_aes = FALSE) { + + data <- flip_data(data, flipped_aes) + + n <- nrow(data) + if (n <= 1) { + return(vec_slice(data, 0)) + } + + if (!is.matrix(connection)) { + return(data) + } + m <- nrow(connection) + + before <- rep(seq_len(n - 1), each = m) + after <- rep(seq_len(n)[-1], each = m) + + data <- vec_slice(data, order(data$x %||% data$xmin)) + + # Interpolate x + # Note that `length(x) != length(xjust)`, but these are kept in sync due to + # the matrix recycling rules (effectively `rep(xjust, ncol(x))`) + x <- as.matrix(data[intersect(names(data), ggplot_global$x_aes)]) + xjust <- rep(connection[, 1], n - 1L) + x <- vec_slice(x, before) * (1 - xjust) + vec_slice(x, after) * xjust + + # Interpolate y + y <- as.matrix(data[intersect(names(data), ggplot_global$y_aes)]) + yjust <- rep(connection[, 2], n - 1L) + y <- vec_slice(y, before) * (1 - yjust) + vec_slice(y, after) * yjust + + # Reconstitute data + new_data <- vec_slice(data, before) + new_data[colnames(x)] <- split_matrix(x) + new_data[colnames(y)] <- split_matrix(y) + + # Esnure data starts and ends are intact + if (!all(connection[1, ] == c(0, 0))) { + new_data <- vec_c(vec_slice(data, 1), new_data) + } + if (!all(connection[m, ] == c(1, 1))) { + new_data <- vec_c(new_data, vec_slice(data, n)) + } + flip_data(new_data, flipped_aes) + } + +) From c48afd078c7745002c844fe4a85f6f0d2f316c42 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 11:15:51 +0100 Subject: [PATCH 2/7] document --- DESCRIPTION | 1 + NAMESPACE | 2 + R/stat-connect.R | 38 ++++++++++ man/ggplot2-ggproto.Rd | 11 +-- man/stat_connect.Rd | 153 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 man/stat_connect.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 753e7dd49a..2ca50dc4cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -247,6 +247,7 @@ Collate: 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' + 'stat-connect.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' diff --git a/NAMESPACE b/NAMESPACE index b58765ecc1..69b249f3c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -257,6 +257,7 @@ export(StatBin2d) export(StatBindot) export(StatBinhex) export(StatBoxplot) +export(StatConnect) export(StatContour) export(StatContourFilled) export(StatCount) @@ -684,6 +685,7 @@ export(stat_bin_2d) export(stat_bin_hex) export(stat_binhex) export(stat_boxplot) +export(stat_connect) export(stat_contour) export(stat_contour_filled) export(stat_count) diff --git a/R/stat-connect.R b/R/stat-connect.R index 07e7a5555f..aef0173be1 100644 --- a/R/stat-connect.R +++ b/R/stat-connect.R @@ -1,3 +1,37 @@ +#' Connect observations +#' +#' Connect successive points with lines of different shapes. +#' +#' @inheritParams layer +#' @inheritParams geom_point +#' @param connection A specification of how two points are connected. Can be one +#' of the folloing: +#' * A string giving a named connection. These options are: +#' * `"hv"` to first jump horizontally, then vertically. +#' * `"vh"` to first jump vertically, then horizontally. +#' * `"mid"` to step half-way between adjacent x-values. +#' * `"linear"` to use a straight segment. +#' * A numeric matrix with two columns giving x and y coordinates respectively. +#' The coordinates should describe points on a path that connect point A +#' at location (0, 0) and point B at location (1, 1). At least one of these +#' two points is expected to be included in the coordinates. +#' +#' @eval rd_aesthetics("stat", "connect") +#' @export +#' +#' @examples +#' ggplot(head(economics, 20), aes(date, unemploy)) + +#' stat_connect(connection = "hv") +#' +#' # Setup custom connections +#' x <- seq(0, 1, length.out = 20)[-1] +#' smooth <- cbind(x, scales::rescale(1 / (1 + exp(-(x * 10 - 5))))) +#' zigzag <- cbind(c(0.4, 0.6, 1), c(0.75, 0.25, 1)) +#' +#' ggplot(head(economics, 10), aes(date, unemploy)) + +#' geom_point() + +#' stat_connect(aes(colour = "zigzag"), connection = zigzag) + +#' stat_connect(aes(colour = "smooth"), connection = smooth) stat_connect <- function( mapping = NULL, data = NULL, @@ -24,6 +58,10 @@ stat_connect <- function( ) } +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export StatConnect <- ggproto( "StatConnect", Stat, diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index 6658fdafb9..d4dfa8ab1f 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -21,11 +21,11 @@ % R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, % R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, % R/stat-bin.R, R/stat-summary-2d.R, R/stat-bin2d.R, R/stat-bindot.R, -% R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, -% R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, -% R/stat-function.R, R/stat-identity.R, R/stat-manual.R, R/stat-qq-line.R, -% R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-binhex.R, R/stat-boxplot.R, R/stat-connect.R, R/stat-contour.R, +% R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, +% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-manual.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, +% R/stat-sum.R, R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, % R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} @@ -131,6 +131,7 @@ \alias{StatBindot} \alias{StatBinhex} \alias{StatBoxplot} +\alias{StatConnect} \alias{StatContour} \alias{StatContourFilled} \alias{StatCount} diff --git a/man/stat_connect.Rd b/man/stat_connect.Rd new file mode 100644 index 0000000000..aa78b2e371 --- /dev/null +++ b/man/stat_connect.Rd @@ -0,0 +1,153 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-connect.R +\name{stat_connect} +\alias{stat_connect} +\title{Connect observations} +\usage{ +stat_connect( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + connection = "hv", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{connection}{A specification of how two points are connected. Can be one +of the folloing: +\itemize{ +\item A string giving a named connection. These options are: +\itemize{ +\item \code{"hv"} to first jump horizontally, then vertically. +\item \code{"vh"} to first jump vertically, then horizontally. +\item \code{"mid"} to step half-way between adjacent x-values. +\item \code{"linear"} to use a straight segment. +} +\item A numeric matrix with two columns giving x and y coordinates respectively. +The coordinates should describe points on a path that connect point A +at location (0, 0) and point B at location (1, 1). At least one of these +two points is expected to be included in the coordinates. +}} + +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with +a warning. If \code{TRUE}, missing values are silently removed.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +Connect successive points with lines of different shapes. +} +\section{Aesthetics}{ + +\code{stat_connect()} understands the following aesthetics (required aesthetics are in bold): +\itemize{ +\item \strong{\code{\link[=aes_position]{x}}} +\item \strong{\code{\link[=aes_position]{y}}} +\item \code{\link[=aes_group_order]{group}} +} +Learn more about setting these aesthetics in \code{vignette("ggplot2-specs")}. +} + +\examples{ +ggplot(head(economics, 20), aes(date, unemploy)) + + stat_connect(connection = "hv") + +# Setup custom connections +x <- seq(0, 1, length.out = 20)[-1] +smooth <- cbind(x, scales::rescale(1 / (1 + exp(-(x * 10 - 5))))) +zigzag <- cbind(c(0.4, 0.6, 1), c(0.75, 0.25, 1)) + +ggplot(head(economics, 10), aes(date, unemploy)) + + geom_point() + + stat_connect(aes(colour = "zigzag"), connection = zigzag) + + stat_connect(aes(colour = "smooth"), connection = smooth) +} From 10205b0b5d296f541822b754f2f41c8a70e9dbde Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 12:10:06 +0100 Subject: [PATCH 3/7] broaden required aes --- R/stat-connect.R | 2 +- man/stat_connect.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/stat-connect.R b/R/stat-connect.R index aef0173be1..151d653bdf 100644 --- a/R/stat-connect.R +++ b/R/stat-connect.R @@ -65,7 +65,7 @@ stat_connect <- function( StatConnect <- ggproto( "StatConnect", Stat, - required_aes = c("x", "y"), + required_aes = c("x|xmin|xmax", "y|ymin|ymax"), setup_params = function(data, params) { params$flipped_aes <- has_flipped_aes(data, ambiguous = TRUE) diff --git a/man/stat_connect.Rd b/man/stat_connect.Rd index aa78b2e371..d62ba610a5 100644 --- a/man/stat_connect.Rd +++ b/man/stat_connect.Rd @@ -130,8 +130,8 @@ Connect successive points with lines of different shapes. \code{stat_connect()} understands the following aesthetics (required aesthetics are in bold): \itemize{ -\item \strong{\code{\link[=aes_position]{x}}} -\item \strong{\code{\link[=aes_position]{y}}} +\item \strong{\code{\link[=aes_position]{x}} \emph{or} \code{\link[=aes_position]{xmin}} \emph{or} \code{\link[=aes_position]{xmax}}} +\item \strong{\code{\link[=aes_position]{y}} \emph{or} \code{\link[=aes_position]{ymin}} \emph{or} \code{\link[=aes_position]{ymax}}} \item \code{\link[=aes_group_order]{group}} } Learn more about setting these aesthetics in \code{vignette("ggplot2-specs")}. From 3902f4927f96f941ab2ef748b1f0efebc0ccf255 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 12:55:37 +0100 Subject: [PATCH 4/7] try to detect orientation better --- R/stat-connect.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/stat-connect.R b/R/stat-connect.R index 151d653bdf..48a193fdf1 100644 --- a/R/stat-connect.R +++ b/R/stat-connect.R @@ -68,7 +68,10 @@ StatConnect <- ggproto( required_aes = c("x|xmin|xmax", "y|ymin|ymax"), setup_params = function(data, params) { - params$flipped_aes <- has_flipped_aes(data, ambiguous = TRUE) + params$flipped_aes <- has_flipped_aes( + data, params, + range_is_orthogonal = TRUE, ambiguous = TRUE + ) connection <- params$connection %||% "hv" From a58f145e4c9bd5f4cb99f17c3ae503e884f3621d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 12:56:22 +0100 Subject: [PATCH 5/7] add tests --- tests/testthat/_snaps/stat-connect.md | 24 ++++++++ tests/testthat/test-stat-connect.R | 85 +++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 tests/testthat/_snaps/stat-connect.md create mode 100644 tests/testthat/test-stat-connect.R diff --git a/tests/testthat/_snaps/stat-connect.md b/tests/testthat/_snaps/stat-connect.md new file mode 100644 index 0000000000..98c4fd7df5 --- /dev/null +++ b/tests/testthat/_snaps/stat-connect.md @@ -0,0 +1,24 @@ +# stat_connect rejects invalid connections + + Code + test_setup(connection = "foobar") + Condition + Error in `setup_params()`: + ! `connection` must be one of "hv", "vh", "mid", or "linear", not "foobar". + +--- + + Code + test_setup(connection = matrix(1:3, ncol = 1)) + Condition + Error in `setup_params()`: + ! `connection` must be a numeric with 2 columns, not an integer matrix with 1 column(s). + +--- + + Code + test_setup(connection = matrix(c(1:3, NA), ncol = 2)) + Condition + Error in `setup_params()`: + ! `connection` cannot contain missing or other non-finite values. + diff --git a/tests/testthat/test-stat-connect.R b/tests/testthat/test-stat-connect.R new file mode 100644 index 0000000000..16c3ed44fd --- /dev/null +++ b/tests/testthat/test-stat-connect.R @@ -0,0 +1,85 @@ +test_that("stat_connect closes off ends", { + + data <- data.frame(x = 1:3, y = c(1, 2, 0)) + + ld <- get_layer_data( + ggplot(data, aes(x, y)) + + stat_connect(connection = "mid") + ) + + i <- c(1L, nrow(ld)) + j <- c(1L, nrow(data)) + expect_equal(ld$x[i], data$x[j]) + expect_equal(ld$y[i], data$y[j]) + +}) + +test_that("stat_connect works with 1-row connections", { + data <- data.frame(x = 1:3, y = c(1, 2, 0)) + + ld <- get_layer_data( + ggplot(data, aes(x, y)) + + stat_connect(connection = cbind(0.5, 0.5)) + ) + + expect_equal(ld$x, c(1, 1.5, 2.5, 3)) + expect_equal(ld$y, c(1, 1.5, 1.0, 0)) +}) + +test_that("stat_connect works with ribbons in both orientations", { + + data <- data.frame(x = 1:4, ymin = c(1, 2, 0, 1), ymax = c(3, 4, 3, 4)) + expected <- data.frame( + x = c(1, 2, 2, 3, 3, 4, 4), + ymin = c(1, 1, 2, 2, 0, 0, 1), + ymax = c(3, 3, 4, 4, 3, 3, 4) + ) + + ld <- layer_data( + ggplot(data, aes(x, ymin = ymin, ymax = ymax)) + + geom_ribbon(stat = "connect", connection = "hv") + ) + + expect_equal(ld[c("x", "ymin", "ymax")], expected) + + ld <- layer_data( + ggplot(data, aes(y = x, xmin = ymin, xmax = ymax)) + + geom_ribbon(stat = "connect", connection = "hv", orientation = "y") + ) + + expect_equal(ld[c("y", "xmin", "xmax")], flip_data(expected, TRUE)) +}) + +test_that("stat_connect rejects invalid connections", { + + test_setup <- function(...) { + StatConnect$setup_params(NULL, list(...)) + } + + # Accept keyword parameter + p <- test_setup(connection = "linear") + expect_vector(p$connection, size = 2L, ptype = matrix(NA_real_, 0, 2)) + + # Accept xy coord matrix + p <- test_setup(connection = cbind(c(0, 1), c(0, 1))) + expect_vector(p$connection, size = 2L, ptype = matrix(NA_real_, 0, 2)) + + + p <- test_setup(connection = matrix(NA_real_, 0, 2)) + expect_null(p$connection) + + expect_snapshot( + test_setup(connection = "foobar"), + error = TRUE + ) + + expect_snapshot( + test_setup(connection = matrix(1:3, ncol = 1)), + error = TRUE + ) + + expect_snapshot( + test_setup(connection = matrix(c(1:3, NA), ncol = 2)), + error = TRUE + ) +}) From 2656a4731277d3efe5d1a98a509bc7ce5ab9278b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 12:58:39 +0100 Subject: [PATCH 6/7] add news bullet --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 74b049ace5..e978b19b55 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # ggplot2 (development version) +* New `stat_connect()` to connect points via steps or other shapes + (@teunbrand, #6228) * New parameters for `geom_label()` (@teunbrand and @steveharoz, #5365): * The `linewidth` aesthetic is now applied and replaces the `label.size` argument. From e5989c201c82bf996b517f13d8c8b28d31d6b550 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 12 Feb 2025 13:13:47 +0100 Subject: [PATCH 7/7] add to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0259312234..5b0505afd8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,6 +69,7 @@ reference: - stat_unique - stat_sf_coordinates - stat_manual + - stat_connect - after_stat - subtitle: Position adjustment