Skip to content

Commit

Permalink
Update coalesce() to use vec_case_when() (#6265)
Browse files Browse the repository at this point in the history
* Port tidyverse/funs#80 to `coalesce()`

* NEWS bullet

* Use `vec_case_when()` infrastructure in `coalesce()`

* Backtick `NULL`

* Tweak parameter documentation one more time
  • Loading branch information
DavisVaughan authored Jul 11, 2022
1 parent 5e27ef1 commit a6df2a0
Show file tree
Hide file tree
Showing 5 changed files with 231 additions and 84 deletions.
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# dplyr (development version)

* `coalesce()` now more fully embraces the principles of vctrs (#6265).

* `.ptype` and `.size` arguments have been added to allow you to explicitly
enforce an output type and size.

* `NULL` inputs are now discarded up front.

* `coalesce()` no longer iterates over the columns of data frame input.
Instead, a row is now only coalesced if it is entirely missing, which is
consistent with `vctrs::vec_equal_na()` and greatly simplifies the
implementation.

* `group_by()` now uses a new algorithm for computing groups. It is often faster
than the previous approach (especially when there are many groups), and in
most cases there should be no changes. The exception is with character vector
Expand Down
100 changes: 55 additions & 45 deletions R/coalesce.R
Original file line number Diff line number Diff line change
@@ -1,69 +1,79 @@
#' Find first non-missing element
#' Find the first non-missing element
#'
#' Given a set of vectors, `coalesce()` finds the first non-missing value
#' at each position. This is inspired by the SQL `COALESCE` function
#' which does the same thing for `NULL`s.
#' Given a set of vectors, `coalesce()` finds the first non-missing value at
#' each position. It's inspired by the SQL `COALESCE` function which does the
#' same thing for SQL `NULL`s.
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]>
#'
#' One or more vectors. These will be
#' [recycled][vctrs::vector_recycling_rules] against each other, and will be
#' cast to their common type.
#'
#' @param .ptype An optional prototype declaring the desired output type. If
#' supplied, this overrides the common type of the vectors in `...`.
#'
#' @param .size An optional size declaring the desired output size. If supplied,
#' this overrides the common size of the vectors in `...`.
#'
#' @return A vector with the same type and size as the common type and common
#' size of the vectors in `...`.
#'
#' @seealso [na_if()] to replace specified values with an `NA`.
#' [tidyr::replace_na()] to replace `NA` with a value.
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Vectors. Inputs should be
#' recyclable (either be length 1 or same length as the longest vector) and
#' coercible to a common type. If data frames, they are coalesced column by
#' column.
#' @return A vector the same length as the first `...` argument with
#' missing values replaced by the first non-missing value.
#' @seealso [na_if()] to replace specified values with a `NA`.
#' [tidyr::replace_na()] to replace `NA` with a value
#' @export
#' @examples
#' # Use a single value to replace all missing values
#' x <- sample(c(1:5, NA, NA, NA))
#' coalesce(x, 0L)
#'
#' # Or match together a complete vector from missing pieces
#' # The equivalent to a missing value in a list is `NULL`
#' coalesce(list(1, 2, NULL), list(NA))
#'
#' # Or generate a complete vector from partially missing pieces
#' y <- c(1, 2, NA, NA, 5)
#' z <- c(NA, NA, 3, 4, 5)
#' coalesce(y, z)
#'
#' # Supply lists by with dynamic dots
#' # Supply lists by splicing them into dots:
#' vecs <- list(
#' c(1, 2, NA, NA, 5),
#' c(NA, NA, 3, 4, 5)
#' )
#' coalesce(!!!vecs)
coalesce <- function(...) {
if (missing(..1)) {
abort("At least one argument must be supplied.")
}
coalesce <- function(..., .ptype = NULL, .size = NULL) {
args <- list2(...)

values <- list2(...)
values <- fix_call(vec_cast_common(!!!values))
values <- fix_call(vec_recycle_common(!!!values))
# Drop `NULL`s
# TODO: Use cheaper `vctrs::vec_any_missing()` approach
# https://github.com/r-lib/vctrs/issues/1563
args <- discard(args, is.null)

x <- values[[1]]
values <- values[-1]

if (is.array(x) && length(dim(x)) > 1) {
abort("Can't coalesce matrices.")
}
if (is.data.frame(x)) {
df_coalesce(x, values)
} else {
vec_coalesce(x, values)
if (length(args) == 0L) {
abort("`...` can't be empty.")
}
}

vec_coalesce <- function(x, values) {
for (i in seq_along(values)) {
x_miss <- is.na(x)
vec_slice(x, x_miss) <- vec_slice(values[[i]], x_miss)
}
# Recycle early so logical conditions computed below will be the same length,
# as required by `vec_case_when()`
args <- vec_recycle_common(!!!args, .size = .size)

x
}
df_coalesce <- function(x, values) {
for (i in seq_along(x)) {
col_values <- map(values, `[[`, i)
x[[i]] <- vec_coalesce(x[[i]], col_values)
}
# Name early to get correct indexing in `vec_case_when()` error messages
names <- names2(args)
names <- names_as_error_names(names)
args <- set_names(args, names)

conditions <- map(args, ~{
!vec_equal_na(.x)
})

x
vec_case_when(
conditions = conditions,
values = args,
conditions_arg = "",
values_arg = "",
ptype = .ptype,
size = .size,
call = current_env()
)
}
40 changes: 25 additions & 15 deletions man/coalesce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions tests/testthat/_snaps/coalesce.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,51 @@
Error in `coalesce()`:
! Can't combine `..1` <integer> and `..2` <character>.

# `.size` overrides the common size

Code
coalesce(x, 1:2, .size = vec_size(x))
Condition
Error in `coalesce()`:
! Can't recycle `..2` (size 2) to size 1.

# must have at least one non-`NULL` vector

Code
coalesce()
Condition
Error in `coalesce()`:
! `...` can't be empty.

---

Code
coalesce(NULL, NULL)
Condition
Error in `coalesce()`:
! `...` can't be empty.

# inputs must be vectors

Code
coalesce(1, environment())
Condition
Error in `coalesce()`:
! `..2` must be a vector, not an environment.

# names in error messages are indexed correctly

Code
coalesce(1, "x")
Condition
Error in `coalesce()`:
! Can't combine `..1` <double> and `..2` <character>.

---

Code
coalesce(1, y = "x")
Condition
Error in `coalesce()`:
! Can't combine `..1` <double> and `y` <character>.

115 changes: 91 additions & 24 deletions tests/testthat/test-coalesce.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ test_that("coerces to common type", {
expect_identical(coalesce(NA, f), f)
})

test_that("inputs are recycled to their common size", {
expect_identical(coalesce(1, c(2, 3)), c(1, 1))
})

test_that("finds non-missing values in multiple positions", {
x1 <- c(1L, NA, NA)
x2 <- c(NA, 2L, NA)
Expand All @@ -25,33 +29,96 @@ test_that("coalesce() gives meaningful error messages", {
})
})

test_that("coalesce() supports data frames (#5326)", {
out <- coalesce(
data.frame(x = c(NA, 1)),
data.frame(x = 1:2)
test_that("coalesce() supports one-dimensional arrays (#5557)", {
x <- array(1:10)
out <- coalesce(x, 0L)
expect_identical(out, x)
})

test_that("only updates entirely missing matrix rows", {
x <- c(
1, NA,
NA, NA
)
expect_identical(out, data.frame(x = c(1, 1)))

df1 <- data.frame(x = c(NA, 1, NA), y = c(2, NA, NA), z = c(1:2, NA))
df2 <- tibble::tibble(x = 1:3, y = c(3, 4, NA), z = c(NA, NA, NA))
df3 <- data.frame(x = NA, y = c(30, 40, 50), z = 101:103)
out <- coalesce(df1, df2, df3)
exp <- tibble(x = c(1, 1, 3), y = c(2, 4, 50), z = c(1L, 2L, 103L))
expect_identical(out, exp)

expect_error(
coalesce(
data.frame(x = c(NA, 1)),
data.frame(x = c("a", "b"))
),
class = "vctrs_error_incompatible_type"
x <- matrix(x, nrow = 2, byrow = TRUE)

y <- c(
2, 2,
NA, 1
)
y <- matrix(y, nrow = 2, byrow = TRUE)

expect_error(coalesce(as.matrix(mtcars), as.matrix(mtcars)), "matrices")
expect <- c(
1, NA,
NA, 1
)
expect <- matrix(expect, nrow = 2, byrow = TRUE)

expect_identical(coalesce(x, y), expect)
})

test_that("coalesce() supports one-dimensional arrays (#5557)", {
x <- array(1:10)
out <- coalesce(x, 0)
expect_equal(out, x)
test_that("only updates entirely missing data frame rows", {
x <- tibble(x = c(1, NA), y = c(NA, NA))
y <- tibble(x = c(2, NA), y = c(TRUE, TRUE))

expect <- tibble(x = c(1, NA), y = c(NA, TRUE))

expect_identical(coalesce(x, y), expect)
})

test_that("only updates entirely missing rcrd observations", {
x <- new_rcrd(list(x = c(1, NA), y = c(NA, NA)))
y <- new_rcrd(list(x = c(2, NA), y = c(TRUE, TRUE)))

expect <- new_rcrd(list(x = c(1, NA), y = c(NA, TRUE)))

expect_identical(coalesce(x, y), expect)
})

test_that("recycling is done on the values early", {
expect_identical(coalesce(1, 1:2), c(1, 1))
})

test_that("`.ptype` overrides the common type (r-lib/funs#64)", {
x <- c(1L, NA)
expect_identical(coalesce(x, 99, .ptype = x), c(1L, 99L))
})

test_that("`.size` overrides the common size", {
x <- 1L

expect_snapshot(error = TRUE, {
coalesce(x, 1:2, .size = vec_size(x))
})
})

test_that("must have at least one non-`NULL` vector", {
expect_snapshot(error = TRUE, {
coalesce()
})
expect_snapshot(error = TRUE, {
coalesce(NULL, NULL)
})
})

test_that("`NULL`s are discarded (r-lib/funs#80)", {
expect_identical(
coalesce(c(1, NA, NA), NULL, c(1, 2, NA), NULL, 3),
c(1, 2, 3)
)
})

test_that("inputs must be vectors", {
expect_snapshot(error = TRUE, {
coalesce(1, environment())
})
})

test_that("names in error messages are indexed correctly", {
expect_snapshot(error = TRUE, {
coalesce(1, "x")
})
expect_snapshot(error = TRUE, {
coalesce(1, y = "x")
})
})

0 comments on commit a6df2a0

Please sign in to comment.