From 71ec2d69029da4ecca9a7fa84f2e3d54249a2f11 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Fri, 13 Jan 2023 11:01:51 -0500 Subject: [PATCH] Error on unnamed values if `names_sep` isn't provided (#1462) * Error on unnamed elements if `names_sep` isn't provided And fix automatic name generation on partially missing names * NEWS bullets --- NEWS.md | 7 +++ R/unnest-wider.R | 52 +++++++++-------- man/unnest_wider.Rd | 9 +-- tests/testthat/_snaps/unnest-wider.md | 63 ++++++++++++--------- tests/testthat/test-unnest-wider.R | 81 +++++++++++++++++++++------ 5 files changed, 140 insertions(+), 72 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0a3846c0a..b3fdae3b7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # tidyr (development version) +* `unnest_wider()` now generates automatic names for _partially_ unnamed + vectors. Previously it only generated them for fully unnamed vectors, + resulting in a strange mix of automatic names and name-repaired names (#1367). + +* `unnest_wider()` now errors if any values being unnested are unnamed and + `names_sep` is not provided (#1367). + * `nest()` has gained a new argument, `.by`, which allows you to specify the columns to nest by (rather than the columns to nest, i.e. through `...`). Additionally, the `.key` argument is no longer deprecated, and is used diff --git a/R/unnest-wider.R b/R/unnest-wider.R index d5c9a1986..87c55a699 100644 --- a/R/unnest-wider.R +++ b/R/unnest-wider.R @@ -13,9 +13,10 @@ #' as is. If a string, the outer and inner names will be pasted together using #' `names_sep` as a separator. #' -#' If the values being unnested are unnamed and `names_sep` is supplied, the -#' inner names will be automatically generated as an increasing sequence of -#' integers. +#' If any values being unnested are unnamed, then `names_sep` must be +#' supplied, otherwise an error is thrown. When `names_sep` is supplied, +#' names are automatically generated for unnamed values as an increasing +#' sequence of integers. #' @param strict A single logical specifying whether or not to apply strict #' vctrs typing rules. If `FALSE`, typed empty values (like `list()` or #' `integer()`) nested within list-columns will be treated like `NULL` and @@ -58,7 +59,7 @@ #' x = 1:3, #' y = list(NULL, 1:3, 4:5) #' ) -#' # where you'll usually want to provide names_sep: +#' # but you must supply `names_sep` to do so, which generates automatic names: #' df %>% unnest_wider(y, names_sep = "_") #' #' # 0-length elements --------------------------------------------------------- @@ -205,6 +206,8 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) { # which we want to treat like lists where we know the type of each element x <- tidyr_new_list(x) x <- map(x, list_of) + names <- names2(x) + x <- set_names(x, NULL) } else { if (!strict && vec_is_list(x)) { empty <- list_sizes(x) == 0L @@ -214,34 +217,35 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) { } } - names <- vec_names(x) - - if (is.null(names)) { - x <- vec_chop(x) - } else { - # Promote names to column names - x <- vec_set_names(x, NULL) - x <- vec_chop(x) - x <- vec_set_names(x, names) - } + names <- vec_names2(x) + x <- vec_set_names(x, NULL) + x <- vec_chop(x) } + empty <- names == "" + any_empty <- any(empty) + if (is.null(names_sep)) { - names(x) <- vec_as_names(names2(x), repair = "unique", call = error_call) + if (any_empty) { + stop_use_names_sep(error_call = error_call) + } } else { - outer <- name - - inner <- names(x) - if (is.null(inner)) { - inner <- as.character(seq_along(x)) - } else { - inner <- vec_as_names(inner, repair = "unique", call = error_call) + if (any_empty) { + names[empty] <- as.character(which(empty)) } - - names(x) <- apply_names_sep(outer, inner, names_sep) + names <- apply_names_sep(name, names, names_sep) } + x <- set_names(x, names) x <- new_data_frame(x, n = 1L) x } + +stop_use_names_sep <- function(error_call = caller_env()) { + message <- c( + "Can't unnest elements with missing names.", + i = "Supply {.arg names_sep} to generate automatic names." + ) + cli::cli_abort(message, call = error_call) +} diff --git a/man/unnest_wider.Rd b/man/unnest_wider.Rd index e2cbf5d2a..6044960fa 100644 --- a/man/unnest_wider.Rd +++ b/man/unnest_wider.Rd @@ -27,9 +27,10 @@ to their common size.} as is. If a string, the outer and inner names will be pasted together using \code{names_sep} as a separator. -If the values being unnested are unnamed and \code{names_sep} is supplied, the -inner names will be automatically generated as an increasing sequence of -integers.} +If any values being unnested are unnamed, then \code{names_sep} must be +supplied, otherwise an error is thrown. When \code{names_sep} is supplied, +names are automatically generated for unnamed values as an increasing +sequence of integers.} \item{simplify}{If \code{TRUE}, will attempt to simplify lists of length-1 vectors to an atomic vector. Can also be a named list containing @@ -119,7 +120,7 @@ df <- tibble( x = 1:3, y = list(NULL, 1:3, 4:5) ) -# where you'll usually want to provide names_sep: +# but you must supply `names_sep` to do so, which generates automatic names: df \%>\% unnest_wider(y, names_sep = "_") # 0-length elements --------------------------------------------------------- diff --git a/tests/testthat/_snaps/unnest-wider.md b/tests/testthat/_snaps/unnest-wider.md index ce8f39325..4a31b4280 100644 --- a/tests/testthat/_snaps/unnest-wider.md +++ b/tests/testthat/_snaps/unnest-wider.md @@ -7,50 +7,61 @@ Error in `unnest_wider()`: ! List-column `y` must contain only vectors. -# can unnest a vector with a mix of named/unnamed elements (#1200 comment) +# can't unnest unnamed elements without `names_sep` (#1367) Code - out <- unnest_wider(df, x, names_sep = "_") - Message - New names: - * `` -> `...1` + unnest_wider(df, col) + Condition + Error in `unnest_wider()`: + ! Can't unnest elements with missing names. + i Supply `names_sep` to generate automatic names. -# unique name repair is done on the elements before applying `names_sep` (#1200 comment) +--- Code - out <- unnest_wider(df, col, names_sep = "_") - Message - New names: - * `` -> `...1` + unnest_wider(df, col) + Condition + Error in `unnest_wider()`: + ! Can't unnest elements with missing names. + i Supply `names_sep` to generate automatic names. --- Code - out <- unnest_wider(df, col, names_sep = "_") - Message - New names: - * `` -> `...1` - * `` -> `...2` + unnest_wider(df, col) + Condition + Error in `unnest_wider()`: + ! Can't unnest elements with missing names. + i Supply `names_sep` to generate automatic names. -# output structure is the same whether or not `names_sep` is applied (#1200 comment) +--- Code - out1 <- unnest_wider(df, col) - Message - New names: - * `` -> `...1` - New names: - * `` -> `...1` + unnest_wider(df, col) + Condition + Error in `unnest_wider()`: + ! Can't unnest elements with missing names. + i Supply `names_sep` to generate automatic names. + +# catches duplicate inner names in the same vector + + Code + unnest_wider(df, col) + Condition + Error in `unnest_wider()`: + ! Names must be unique. + x These names are duplicated: + * "a" at locations 1 and 2. + i Use argument `names_repair` to specify repair strategy. --- Code - out2 <- unnest_wider(df, col, names_sep = "_") + out <- unnest_wider(df, col, names_repair = "unique") Message New names: - * `` -> `...1` - New names: - * `` -> `...1` + * `a` -> `a...1` + * `a` -> `a...2` # unnest_wider() advises on outer / inner name duplication (#1367) diff --git a/tests/testthat/test-unnest-wider.R b/tests/testthat/test-unnest-wider.R index df164b63a..a924bf392 100644 --- a/tests/testthat/test-unnest-wider.R +++ b/tests/testthat/test-unnest-wider.R @@ -182,11 +182,11 @@ test_that("can unnest multiple columns wider at once (#740)", { ) }) -test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment)", { +test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment, #1367)", { df <- tibble(x = c(a = 1L, 2L)) - expect_snapshot(out <- unnest_wider(df, x, names_sep = "_")) + out <- unnest_wider(df, x, names_sep = "_") expect_identical(out$x_a, c(1L, NA)) - expect_identical(out$x_...1, c(NA, 2L)) + expect_identical(out$x_1, c(NA, 2L)) }) test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment)", { @@ -196,17 +196,32 @@ test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment expect_identical(out$x_2, c(2L, 4L)) }) -test_that("unique name repair is done on the elements before applying `names_sep` (#1200 comment)", { +test_that("integer names are generated before applying `names_sep` (#1200 comment, #1367)", { df <- tibble(col = list(set_names(1, ""))) - expect_snapshot(out <- unnest_wider(df, col, names_sep = "_")) - expect_named(out, "col_...1") + out <- unnest_wider(df, col, names_sep = "_") + expect_named(out, "col_1") df <- tibble(col = list(set_names(1:2, c("", "")))) - expect_snapshot(out <- unnest_wider(df, col, names_sep = "_")) - expect_named(out, c("col_...1", "col_...2")) + out <- unnest_wider(df, col, names_sep = "_") + expect_named(out, c("col_1", "col_2")) }) -test_that("output structure is the same whether or not `names_sep` is applied (#1200 comment)", { +test_that("integer names are generated for partially named vectors (#1367)", { + df <- tibble(col = list(set_names(1:4, c("x", "", "z", "")))) + out <- unnest_wider(df, col, names_sep = "_") + expect_named(out, c("col_x", "col_2", "col_z", "col_4")) + + df <- tibble(col = list( + set_names(1:4, c("x", "", "z", "")), + set_names(5:8, c("", "", "z", "")) + )) + out <- unnest_wider(df, col, names_sep = "_") + expect_named(out, c("col_x", "col_2", "col_z", "col_4", "col_1")) + expect_identical(out$col_x, c(1L, NA)) + expect_identical(out$col_1, c(NA, 5L)) +}) + +test_that("`NA_character_` name is treated like the empty string (#1200 comment)", { col <- list( set_names(1, "a"), set_names(1, NA_character_), @@ -214,16 +229,10 @@ test_that("output structure is the same whether or not `names_sep` is applied (# ) df <- tibble(col = col) - # Column structure between these two must be the same, - # we consider an `NA_character_` name as identical to `""`. - expect_snapshot(out1 <- unnest_wider(df, col)) - expect_snapshot(out2 <- unnest_wider(df, col, names_sep = "_")) - - expect_identical(out1$a, c(1, NA, NA)) - expect_identical(out1$...1, c(NA, 1, 1)) + out <- unnest_wider(df, col, names_sep = "_") - expect_identical(out2$col_a, c(1, NA, NA)) - expect_identical(out2$col_...1, c(NA, 1, 1)) + expect_identical(out$col_a, c(1, NA, NA)) + expect_identical(out$col_1, c(NA, 1, 1)) }) test_that("can combine ` + >`", { @@ -232,6 +241,42 @@ test_that("can combine ` + >`", { expect_identical(out$a, list(1:2, 1L)) }) +test_that("can't unnest unnamed elements without `names_sep` (#1367)", { + df <- tibble(col = list(1)) + expect_snapshot(error = TRUE, { + unnest_wider(df, col) + }) + + df <- tibble(col = list(set_names(1, ""))) + expect_snapshot(error = TRUE, { + unnest_wider(df, col) + }) + + df <- tibble(col = list(set_names(1, NA_character_))) + expect_snapshot(error = TRUE, { + unnest_wider(df, col) + }) + + # Partially missing within an element + df <- tibble(col = list(c(a = 1), c(a = 1, 2))) + expect_snapshot(error = TRUE, { + unnest_wider(df, col) + }) +}) + +test_that("catches duplicate inner names in the same vector", { + df <- tibble(col = list(c(a = 1, a = 2))) + + expect_snapshot(error = TRUE, { + unnest_wider(df, col) + }) + + expect_snapshot({ + out <- unnest_wider(df, col, names_repair = "unique") + }) + expect_named(out, c("a...1", "a...2")) +}) + test_that("unnest_wider() advises on outer / inner name duplication (#1367)", { df <- tibble(x = 1, y = list(list(x = 2)))