Skip to content

Commit

Permalink
Error on unnamed values if names_sep isn't provided (#1462)
Browse files Browse the repository at this point in the history
* Error on unnamed elements if `names_sep` isn't provided

And fix automatic name generation on partially missing names

* NEWS bullets
  • Loading branch information
DavisVaughan authored Jan 13, 2023
1 parent c5b189e commit 71ec2d6
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 72 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# tidyr (development version)

* `unnest_wider()` now generates automatic names for _partially_ unnamed
vectors. Previously it only generated them for fully unnamed vectors,
resulting in a strange mix of automatic names and name-repaired names (#1367).

* `unnest_wider()` now errors if any values being unnested are unnamed and
`names_sep` is not provided (#1367).

* `nest()` has gained a new argument, `.by`, which allows you to specify the
columns to nest by (rather than the columns to nest, i.e. through `...`).
Additionally, the `.key` argument is no longer deprecated, and is used
Expand Down
52 changes: 28 additions & 24 deletions R/unnest-wider.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
#' as is. If a string, the outer and inner names will be pasted together using
#' `names_sep` as a separator.
#'
#' If the values being unnested are unnamed and `names_sep` is supplied, the
#' inner names will be automatically generated as an increasing sequence of
#' integers.
#' If any values being unnested are unnamed, then `names_sep` must be
#' supplied, otherwise an error is thrown. When `names_sep` is supplied,
#' names are automatically generated for unnamed values as an increasing
#' sequence of integers.
#' @param strict A single logical specifying whether or not to apply strict
#' vctrs typing rules. If `FALSE`, typed empty values (like `list()` or
#' `integer()`) nested within list-columns will be treated like `NULL` and
Expand Down Expand Up @@ -58,7 +59,7 @@
#' x = 1:3,
#' y = list(NULL, 1:3, 4:5)
#' )
#' # where you'll usually want to provide names_sep:
#' # but you must supply `names_sep` to do so, which generates automatic names:
#' df %>% unnest_wider(y, names_sep = "_")
#'
#' # 0-length elements ---------------------------------------------------------
Expand Down Expand Up @@ -205,6 +206,8 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
# which we want to treat like lists where we know the type of each element
x <- tidyr_new_list(x)
x <- map(x, list_of)
names <- names2(x)
x <- set_names(x, NULL)
} else {
if (!strict && vec_is_list(x)) {
empty <- list_sizes(x) == 0L
Expand All @@ -214,34 +217,35 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
}
}

names <- vec_names(x)

if (is.null(names)) {
x <- vec_chop(x)
} else {
# Promote names to column names
x <- vec_set_names(x, NULL)
x <- vec_chop(x)
x <- vec_set_names(x, names)
}
names <- vec_names2(x)
x <- vec_set_names(x, NULL)
x <- vec_chop(x)
}

empty <- names == ""
any_empty <- any(empty)

if (is.null(names_sep)) {
names(x) <- vec_as_names(names2(x), repair = "unique", call = error_call)
if (any_empty) {
stop_use_names_sep(error_call = error_call)
}
} else {
outer <- name

inner <- names(x)
if (is.null(inner)) {
inner <- as.character(seq_along(x))
} else {
inner <- vec_as_names(inner, repair = "unique", call = error_call)
if (any_empty) {
names[empty] <- as.character(which(empty))
}

names(x) <- apply_names_sep(outer, inner, names_sep)
names <- apply_names_sep(name, names, names_sep)
}

x <- set_names(x, names)
x <- new_data_frame(x, n = 1L)

x
}

stop_use_names_sep <- function(error_call = caller_env()) {
message <- c(
"Can't unnest elements with missing names.",
i = "Supply {.arg names_sep} to generate automatic names."
)
cli::cli_abort(message, call = error_call)
}
9 changes: 5 additions & 4 deletions man/unnest_wider.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 37 additions & 26 deletions tests/testthat/_snaps/unnest-wider.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,61 @@
Error in `unnest_wider()`:
! List-column `y` must contain only vectors.

# can unnest a vector with a mix of named/unnamed elements (#1200 comment)
# can't unnest unnamed elements without `names_sep` (#1367)

Code
out <- unnest_wider(df, x, names_sep = "_")
Message
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# unique name repair is done on the elements before applying `names_sep` (#1200 comment)
---

Code
out <- unnest_wider(df, col, names_sep = "_")
Message
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

---

Code
out <- unnest_wider(df, col, names_sep = "_")
Message
New names:
* `` -> `...1`
* `` -> `...2`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# output structure is the same whether or not `names_sep` is applied (#1200 comment)
---

Code
out1 <- unnest_wider(df, col)
Message
New names:
* `` -> `...1`
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# catches duplicate inner names in the same vector

Code
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Names must be unique.
x These names are duplicated:
* "a" at locations 1 and 2.
i Use argument `names_repair` to specify repair strategy.

---

Code
out2 <- unnest_wider(df, col, names_sep = "_")
out <- unnest_wider(df, col, names_repair = "unique")
Message
New names:
* `` -> `...1`
New names:
* `` -> `...1`
* `a` -> `a...1`
* `a` -> `a...2`

# unnest_wider() advises on outer / inner name duplication (#1367)

Expand Down
81 changes: 63 additions & 18 deletions tests/testthat/test-unnest-wider.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ test_that("can unnest multiple columns wider at once (#740)", {
)
})

test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment)", {
test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment, #1367)", {
df <- tibble(x = c(a = 1L, 2L))
expect_snapshot(out <- unnest_wider(df, x, names_sep = "_"))
out <- unnest_wider(df, x, names_sep = "_")
expect_identical(out$x_a, c(1L, NA))
expect_identical(out$x_...1, c(NA, 2L))
expect_identical(out$x_1, c(NA, 2L))
})

test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment)", {
Expand All @@ -196,34 +196,43 @@ test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment
expect_identical(out$x_2, c(2L, 4L))
})

test_that("unique name repair is done on the elements before applying `names_sep` (#1200 comment)", {
test_that("integer names are generated before applying `names_sep` (#1200 comment, #1367)", {
df <- tibble(col = list(set_names(1, "")))
expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
expect_named(out, "col_...1")
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, "col_1")

df <- tibble(col = list(set_names(1:2, c("", ""))))
expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
expect_named(out, c("col_...1", "col_...2"))
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, c("col_1", "col_2"))
})

test_that("output structure is the same whether or not `names_sep` is applied (#1200 comment)", {
test_that("integer names are generated for partially named vectors (#1367)", {
df <- tibble(col = list(set_names(1:4, c("x", "", "z", ""))))
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, c("col_x", "col_2", "col_z", "col_4"))

df <- tibble(col = list(
set_names(1:4, c("x", "", "z", "")),
set_names(5:8, c("", "", "z", ""))
))
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, c("col_x", "col_2", "col_z", "col_4", "col_1"))
expect_identical(out$col_x, c(1L, NA))
expect_identical(out$col_1, c(NA, 5L))
})

test_that("`NA_character_` name is treated like the empty string (#1200 comment)", {
col <- list(
set_names(1, "a"),
set_names(1, NA_character_),
set_names(1, "")
)
df <- tibble(col = col)

# Column structure between these two must be the same,
# we consider an `NA_character_` name as identical to `""`.
expect_snapshot(out1 <- unnest_wider(df, col))
expect_snapshot(out2 <- unnest_wider(df, col, names_sep = "_"))

expect_identical(out1$a, c(1, NA, NA))
expect_identical(out1$...1, c(NA, 1, 1))
out <- unnest_wider(df, col, names_sep = "_")

expect_identical(out2$col_a, c(1, NA, NA))
expect_identical(out2$col_...1, c(NA, 1, 1))
expect_identical(out$col_a, c(1, NA, NA))
expect_identical(out$col_1, c(NA, 1, 1))
})

test_that("can combine `<list> + <list_of<ptype>>`", {
Expand All @@ -232,6 +241,42 @@ test_that("can combine `<list> + <list_of<ptype>>`", {
expect_identical(out$a, list(1:2, 1L))
})

test_that("can't unnest unnamed elements without `names_sep` (#1367)", {
df <- tibble(col = list(1))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

df <- tibble(col = list(set_names(1, "")))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

df <- tibble(col = list(set_names(1, NA_character_)))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

# Partially missing within an element
df <- tibble(col = list(c(a = 1), c(a = 1, 2)))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})
})

test_that("catches duplicate inner names in the same vector", {
df <- tibble(col = list(c(a = 1, a = 2)))

expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

expect_snapshot({
out <- unnest_wider(df, col, names_repair = "unique")
})
expect_named(out, c("a...1", "a...2"))
})

test_that("unnest_wider() advises on outer / inner name duplication (#1367)", {
df <- tibble(x = 1, y = list(list(x = 2)))

Expand Down

0 comments on commit 71ec2d6

Please sign in to comment.