Error on unnamed values if names_sep isn't provided (#1462)

* Error on unnamed elements if `names_sep` isn't provided And fix automatic name generation on partially missing names * NEWS bullets
tidyverse · Jan 13, 2023 · 71ec2d6 · 71ec2d6
1 parent c5b189e
commit 71ec2d6
Show file tree

Hide file tree

Showing 5 changed files with 140 additions and 72 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,12 @@
 # tidyr (development version)
 
+* `unnest_wider()` now generates automatic names for _partially_ unnamed
+  vectors. Previously it only generated them for fully unnamed vectors,
+  resulting in a strange mix of automatic names and name-repaired names (#1367).
+
+* `unnest_wider()` now errors if any values being unnested are unnamed and
+  `names_sep` is not provided (#1367).
+
 * `nest()` has gained a new argument, `.by`, which allows you to specify the
   columns to nest by (rather than the columns to nest, i.e. through `...`).
   Additionally, the `.key` argument is no longer deprecated, and is used

diff --git a/R/unnest-wider.R b/R/unnest-wider.R
@@ -13,9 +13,10 @@
 #'   as is. If a string, the outer and inner names will be pasted together using
 #'   `names_sep` as a separator.
 #'
-#'   If the values being unnested are unnamed and `names_sep` is supplied, the
-#'   inner names will be automatically generated as an increasing sequence of
-#'   integers.
+#'   If any values being unnested are unnamed, then `names_sep` must be
+#'   supplied, otherwise an error is thrown. When `names_sep` is supplied,
+#'   names are automatically generated for unnamed values as an increasing
+#'   sequence of integers.
 #' @param strict A single logical specifying whether or not to apply strict
 #'   vctrs typing rules. If `FALSE`, typed empty values (like `list()` or
 #'   `integer()`) nested within list-columns will be treated like `NULL` and
@@ -58,7 +59,7 @@
 #'   x = 1:3,
 #'   y = list(NULL, 1:3, 4:5)
 #' )
-#' # where you'll usually want to provide names_sep:
+#' # but you must supply `names_sep` to do so, which generates automatic names:
 #' df %>% unnest_wider(y, names_sep = "_")
 #'
 #' # 0-length elements ---------------------------------------------------------
@@ -205,6 +206,8 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
     # which we want to treat like lists where we know the type of each element
     x <- tidyr_new_list(x)
     x <- map(x, list_of)
+    names <- names2(x)
+    x <- set_names(x, NULL)
   } else {
     if (!strict && vec_is_list(x)) {
       empty <- list_sizes(x) == 0L
@@ -214,34 +217,35 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
       }
     }
 
-    names <- vec_names(x)
-
-    if (is.null(names)) {
-      x <- vec_chop(x)
-    } else {
-      # Promote names to column names
-      x <- vec_set_names(x, NULL)
-      x <- vec_chop(x)
-      x <- vec_set_names(x, names)
-    }
+    names <- vec_names2(x)
+    x <- vec_set_names(x, NULL)
+    x <- vec_chop(x)
   }
 
+  empty <- names == ""
+  any_empty <- any(empty)
+
   if (is.null(names_sep)) {
-    names(x) <- vec_as_names(names2(x), repair = "unique", call = error_call)
+    if (any_empty) {
+      stop_use_names_sep(error_call = error_call)
+    }
   } else {
-    outer <- name
-
-    inner <- names(x)
-    if (is.null(inner)) {
-      inner <- as.character(seq_along(x))
-    } else {
-      inner <- vec_as_names(inner, repair = "unique", call = error_call)
+    if (any_empty) {
+      names[empty] <- as.character(which(empty))
     }
-
-    names(x) <- apply_names_sep(outer, inner, names_sep)
+    names <- apply_names_sep(name, names, names_sep)
   }
 
+  x <- set_names(x, names)
   x <- new_data_frame(x, n = 1L)
 
   x
 }
+
+stop_use_names_sep <- function(error_call = caller_env()) {
+  message <- c(
+    "Can't unnest elements with missing names.",
+    i = "Supply {.arg names_sep} to generate automatic names."
+  )
+  cli::cli_abort(message, call = error_call)
+}
diff --git a/man/unnest_wider.Rd b/man/unnest_wider.Rd
diff --git a/tests/testthat/_snaps/unnest-wider.md b/tests/testthat/_snaps/unnest-wider.md
@@ -7,50 +7,61 @@
       Error in `unnest_wider()`:
       ! List-column `y` must contain only vectors.
 
-# can unnest a vector with a mix of named/unnamed elements (#1200 comment)
+# can't unnest unnamed elements without `names_sep` (#1367)
 
     Code
-      out <- unnest_wider(df, x, names_sep = "_")
-    Message
-      New names:
-      * `` -> `...1`
+      unnest_wider(df, col)
+    Condition
+      Error in `unnest_wider()`:
+      ! Can't unnest elements with missing names.
+      i Supply `names_sep` to generate automatic names.
 
-# unique name repair is done on the elements before applying `names_sep` (#1200 comment)
+---
 
     Code
-      out <- unnest_wider(df, col, names_sep = "_")
-    Message
-      New names:
-      * `` -> `...1`
+      unnest_wider(df, col)
+    Condition
+      Error in `unnest_wider()`:
+      ! Can't unnest elements with missing names.
+      i Supply `names_sep` to generate automatic names.
 
 ---
 
     Code
-      out <- unnest_wider(df, col, names_sep = "_")
-    Message
-      New names:
-      * `` -> `...1`
-      * `` -> `...2`
+      unnest_wider(df, col)
+    Condition
+      Error in `unnest_wider()`:
+      ! Can't unnest elements with missing names.
+      i Supply `names_sep` to generate automatic names.
 
-# output structure is the same whether or not `names_sep` is applied (#1200 comment)
+---
 
     Code
-      out1 <- unnest_wider(df, col)
-    Message
-      New names:
-      * `` -> `...1`
-      New names:
-      * `` -> `...1`
+      unnest_wider(df, col)
+    Condition
+      Error in `unnest_wider()`:
+      ! Can't unnest elements with missing names.
+      i Supply `names_sep` to generate automatic names.
+
+# catches duplicate inner names in the same vector
+
+    Code
+      unnest_wider(df, col)
+    Condition
+      Error in `unnest_wider()`:
+      ! Names must be unique.
+      x These names are duplicated:
+        * "a" at locations 1 and 2.
+      i Use argument `names_repair` to specify repair strategy.
 
 ---
 
     Code
-      out2 <- unnest_wider(df, col, names_sep = "_")
+      out <- unnest_wider(df, col, names_repair = "unique")
     Message
       New names:
-      * `` -> `...1`
-      New names:
-      * `` -> `...1`
+      * `a` -> `a...1`
+      * `a` -> `a...2`
 
 # unnest_wider() advises on outer / inner name duplication (#1367)
 

diff --git a/tests/testthat/test-unnest-wider.R b/tests/testthat/test-unnest-wider.R
@@ -182,11 +182,11 @@ test_that("can unnest multiple columns wider at once (#740)", {
   )
 })
 
-test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment)", {
+test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment, #1367)", {
   df <- tibble(x = c(a = 1L, 2L))
-  expect_snapshot(out <- unnest_wider(df, x, names_sep = "_"))
+  out <- unnest_wider(df, x, names_sep = "_")
   expect_identical(out$x_a, c(1L, NA))
-  expect_identical(out$x_...1, c(NA, 2L))
+  expect_identical(out$x_1, c(NA, 2L))
 })
 
 test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment)", {
@@ -196,34 +196,43 @@ test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment
   expect_identical(out$x_2, c(2L, 4L))
 })
 
-test_that("unique name repair is done on the elements before applying `names_sep` (#1200 comment)", {
+test_that("integer names are generated before applying `names_sep` (#1200 comment, #1367)", {
   df <- tibble(col = list(set_names(1, "")))
-  expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
-  expect_named(out, "col_...1")
+  out <- unnest_wider(df, col, names_sep = "_")
+  expect_named(out, "col_1")
 
   df <- tibble(col = list(set_names(1:2, c("", ""))))
-  expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
-  expect_named(out, c("col_...1", "col_...2"))
+  out <- unnest_wider(df, col, names_sep = "_")
+  expect_named(out, c("col_1", "col_2"))
 })
 
-test_that("output structure is the same whether or not `names_sep` is applied (#1200 comment)", {
+test_that("integer names are generated for partially named vectors (#1367)", {
+  df <- tibble(col = list(set_names(1:4, c("x", "", "z", ""))))
+  out <- unnest_wider(df, col, names_sep = "_")
+  expect_named(out, c("col_x", "col_2", "col_z", "col_4"))
+
+  df <- tibble(col = list(
+    set_names(1:4, c("x", "", "z", "")),
+    set_names(5:8, c("", "", "z", ""))
+  ))
+  out <- unnest_wider(df, col, names_sep = "_")
+  expect_named(out, c("col_x", "col_2", "col_z", "col_4", "col_1"))
+  expect_identical(out$col_x, c(1L, NA))
+  expect_identical(out$col_1, c(NA, 5L))
+})
+
+test_that("`NA_character_` name is treated like the empty string (#1200 comment)", {
   col <- list(
     set_names(1, "a"),
     set_names(1, NA_character_),
     set_names(1, "")
   )
   df <- tibble(col = col)
 
-  # Column structure between these two must be the same,
-  # we consider an `NA_character_` name as identical to `""`.
-  expect_snapshot(out1 <- unnest_wider(df, col))
-  expect_snapshot(out2 <- unnest_wider(df, col, names_sep = "_"))
-
-  expect_identical(out1$a, c(1, NA, NA))
-  expect_identical(out1$...1, c(NA, 1, 1))
+  out <- unnest_wider(df, col, names_sep = "_")
 
-  expect_identical(out2$col_a, c(1, NA, NA))
-  expect_identical(out2$col_...1, c(NA, 1, 1))
+  expect_identical(out$col_a, c(1, NA, NA))
+  expect_identical(out$col_1, c(NA, 1, 1))
 })
 
 test_that("can combine `<list> + <list_of<ptype>>`", {
@@ -232,6 +241,42 @@ test_that("can combine `<list> + <list_of<ptype>>`", {
   expect_identical(out$a, list(1:2, 1L))
 })
 
+test_that("can't unnest unnamed elements without `names_sep` (#1367)", {
+  df <- tibble(col = list(1))
+  expect_snapshot(error = TRUE, {
+    unnest_wider(df, col)
+  })
+
+  df <- tibble(col = list(set_names(1, "")))
+  expect_snapshot(error = TRUE, {
+    unnest_wider(df, col)
+  })
+
+  df <- tibble(col = list(set_names(1, NA_character_)))
+  expect_snapshot(error = TRUE, {
+    unnest_wider(df, col)
+  })
+
+  # Partially missing within an element
+  df <- tibble(col = list(c(a = 1), c(a = 1, 2)))
+  expect_snapshot(error = TRUE, {
+    unnest_wider(df, col)
+  })
+})
+
+test_that("catches duplicate inner names in the same vector", {
+  df <- tibble(col = list(c(a = 1, a = 2)))
+
+  expect_snapshot(error = TRUE, {
+    unnest_wider(df, col)
+  })
+
+  expect_snapshot({
+    out <- unnest_wider(df, col, names_repair = "unique")
+  })
+  expect_named(out, c("a...1", "a...2"))
+})
+
 test_that("unnest_wider() advises on outer / inner name duplication (#1367)", {
   df <- tibble(x = 1, y = list(list(x = 2)))