From a51350dab5502d2dbc391f75f309f74135d74d2f Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 11:15:57 +0200 Subject: [PATCH 1/7] removed else clutter in export --- R/export.R | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/R/export.R b/R/export.R index d4b691d..12e1421 100644 --- a/R/export.R +++ b/R/export.R @@ -83,21 +83,13 @@ export <- function(x, file, format, ...) { if (missing(file) && missing(format)) { stop("Must specify 'file' and/or 'format'") } - if (!missing(file) && !missing(format)) { - format <- tolower(format) + if (!missing(file)) { cfile <- file f <- find_compress(file) file <- f$file compress <- f$compress - } - if (!missing(file) && missing(format)) { - cfile <- file - f <- find_compress(file) - file <- f$file - compress <- f$compress - format <- get_info(file)$input ## this line is slight confusing - } - if (!missing(format) && missing(file)) { + format <- ifelse(isFALSE(missing(format)), tolower(format), get_info(file)$input) + } else { format <- .standardize_format(format) file <- paste0(as.character(substitute(x)), ".", format) compress <- NA_character_ From 4a50411b2cb114170ebf2e1ea8eb40a36c6e0d16 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 12:46:56 +0200 Subject: [PATCH 2/7] refactor outfile creation in export_list --- R/export_list.R | 28 ++-------------------------- R/utils.R | 29 +++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/R/export_list.R b/R/export_list.R index 60cbcce..bfb3ac6 100644 --- a/R/export_list.R +++ b/R/export_list.R @@ -52,32 +52,8 @@ export_list <- function(x, file, archive = "", ...) { stop("'x' must be a list. Perhaps you want export()?") } - if (is.null(file)) { - stop("'file' must be a character vector") - } else if (length(file) == 1L) { - if (!grepl("%s", file, fixed = TRUE)) { - stop("'file' must have a %s placeholder") - } - if (is.null(names(x))) { - outfiles <- sprintf(file, seq_along(x)) - } else { - if (any(nchar(names(x))) == 0) { - stop("All elements of 'x' must be named or all must be unnamed") - } - if (anyDuplicated(names(x))) { - stop("Names of elements in 'x' are not unique") - } - outfiles <- sprintf(file, names(x)) - } - } else { - if (length(x) != length(file)) { - stop("'file' must be same length as 'x', or a single pattern with a %s placeholder") - } - if (anyDuplicated(file)) { - stop("File names are not unique") - } - outfiles <- file - } + outfiles <- .create_outfiles(file, names(x)) + if (is.na(archive_format$compress) && archive_format$file != "") { outfiles <- file.path(archive_format$file, outfiles) } diff --git a/R/utils.R b/R/utils.R index 45a53ad..f6cce14 100644 --- a/R/utils.R +++ b/R/utils.R @@ -46,9 +46,9 @@ get_ext <- function(file) { .query_format <- function(input, file) { - unique_rio_formats <- unique(rio_formats[,colnames(rio_formats) != "note"]) + unique_rio_formats <- unique(rio_formats[, colnames(rio_formats) != "note"]) if (file == "clipboard") { - output <- as.list(unique_rio_formats[unique_rio_formats$format == "clipboard",]) + output <- as.list(unique_rio_formats[unique_rio_formats$format == "clipboard", ]) output$file <- file return(output) } @@ -114,3 +114,28 @@ escape_xml <- function(x, replacement = c("&", """, "<", ">", "&a } invisible(NULL) } + +.create_outfiles <- function(file, names_x) { + if (length(file) == 1L) { + if (!grepl("%s", file, fixed = TRUE)) { + stop("'file' must have a %s placeholder") + } + if (is.null(names_x)) { + return(sprintf(file, seq_along(x))) + } + if (any(nchar(names_x)) == 0) { + stop("All elements of 'x' must be named or all must be unnamed") + } + if (anyDuplicated(names_x)) { + stop("Names of elements in 'x' are not unique") + } + return(sprintf(file, names_x)) + } + if (length(x) != length(file)) { + stop("'file' must be same length as 'x', or a single pattern with a %s placeholder") + } + if (anyDuplicated(file)) { + stop("File names are not unique") + } + return(file) +} From 91fe6366d29a20a7db145db890ddf93cee7d32a7 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 12:51:08 +0200 Subject: [PATCH 3/7] need to pass x to create_outfiles --- R/export_list.R | 2 +- R/utils.R | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/export_list.R b/R/export_list.R index bfb3ac6..82305d0 100644 --- a/R/export_list.R +++ b/R/export_list.R @@ -52,7 +52,7 @@ export_list <- function(x, file, archive = "", ...) { stop("'x' must be a list. Perhaps you want export()?") } - outfiles <- .create_outfiles(file, names(x)) + outfiles <- .create_outfiles(file, x) if (is.na(archive_format$compress) && archive_format$file != "") { outfiles <- file.path(archive_format$file, outfiles) diff --git a/R/utils.R b/R/utils.R index f6cce14..07c00c5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -115,7 +115,8 @@ escape_xml <- function(x, replacement = c("&", """, "<", ">", "&a invisible(NULL) } -.create_outfiles <- function(file, names_x) { +.create_outfiles <- function(file, x) { + names_x <- names(x) if (length(file) == 1L) { if (!grepl("%s", file, fixed = TRUE)) { stop("'file' must have a %s placeholder") From 57765bff48bb7f3c80bfbd66cf88c4999a819d43 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 13:07:26 +0200 Subject: [PATCH 4/7] added tests for .create_outfiles --- R/utils.R | 2 +- tests/testthat/test_create_outfiles.R | 33 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test_create_outfiles.R diff --git a/R/utils.R b/R/utils.R index 07c00c5..69bf002 100644 --- a/R/utils.R +++ b/R/utils.R @@ -124,7 +124,7 @@ escape_xml <- function(x, replacement = c("&", """, "<", ">", "&a if (is.null(names_x)) { return(sprintf(file, seq_along(x))) } - if (any(nchar(names_x)) == 0) { + if (any(nchar(names_x) == 0)) { stop("All elements of 'x' must be named or all must be unnamed") } if (anyDuplicated(names_x)) { diff --git a/tests/testthat/test_create_outfiles.R b/tests/testthat/test_create_outfiles.R new file mode 100644 index 0000000..2b64a76 --- /dev/null +++ b/tests/testthat/test_create_outfiles.R @@ -0,0 +1,33 @@ +test_that(".create_outfiles works", { + x <- list( + a = data.frame(), + b = data.frame(), + c = data.frame() + ) + y <- list( + data.frame(), + data.frame(), + data.frame() + ) + expect_identical(.create_outfiles("d_%s.csv", x), c("d_a.csv", "d_b.csv", "d_c.csv")) + expect_identical(.create_outfiles("d_%s.csv", y), c("d_1.csv", "d_2.csv", "d_3.csv")) + expect_identical(.create_outfiles(c("a.csv", "b.csv", "c.csv"), x), c("a.csv", "b.csv", "c.csv")) +}) + +test_that(".create_outfiles errors", { + x <- list( + a = data.frame(), + a = data.frame(), + c = data.frame() + ) + y <- list( + a = data.frame(), + b = data.frame(), + data.frame() + ) + expect_error(.create_outfiles("d_%s.csv", x)) + expect_error(.create_outfiles(c("a.csv", "a.csv", "c.csv"), x)) + expect_error(.create_outfiles(c("a.csv", "b.csv"), x)) + expect_error(.create_outfiles(c("a.csv"), x)) + expect_error(.create_outfiles(c("d_%s.csv"), y)) +}) From 7cc78c86f6f0edca47daa1cd74197de00226becc Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 15:25:38 +0200 Subject: [PATCH 5/7] rewrote .export.rio_rdata --- R/export_methods.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/export_methods.R b/R/export_methods.R index 3205620..2ff163d 100644 --- a/R/export_methods.R +++ b/R/export_methods.R @@ -137,18 +137,20 @@ export_delim <- function(file, x, fwrite = TRUE, sep = "\t", row.names = FALSE, #' @export .export.rio_rdata <- function(file, x, ...) { + if (isFALSE(is.data.frame(x)) && isFALSE(is.list(x)) && isFALSE(is.environment(x)) && isFALSE(is.character(x))) { + stop("'x' must be a data.frame, list, or environment") + } if (is.data.frame(x)) { return(save(x, file = file, ...)) - } else if (is.list(x)) { + } + if (is.list(x)) { e <- as.environment(x) - save(list = names(x), file = file, envir = e, ...) - } else if (is.environment(x)) { - save(list = ls(x), file = file, envir = x, ...) - } else if (is.character(x)) { - save(list = x, file = file, ...) - } else { - stop("'x' must be a data.frame, list, or environment") + return(save(list = names(x), file = file, envir = e, ...)) + } + if (is.environment(x)) { + return(save(list = ls(x), file = file, envir = x, ...)) } + return(save(list = x, file = file, ...)) ## characters, but is this doing what it does? } #' @export From 3aaca1e499cf6d3e45e3a1f5e757d48ad7e8bf86 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 15:40:42 +0200 Subject: [PATCH 6/7] rewrite parse_tar and parse_zip to parse_archive --- R/compression.R | 52 ++++++++++++++++++------------------------------- R/import.R | 6 +++--- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/R/compression.R b/R/compression.R index 3570042..f8ddeb3 100644 --- a/R/compression.R +++ b/R/compression.R @@ -47,48 +47,34 @@ compress_out <- function(cfile, filename, type = c("zip", "tar", "gzip", "bzip2" return(cfile) } - -parse_zip <- function(file, which, ...) { - d <- tempfile() - dir.create(d) - file_list <- utils::unzip(file, list = TRUE) - if (missing(which)) { - which <- 1 - if (nrow(file_list) > 1) { - warning(sprintf("Zip archive contains multiple files. Attempting first file.")) - } - } - if (is.numeric(which)) { - utils::unzip(file, files = file_list$Name[which], exdir = d) - file.path(d, file_list$Name[which]) +parse_archive <- function(file, which, file_type, ...) { + if (file_type == "zip") { + file_list <- utils::unzip(file, list = TRUE)$Name + extract_func <- utils::unzip + } else if (file_type == "tar") { + file_list <- utils::untar(file, list = TRUE) + extract_func <- utils::untar } else { - if (substring(which, 1, 1) != "^") { - which2 <- paste0("^", which) - } - utils::unzip(file, files = file_list$Name[grep(which2, file_list$Name)[1]], exdir = d) - file.path(d, which) + stop("Unsupported file_type. Use 'zip' or 'tar'.") } -} -parse_tar <- function(file, which, ...) { d <- tempfile() dir.create(d) - on.exit(unlink(d)) - file_list <- utils::untar(file, list = TRUE) + if (missing(which)) { - which <- 1 if (length(file_list) > 1) { - warning(sprintf("Tar archive contains multiple files. Attempting first file.")) + warning(sprintf("%s archive contains multiple files. Attempting first file.", file_type)) } + which <- 1 } + if (is.numeric(which)) { - utils::untar(file, files = file_list[which], exdir = d) - file.path(d, file_list[which]) - } else { - if (substring(which, 1, 1) != "^") { - which2 <- paste0("^", which) - } - utils::untar(file, files = file_list[grep(which2, file_list)[1]], exdir = d) - file.path(d, which) + extract_func(file, files = file_list[which], exdir = d) + return(file.path(d, file_list[which])) + } + if (substring(which, 1, 1) != "^") { + which2 <- paste0("^", which) } + extract_func(file, files = file_list[grep(which2, file_list)[1]], exdir = d) + return(file.path(d, which)) } diff --git a/R/import.R b/R/import.R index 735c0f0..eb319d9 100644 --- a/R/import.R +++ b/R/import.R @@ -110,15 +110,15 @@ import <- function(file, format, setclass = getOption("rio.import.class", "data. } if (grepl("\\.zip$", file)) { if (missing(which)) { - file <- parse_zip(file) + file <- parse_archive(file, file_type = "zip") } else { - file <- parse_zip(file, which = which) + file <- parse_archive(file, which = which, file_type = "zip") } } else if (grepl("\\.tar", file)) { if (missing(which)) { which <- 1 } - file <- parse_tar(file, which = which) + file <- parse_archive(file, which = which, file_type = "tar") } if (missing(format)) { format <- get_info(file)$format From 14a683e77f6fddd056a7467b60e54281ff5c1c79 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 11 Sep 2023 16:08:38 +0200 Subject: [PATCH 7/7] reduced cyclomatic complexity of set_class --- R/set_class.R | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/R/set_class.R b/R/set_class.R index 34ff198..741f95d 100644 --- a/R/set_class.R +++ b/R/set_class.R @@ -1,21 +1,37 @@ set_class <- function(x, class = NULL) { if (is.null(class)) { return(x) - } else if ("data.table" %in% class) { - if (inherits(x, "data.table")) { - return(x) - } - return(data.table::as.data.table(x)) - } else if ("tibble" %in% class || "tbl_df" %in% class || "tbl" %in% class) { - if (inherits(x, "tbl")) { - return(x) - } - return(tibble::as_tibble(x)) } + + if ("data.table" %in% class) { + return(.ensure_data_table(x)) + } + + if (any(c("tibble", "tbl_df", "tbl") %in% class)) { + return(.ensure_tibble(x)) + } + + return(.ensure_data_frame(x)) +} + +.ensure_data_table <- function(x) { + if (inherits(x, "data.table")) { + return(x) + } + return(data.table::as.data.table(x)) +} + +.ensure_tibble <- function(x) { + if (inherits(x, "tbl")) { + return(x) + } + return(tibble::as_tibble(x)) +} + +.ensure_data_frame <- function(x) { out <- structure(x, class = "data.frame") - # add row names in case `x` wasn't already a data frame (e.g., matlab list) if (!length(rownames(out))) { - rownames(out) <- as.character(seq_len(length(out[,1L,drop = TRUE]))) + rownames(out) <- as.character(seq_len(length(out[, 1L, drop = TRUE]))) } return(out) }