From e7e94dca76f5feb2ecdc4866c030b3a66d17671b Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 7 Oct 2017 20:13:25 +0100 Subject: [PATCH 1/8] Add names to "which" for excel and zip --- R/import_list.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/import_list.R b/R/import_list.R index c27962d..ffb16ef 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -70,9 +70,13 @@ function(file, which <- seq_along(xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")) } else if (get_ext(file) %in% c("xls","xlsx")) { requireNamespace("readxl", quietly = TRUE) - which <- seq_along(readxl::excel_sheets(path = file)) + whichnames <- readxl::excel_sheets(path = file) + which <- seq_along(whichnames) + names(which) <- whichnames } else if (get_ext(file) %in% c("zip")) { - which <- seq_len(nrow(utils::unzip(file, list = TRUE))) + whichnames <- utils::unzip(file, list = TRUE)[, "Name"] + which <- seq_along(whichnames) + names(which) <- whichnames } else { which <- 1 } From 3caba1f70b7ce62d829c2ccee4f79cf6799bba25 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 7 Oct 2017 20:14:43 +0100 Subject: [PATCH 2/8] Use filenames as list elements Exclude .ext from element name --- R/import_list.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/import_list.R b/R/import_list.R index ffb16ef..2c19a0b 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -48,6 +48,7 @@ function(file, setclass <- NULL } if (length(file) > 1) { + names(file) <- gsub(paste0("\\.", tools::file_ext(file[1]), "$"), "", file, ignore.case = TRUE) x <- lapply(file, function(thisfile) { out <- try(import(thisfile, setclass = setclass, ...), silent = TRUE) if (inherits(out, "try-error")) { From 88d00d61d78b6bdbd6c8ef3b792e1f81ce31d3db Mon Sep 17 00:00:00 2001 From: Unknown Date: Sun, 8 Oct 2017 18:30:45 +0100 Subject: [PATCH 3/8] import_list() naming tests --- tests/testthat/test_import_list.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/testthat/test_import_list.R b/tests/testthat/test_import_list.R index 889daca..74eac4f 100644 --- a/tests/testthat/test_import_list.R +++ b/tests/testthat/test_import_list.R @@ -41,5 +41,22 @@ test_that("Using setclass in import_list()", { }) +test_that("Object names are preserved by import_list()", { + export(list(mtcars1 = mtcars[1:10,], + mtcars2 = mtcars[11:20,], + mtcars3 = mtcars[21:32,]), "mtcars.xlsx") + export(mtcars[1:10,], "mtcars1.csv") + export(mtcars[11:20,], "mtcars2.csv") + export(mtcars[21:32,], "mtcars3.csv") + expected_names <- c("mtcars1", "mtcars2", "mtcars3") + dat_xls <- import_list("mtcars.xlsx") + dat_csv <- import_list(c("mtcars1.csv","mtcars2.csv","mtcars3.csv")) + + expect_identical(names(dat_xls), expected_names) + expect_identical(names(dat_csv), expected_names) + + unlink(c("mtcars.xlsx", "mtcars1.csv","mtcars2.csv","mtcars3.csv")) +}) + unlink("data.rdata") unlink("mtcars.rds") From 4a33bed6321259ed4534114737195812ece561a2 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sun, 8 Oct 2017 18:32:36 +0100 Subject: [PATCH 4/8] Add names to "which" for html Uses table's 'class' attribute otherwise blank --- R/import_list.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/import_list.R b/R/import_list.R index 2c19a0b..f92ce73 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -68,7 +68,15 @@ function(file, if (missing(which)) { if (get_ext(file) == "html") { requireNamespace("xml2", quietly = TRUE) - which <- seq_along(xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")) + tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table") + which <- seq_along(tables) + names(which) <- sapply(xml2::xml_attrs(tables), function(x) { + if ("class" %in% names(x)) { + x["class"] + } else { + "" + } + }) } else if (get_ext(file) %in% c("xls","xlsx")) { requireNamespace("readxl", quietly = TRUE) whichnames <- readxl::excel_sheets(path = file) From 00ffcc3afa67518df24032b51c0ea7fda5a333f7 Mon Sep 17 00:00:00 2001 From: Unknown Date: Mon, 9 Oct 2017 17:09:44 +0100 Subject: [PATCH 5/8] Add a "class" to twotables.html and add test --- inst/examples/twotables.html | 2 +- tests/testthat/test_import_list.R | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/examples/twotables.html b/inst/examples/twotables.html index d608c98..168e023 100644 --- a/inst/examples/twotables.html +++ b/inst/examples/twotables.html @@ -5,7 +5,7 @@ R Exported Data -
mpgcyldisphpdratwtqsecvsamgearcarb
2161601103.92.6216.460144
2161601103.92.87517.020144
22.84108933.852.3218.611141
21.462581103.083.21519.441031
18.783601753.153.4417.020032
18.162251052.763.4620.221031
14.383602453.213.5715.840034
24.44146.7623.693.19201042
22.84140.8953.923.1522.91042
19.26167.61233.923.4418.31044
17.86167.61233.923.4418.91044
16.48275.81803.074.0717.40033
17.38275.81803.073.7317.60033
15.28275.81803.073.78180033
10.484722052.935.2517.980034
10.4846021535.42417.820034
14.784402303.235.34517.420034
32.4478.7664.082.219.471141
30.4475.7524.931.61518.521142
33.9471.1654.221.83519.91141
21.54120.1973.72.46520.011031
15.583181502.763.5216.870032
15.283041503.153.43517.30032
13.383502453.733.8415.410034
19.284001753.083.84517.050032
27.3479664.081.93518.91141
264120.3914.432.1416.70152
30.4495.11133.771.51316.91152
15.883512644.223.1714.50154
19.761451753.622.7715.50156
1583013353.543.5714.60158
21.441211094.112.7818.61142
+
mpgcyldisphpdratwtqsecvsamgearcarb
2161601103.92.6216.460144
2161601103.92.87517.020144
22.84108933.852.3218.611141
21.462581103.083.21519.441031
18.783601753.153.4417.020032
18.162251052.763.4620.221031
14.383602453.213.5715.840034
24.44146.7623.693.19201042
22.84140.8953.923.1522.91042
19.26167.61233.923.4418.31044
17.86167.61233.923.4418.91044
16.48275.81803.074.0717.40033
17.38275.81803.073.7317.60033
15.28275.81803.073.78180033
10.484722052.935.2517.980034
10.4846021535.42417.820034
14.784402303.235.34517.420034
32.4478.7664.082.219.471141
30.4475.7524.931.61518.521142
33.9471.1654.221.83519.91141
21.54120.1973.72.46520.011031
15.583181502.763.5216.870032
15.283041503.153.43517.30032
13.383502453.733.8415.410034
19.284001753.083.84517.050032
27.3479664.081.93518.91141
264120.3914.432.1416.70152
30.4495.11133.771.51316.91152
15.883512644.223.1714.50154
19.761451753.622.7715.50156
1583013353.543.5714.60158
21.441211094.112.7818.61142

Sepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
5.13.51.40.2setosa
4.931.40.2setosa
4.73.21.30.2setosa
4.63.11.50.2setosa
53.61.40.2setosa
5.43.91.70.4setosa
4.63.41.40.3setosa
53.41.50.2setosa
4.42.91.40.2setosa
4.93.11.50.1setosa
5.43.71.50.2setosa
4.83.41.60.2setosa
4.831.40.1setosa
4.331.10.1setosa
5.841.20.2setosa
5.74.41.50.4setosa
5.43.91.30.4setosa
5.13.51.40.3setosa
5.73.81.70.3setosa
5.13.81.50.3setosa
5.43.41.70.2setosa
5.13.71.50.4setosa
4.63.610.2setosa
5.13.31.70.5setosa
4.83.41.90.2setosa
531.60.2setosa
53.41.60.4setosa
5.23.51.50.2setosa
5.23.41.40.2setosa
4.73.21.60.2setosa
4.83.11.60.2setosa
5.43.41.50.4setosa
5.24.11.50.1setosa
5.54.21.40.2setosa
4.93.11.50.2setosa
53.21.20.2setosa
5.53.51.30.2setosa
4.93.61.40.1setosa
4.431.30.2setosa
5.13.41.50.2setosa
53.51.30.3setosa
4.52.31.30.3setosa
4.43.21.30.2setosa
53.51.60.6setosa
5.13.81.90.4setosa
4.831.40.3setosa
5.13.81.60.2setosa
4.63.21.40.2setosa
5.33.71.50.2setosa
53.31.40.2setosa
73.24.71.4versicolor
6.43.24.51.5versicolor
6.93.14.91.5versicolor
5.52.341.3versicolor
6.52.84.61.5versicolor
5.72.84.51.3versicolor
6.33.34.71.6versicolor
4.92.43.31versicolor
6.62.94.61.3versicolor
5.22.73.91.4versicolor
523.51versicolor
5.934.21.5versicolor
62.241versicolor
6.12.94.71.4versicolor
5.62.93.61.3versicolor
6.73.14.41.4versicolor
5.634.51.5versicolor
5.82.74.11versicolor
6.22.24.51.5versicolor
5.62.53.91.1versicolor
5.93.24.81.8versicolor
6.12.841.3versicolor
6.32.54.91.5versicolor
6.12.84.71.2versicolor
6.42.94.31.3versicolor
6.634.41.4versicolor
6.82.84.81.4versicolor
6.7351.7versicolor
62.94.51.5versicolor
5.72.63.51versicolor
5.52.43.81.1versicolor
5.52.43.71versicolor
5.82.73.91.2versicolor
62.75.11.6versicolor
5.434.51.5versicolor
63.44.51.6versicolor
6.73.14.71.5versicolor
6.32.34.41.3versicolor
5.634.11.3versicolor
5.52.541.3versicolor
5.52.64.41.2versicolor
6.134.61.4versicolor
5.82.641.2versicolor
52.33.31versicolor
5.62.74.21.3versicolor
5.734.21.2versicolor
5.72.94.21.3versicolor
6.22.94.31.3versicolor
5.12.531.1versicolor
5.72.84.11.3versicolor
6.33.362.5virginica
5.82.75.11.9virginica
7.135.92.1virginica
6.32.95.61.8virginica
6.535.82.2virginica
7.636.62.1virginica
4.92.54.51.7virginica
7.32.96.31.8virginica
6.72.55.81.8virginica
7.23.66.12.5virginica
6.53.25.12virginica
6.42.75.31.9virginica
6.835.52.1virginica
5.72.552virginica
5.82.85.12.4virginica
6.43.25.32.3virginica
6.535.51.8virginica
7.73.86.72.2virginica
7.72.66.92.3virginica
62.251.5virginica
6.93.25.72.3virginica
5.62.84.92virginica
7.72.86.72virginica
6.32.74.91.8virginica
6.73.35.72.1virginica
7.23.261.8virginica
6.22.84.81.8virginica
6.134.91.8virginica
6.42.85.62.1virginica
7.235.81.6virginica
7.42.86.11.9virginica
7.93.86.42virginica
6.42.85.62.2virginica
6.32.85.11.5virginica
6.12.65.61.4virginica
7.736.12.3virginica
6.33.45.62.4virginica
6.43.15.51.8virginica
634.81.8virginica
6.93.15.42.1virginica
6.73.15.62.4virginica
6.93.15.12.3virginica
5.82.75.11.9virginica
6.83.25.92.3virginica
6.73.35.72.5virginica
6.735.22.3virginica
6.32.551.9virginica
6.535.22virginica
6.23.45.42.3virginica
5.935.11.8virginica
diff --git a/tests/testthat/test_import_list.R b/tests/testthat/test_import_list.R index 74eac4f..f57772f 100644 --- a/tests/testthat/test_import_list.R +++ b/tests/testthat/test_import_list.R @@ -51,9 +51,11 @@ test_that("Object names are preserved by import_list()", { expected_names <- c("mtcars1", "mtcars2", "mtcars3") dat_xls <- import_list("mtcars.xlsx") dat_csv <- import_list(c("mtcars1.csv","mtcars2.csv","mtcars3.csv")) + dat_html <- import_list(system.file("examples", "twotables.html", package = "rio")) expect_identical(names(dat_xls), expected_names) expect_identical(names(dat_csv), expected_names) + expect_identical(names(dat_html), c("mtcars", "")) unlink(c("mtcars.xlsx", "mtcars1.csv","mtcars2.csv","mtcars3.csv")) }) From 0d7c57952fe9cd785f22e1545f6f3b36a2c524c7 Mon Sep 17 00:00:00 2001 From: Ruaridh Williamson Date: Tue, 10 Oct 2017 10:16:23 +0100 Subject: [PATCH 6/8] Cleanup and update description --- DESCRIPTION | 5 +++-- R/import_list.R | 10 +++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6ae777..9aa3003 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: rio Type: Package Title: A Swiss-Army Knife for Data I/O Version: 0.5.7 -Date: 2017-07-26 +Date: 2017-10-10 Authors@R: c(person("Jason", "Becker", role = "ctb", email = "jason@jbecker.co"), person("Chung-hong", "Chan", role = "aut", email = "chainsawtiney@gmail.com"), person("Geoffrey CH", "Chan", role = "ctb", email = "gefchchan@gmail.com"), @@ -10,7 +10,8 @@ Authors@R: c(person("Jason", "Becker", role = "ctb", email = "jason@jbecker.co") person("Christopher", "Gandrud", role = "ctb"), person("Andrew", "MacDonald", role = "ctb"), person("Ista", "Zahn", role = "ctb"), - person("Stanislaus", "Stadlmann", role = "ctb")) + person("Stanislaus", "Stadlmann", role = "ctb"), + person("Ruaridh", "Williamson", role = "ctb", email = "ruaridh.williamson@gmail.com")) Description: Streamlined data import and export by making assumptions that the user is probably willing to make: 'import()' and 'export()' determine the data structure from the file extension, reasonable defaults are used for diff --git a/R/import_list.R b/R/import_list.R index f92ce73..b7359e4 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -70,13 +70,9 @@ function(file, requireNamespace("xml2", quietly = TRUE) tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table") which <- seq_along(tables) - names(which) <- sapply(xml2::xml_attrs(tables), function(x) { - if ("class" %in% names(x)) { - x["class"] - } else { - "" - } - }) + names(which) <- sapply(xml2::xml_attrs(tables), + function(x) if ("class" %in% names(x)) x["class"] else "" + ) } else if (get_ext(file) %in% c("xls","xlsx")) { requireNamespace("readxl", quietly = TRUE) whichnames <- readxl::excel_sheets(path = file) From 258a50c817ba41a6639a3b89eea95221feb23ee6 Mon Sep 17 00:00:00 2001 From: Ruaridh Williamson Date: Wed, 11 Oct 2017 20:35:25 +0100 Subject: [PATCH 7/8] Improve removal of file extensions and update tests --- R/import_list.R | 8 ++++++-- tests/testthat/test_import_list.R | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/import_list.R b/R/import_list.R index b7359e4..8c37ccf 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -47,8 +47,12 @@ function(file, if (missing(setclass)) { setclass <- NULL } + strip_exts <- function(file) { + exts <- paste0("\\.", lapply(file, tools::file_ext), "$") + sapply(seq_along(file), function(x) gsub(exts[x], "", file[x])) + } if (length(file) > 1) { - names(file) <- gsub(paste0("\\.", tools::file_ext(file[1]), "$"), "", file, ignore.case = TRUE) + names(file) <- strip_exts(file) x <- lapply(file, function(thisfile) { out <- try(import(thisfile, setclass = setclass, ...), silent = TRUE) if (inherits(out, "try-error")) { @@ -81,7 +85,7 @@ function(file, } else if (get_ext(file) %in% c("zip")) { whichnames <- utils::unzip(file, list = TRUE)[, "Name"] which <- seq_along(whichnames) - names(which) <- whichnames + names(which) <- strip_exts(whichnames) } else { which <- 1 } diff --git a/tests/testthat/test_import_list.R b/tests/testthat/test_import_list.R index f57772f..e1f44c8 100644 --- a/tests/testthat/test_import_list.R +++ b/tests/testthat/test_import_list.R @@ -46,18 +46,18 @@ test_that("Object names are preserved by import_list()", { mtcars2 = mtcars[11:20,], mtcars3 = mtcars[21:32,]), "mtcars.xlsx") export(mtcars[1:10,], "mtcars1.csv") - export(mtcars[11:20,], "mtcars2.csv") + export(mtcars[11:20,], "mtcars2.tsv") export(mtcars[21:32,], "mtcars3.csv") expected_names <- c("mtcars1", "mtcars2", "mtcars3") dat_xls <- import_list("mtcars.xlsx") - dat_csv <- import_list(c("mtcars1.csv","mtcars2.csv","mtcars3.csv")) + dat_csv <- import_list(c("mtcars1.csv","mtcars2.tsv","mtcars3.csv")) dat_html <- import_list(system.file("examples", "twotables.html", package = "rio")) expect_identical(names(dat_xls), expected_names) expect_identical(names(dat_csv), expected_names) expect_identical(names(dat_html), c("mtcars", "")) - unlink(c("mtcars.xlsx", "mtcars1.csv","mtcars2.csv","mtcars3.csv")) + unlink(c("mtcars.xlsx", "mtcars1.csv","mtcars2.tsv","mtcars3.csv")) }) unlink("data.rdata") From 1781e2e990ee87af572e5b3431636c0c5ded0d5d Mon Sep 17 00:00:00 2001 From: Ruaridh Williamson Date: Sun, 22 Oct 2017 16:23:00 +0100 Subject: [PATCH 8/8] Simplify strip_exts --- R/import_list.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/import_list.R b/R/import_list.R index 8c37ccf..d6c5b80 100644 --- a/R/import_list.R +++ b/R/import_list.R @@ -48,8 +48,7 @@ function(file, setclass <- NULL } strip_exts <- function(file) { - exts <- paste0("\\.", lapply(file, tools::file_ext), "$") - sapply(seq_along(file), function(x) gsub(exts[x], "", file[x])) + sapply(file, function(x) tools::file_path_sans_ext(basename(x))) } if (length(file) > 1) { names(file) <- strip_exts(file)