diff --git a/NEWS.md b/NEWS.md index c62f996..11acedd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,7 @@ - write all documentation blocks in markdown #311 - remove all @importFrom #325 h/t David Schoch - rearrange "Package Philosophy" as a Vignette #320 + - Create a single source of truth about all import and export functions #313 * New authors - David Schoch @schochastics diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000..d7d3714 Binary files /dev/null and b/R/sysdata.rda differ diff --git a/README.Rmd b/README.Rmd index 37dfb54..5abfaf0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -91,46 +91,32 @@ install_formats() The full list of supported formats is below: -| Format | Typical Extension | Import Package | Export Package | Installed by Default | -| ------ | --------- | -------------- | -------------- | -------------------- | -| Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) (but [deprecated](https://github.com/tidyverse/haven/issues/224)) | Yes | -| SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | 
[**haven**](https://cran.r-project.org/package=haven) | Yes | -| Stata | .dta | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes | -| Excel | .xls | [**readxl**](https://cran.r-project.org/package=readxl) | | Yes | -| Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes | -| R syntax | .R | **base** | **base** | Yes | -| Saved R objects | .RData, .rda | **base** | **base** | Yes | -| Serialized R objects | .rds | **base** | **base** | Yes | -| Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| "XBASE" database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Data Interchange Format | .dif | **utils** | | Yes | -| Fortran data | no recognized extension | **utils** | | Yes | -| Fixed-width format data | .fwf | **utils** | **utils** | Yes | -| gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes | -| Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No | -| Feather R/Python interchange format | .feather | 
[**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No | -| JSON | .json | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No | -| Matlab | .mat | [**rmatio**](https://cran.r-project.org/package=rmatio) | [**rmatio**](https://cran.r-project.org/package=rmatio) | No | -| OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No | -| HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No | -| Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No | -| [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | | -| Graphpad Prism | .pzfx | [**pzfx**](https://cran.r-project.org/package=pzfx) | [**pzfx**](https://cran.r-project.org/package=pzfx) | No | -| Serialized R objects | .qs | [**qs**](https://cran.r-project.org/package=qs) | [**qs**](https://cran.r-project.org/package=qs) | No | +```{r, include = FALSE} +suppressPackageStartupMessages(library(data.table)) +``` + +```{r featuretable, echo = FALSE} +rf <- data.table(rio:::rio_formats)[!input %in% c(",", ";", "|", "\\t") & type %in% c("import", "suggest", "archive"), !"ext"] +short_rf <- rf[, paste(input, collapse = " / "), by = format_name] +type_rf <- unique(rf[,c("format_name", "type", 
"import_function", "export_function", "note")]) + +feature_table <- short_rf[type_rf, on = .(format_name)] + +colnames(feature_table)[2] <- "signature" + +setorder(feature_table, "type", "format_name") +feature_table$import_function <- stringi::stri_extract_first(feature_table$import_function, regex = "[a-zA-Z0-9\\.]+") +feature_table$import_function[is.na(feature_table$import_function)] <- "" +feature_table$export_function <- stringi::stri_extract_first(feature_table$export_function, regex = "[a-zA-Z0-9\\.]+") +feature_table$export_function[is.na(feature_table$export_function)] <- "" + +feature_table$type <- ifelse(feature_table$type %in% c("suggest"), "Suggest", "Default") +feature_table <- feature_table[,c("format_name", "signature", "import_function", "export_function", "type", "note")] + +colnames(feature_table) <- c("Format", "Extensions / \"fmt\"", "Import Package", "Export Package", "Type", "Note") + +knitr::kable(feature_table) +``` Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message pointing to a package and import or export function. Unrecognized formats will yield a simple "Unrecognized file format" error. 
diff --git a/README.md b/README.md index 4142f3f..a196472 100644 --- a/README.md +++ b/README.md @@ -130,46 +130,49 @@ install_formats() The full list of supported formats is below: -| Format | Typical Extension | Import Package | Export Package | Installed by Default | -| ----------------------------------------------------- | ----------------------- | --------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | -------------------- | -| Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) (but [deprecated](https://github.com/tidyverse/haven/issues/224)) | Yes | -| SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| Stata | .dta | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | 
[**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes | -| Excel | .xls | [**readxl**](https://cran.r-project.org/package=readxl) | | Yes | -| Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes | -| R syntax | .R | **base** | **base** | Yes | -| Saved R objects | .RData, .rda | **base** | **base** | Yes | -| Serialized R objects | .rds | **base** | **base** | Yes | -| Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| “XBASE” database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Data Interchange Format | .dif | **utils** | | Yes | -| Fortran data | no recognized extension | **utils** | | Yes | -| Fixed-width format data | .fwf | **utils** | **utils** | Yes | -| gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes | -| Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No | -| Feather R/Python interchange format | .feather | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No | -| JSON | .json | 
[**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No | -| Matlab | .mat | [**rmatio**](https://cran.r-project.org/package=rmatio) | [**rmatio**](https://cran.r-project.org/package=rmatio) | No | -| OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No | -| HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No | -| Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No | -| [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | | -| Graphpad Prism | .pzfx | [**pzfx**](https://cran.r-project.org/package=pzfx) | [**pzfx**](https://cran.r-project.org/package=pzfx) | No | -| Serialized R objects | .qs | [**qs**](https://cran.r-project.org/package=qs) | [**qs**](https://cran.r-project.org/package=qs) | No | +| Format | Extensions / “fmt” | Import Package | Export Package | Type | Note | +| :---------------------------------- | :--------------------------- | :------------- | :------------- | :------ | :---------------------- | +| Archive files (handled by tar) | bzip2 / xz / gz / gzip / tar | utils | utils | Default | | +| Zip files | zip | utils | utils | Default | | +| CSVY (CSV + YAML metadata header) | csvy | data.table | data.table | Default | | +| Comma-separated data | csv | data.table | data.table | Default | | +| Comma-separated data (European) | csv2 | data.table | data.table | Default | | +| Data Interchange Format | dif 
| utils | | Default | | +| Epiinfo | epiinfo / rec | foreign | | Default | | +| Excel | excel / xlsx | readxl | openxlsx | Default | | +| Excel (Legacy) | xls | readxl | | Default | | +| Fixed-width format data | fwf | utils | utils | Default | | +| Fortran data | fortran | utils | | Default | No recognized extension | +| Google Sheets | csv | data.table | | Default | As comma-separated data | +| Minitab | minitab / mtp | foreign | | Default | | +| Pipe-separated data | psv | data.table | data.table | Default | | +| R syntax | r | base | base | Default | | +| SAS | sas / sas7bdat | haven | haven | Default | Export is deprecated | +| SAS XPORT | xport / xpt | haven | haven | Default | | +| SPSS | sav / spss | haven | haven | Default | | +| SPSS (compressed) | zsav | haven | haven | Default | | +| SPSS Portable | por | haven | | Default | | +| Saved R objects | rda / rdata | base | base | Default | | +| Serialized R objects | rds | base | base | Default | | +| Stata | dta / stata | haven | haven | Default | | +| Systat | syd / systat | foreign | | Default | | +| Tab-separated data | tsv / txt | data.table | data.table | Default | | +| Text Representations of R Objects | dump | base | base | Default | | +| Weka Attribute-Relation File Format | arff / weka | foreign | foreign | Default | | +| XBASE database files | dbf | foreign | foreign | Default | | +| Apache Arrow (Parquet) | parquet | arrow | arrow | Suggest | | +| Clipboard | clipboard | clipr | clipr | Suggest | default is tsv | +| EViews | eviews / wf1 | hexView | | Suggest | | +| Fast Storage | fst | fst | fst | Suggest | | +| Feather R/Python interchange format | feather | arrow | arrow | Suggest | | +| Graphpad Prism | pzfx | pzfx | pzfx | Suggest | | +| HTML Tables | htm / html | xml2 | xml2 | Suggest | | +| JSON | json | jsonlite | jsonlite | Suggest | | +| Matlab | mat / matlab | rmatio | rmatio | Suggest | | +| OpenDocument Spreadsheet | ods | readODS | readODS | Suggest | | +| Serialized R objects 
(Quick) | qs | qs | qs | Suggest | | +| Shallow XML documents | xml | xml2 | xml2 | Suggest | | +| YAML | yaml / yml | yaml | yaml | Suggest | | Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message diff --git a/data-raw/convert.R b/data-raw/convert.R new file mode 100644 index 0000000..d2d8610 --- /dev/null +++ b/data-raw/convert.R @@ -0,0 +1,6 @@ +writeLines( + jsonlite::prettify(jsonlite::toJSON(jsonlite::read_json(here::here("data-raw/single.json"), TRUE))), + here::here("data-raw/single.json") +) +rio_formats <- rio::import(here::here("data-raw", "single.json")) +usethis::use_data(rio_formats, overwrite = TRUE, internal = TRUE) diff --git a/data-raw/single.json b/data-raw/single.json new file mode 100644 index 0000000..d341409 --- /dev/null +++ b/data-raw/single.json @@ -0,0 +1,823 @@ +[ + { + "input": "parquet", + "fmt": "parquet", + "ext": "parquet", + "type": "suggest", + "format_name": "Apache Arrow (Parquet)", + "import_function": "arrow::read_parquet", + "export_function": "arrow::write_parquet", + "note": "" + }, + { + "input": "bzip2", + "fmt": "tar", + "ext": "bzip2", + "type": "archive", + "format_name": "Archive files (handled by tar)", + "import_function": "utils::untar", + "export_function": "utils::tar", + "note": "" + }, + { + "input": "xz", + "fmt": "tar", + "ext": "xz", + "type": "archive", + "format_name": "Archive files (handled by tar)", + "import_function": "utils::untar", + "export_function": "utils::tar", + "note": "" + }, + { + "input": "gz", + "fmt": "tar", + "ext": "gz", + "type": "archive", + "format_name": "Archive files (handled by tar)", + "import_function": "utils::untar", + "export_function": "utils::tar", + "note": "" + }, + { + "input": "gzip", + "fmt": "tar", + "ext": "gzip", + "type": "archive", + "format_name": "Archive files (handled by tar)", + "import_function": "utils::untar", + "export_function": "utils::tar", + "note": "" + }, + { + 
"input": "tar", + "fmt": "tar", + "ext": "tar", + "type": "archive", + "format_name": "Archive files (handled by tar)", + "import_function": "utils::untar", + "export_function": "utils::tar", + "note": "" + }, + { + "input": "bmp", + "fmt": "bmp", + "ext": "bmp", + "type": "known", + "format_name": "BMP images", + "import_function": "bmp::read.bmp", + "export_function": "", + "note": "" + }, + { + "input": "bib", + "fmt": "bib", + "ext": "bib", + "type": "known", + "format_name": "BibTeX files", + "import_function": "bib2df::bib2df", + "export_function": "", + "note": "" + }, + { + "input": "bibtex", + "fmt": "bib", + "ext": "bib", + "type": "known", + "format_name": "BibTeX files", + "import_function": "bib2df::bib2df", + "export_function": "", + "note": "" + }, + { + "input": "csvy", + "fmt": "csvy", + "ext": "csvy", + "type": "import", + "format_name": "CSVY (CSV + YAML metadata header)", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "clipboard", + "fmt": "clipboard", + "ext": "", + "type": "suggest", + "format_name": "Clipboard", + "import_function": "clipr::read_clip_tbl", + "export_function": "clipr::write_clip", + "note": "default is tsv" + }, + { + "input": "csv", + "fmt": "csv", + "ext": "csv", + "type": "import", + "format_name": "Comma-separated data", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": ";", + "fmt": "csv2", + "ext": "csv2", + "type": "import", + "format_name": "Comma-separated data (European)", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "csv2", + "fmt": "csv2", + "ext": "csv2", + "type": "import", + "format_name": "Comma-separated data (European)", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "dif", + "fmt": "dif", + "ext": "dif", + "type": "import", + 
"format_name": "Data Interchange Format", + "import_function": "utils::read.DIF", + "export_function": "", + "note": "" + }, + { + "input": "eviews", + "fmt": "eviews", + "ext": "eviews", + "type": "suggest", + "format_name": "EViews", + "import_function": "hexView::readEViews", + "export_function": "", + "note": "" + }, + { + "input": "wf1", + "fmt": "eviews", + "ext": "eviews", + "type": "suggest", + "format_name": "EViews", + "import_function": "hexView::readEViews", + "export_function": "", + "note": "" + }, + { + "input": "epiinfo", + "fmt": "rec", + "ext": "rec", + "type": "import", + "format_name": "Epiinfo", + "import_function": "foreign::read.epiinfo", + "export_function": "", + "note": "" + }, + { + "input": "rec", + "fmt": "rec", + "ext": "rec", + "type": "import", + "format_name": "Epiinfo", + "import_function": "foreign::read.epiinfo", + "export_function": "", + "note": "" + }, + { + "input": "excel", + "fmt": "xlsx", + "ext": "xlsx", + "type": "import", + "format_name": "Excel", + "import_function": "readxl::read_xlsx", + "export_function": "openxlsx::write.xlsx", + "note": "" + }, + { + "input": "xls", + "fmt": "xls", + "ext": "xls", + "type": "import", + "format_name": "Excel (Legacy)", + "import_function": "readxl::read_xls", + "export_function": "", + "note": "" + }, + { + "input": "xlsx", + "fmt": "xlsx", + "ext": "xlsx", + "type": "import", + "format_name": "Excel", + "import_function": "readxl::read_xlsx", + "export_function": "openxlsx::write.xlsx", + "note": "" + }, + { + "input": "fst", + "fmt": "fst", + "ext": "fst", + "type": "suggest", + "format_name": "Fast Storage", + "import_function": "fst::read.fst", + "export_function": "fst::write.fst", + "note": "" + }, + { + "input": "feather", + "fmt": "feather", + "ext": "feather", + "type": "suggest", + "format_name": "Feather R/Python interchange format", + "import_function": "arrow::read_feather", + "export_function": "arrow::write_feather", + "note": "" + }, + { + "input": "fwf", + "fmt": 
"fwf", + "ext": "fwf", + "type": "import", + "format_name": "Fixed-width format data", + "import_function": "utils::read.table", + "export_function": "utils::write.table", + "note": "" + }, + { + "input": "fortran", + "fmt": "fortran", + "ext": "", + "type": "import", + "format_name": "Fortran data", + "import_function": "utils::read.fortran", + "export_function": "", + "note": "No recognized extension" + }, + { + "input": "gnumeric", + "fmt": "gnumeric", + "ext": "gnumeric", + "type": "known", + "format_name": "Gnumeric", + "import_function": "gnumeric::read.gnumeric.sheet", + "export_function": "", + "note": "" + }, + { + "input": "csv", + "fmt": "csv", + "ext": "", + "type": "import", + "format_name": "Google Sheets", + "import_function": "data.table::fread", + "export_function": "", + "note": "As comma-separated data" + }, + { + "input": "gexf", + "fmt": "gexf", + "ext": "gexf", + "type": "known", + "format_name": "Graph Exchange XML Format", + "import_function": "rgexf::read.gexf", + "export_function": "", + "note": "" + }, + { + "input": "pzfx", + "fmt": "pzfx", + "ext": "pzfx", + "type": "suggest", + "format_name": "Graphpad Prism", + "import_function": "pzfx::read_pzfx", + "export_function": "pzfx::write_pzfx", + "note": "" + }, + { + "input": "htm", + "fmt": "html", + "ext": "html", + "type": "suggest", + "format_name": "HTML Tables", + "import_function": "xml2::read_html", + "export_function": "xml2::write_xml", + "note": "" + }, + { + "input": "html", + "fmt": "html", + "ext": "html", + "type": "suggest", + "format_name": "HTML Tables", + "import_function": "xml2::read_html", + "export_function": "xml2::write_xml", + "note": "" + }, + { + "input": "jpeg", + "fmt": "jpg", + "ext": "jpg", + "type": "known", + "format_name": "JPEG images", + "import_function": "jpeg::readJPEG", + "export_function": "", + "note": "" + }, + { + "input": "jpg", + "fmt": "jpg", + "ext": "jpg", + "type": "known", + "format_name": "JPEG images", + "import_function": 
"jpeg::readJPEG", + "export_function": "", + "note": "" + }, + { + "input": "json", + "fmt": "json", + "ext": "json", + "type": "suggest", + "format_name": "JSON", + "import_function": "jsonlite::fromJSON", + "export_function": "jsonlite::toJSON", + "note": "" + }, + { + "input": "mat", + "fmt": "matlab", + "ext": "matlab", + "type": "suggest", + "format_name": "Matlab", + "import_function": "rmatio::read.mat", + "export_function": "rmatio::write.mat", + "note": "" + }, + { + "input": "matlab", + "fmt": "matlab", + "ext": "matlab", + "type": "suggest", + "format_name": "Matlab", + "import_function": "rmatio::read.mat", + "export_function": "rmatio::write.mat", + "note": "" + }, + { + "input": "minitab", + "fmt": "mtp", + "ext": "mtp", + "type": "import", + "format_name": "Minitab", + "import_function": "foreign::read.mtp", + "export_function": "", + "note": "" + }, + { + "input": "mtp", + "fmt": "mtp", + "ext": "mtp", + "type": "import", + "format_name": "Minitab", + "import_function": "foreign::read.mtp", + "export_function": "", + "note": "" + }, + { + "input": "ods", + "fmt": "ods", + "ext": "ods", + "type": "suggest", + "format_name": "OpenDocument Spreadsheet", + "import_function": "readODS::read_ods", + "export_function": "readODS::write_ods", + "note": "" + }, + { + "input": "pdf", + "fmt": "pdf", + "ext": "pdf", + "type": "known", + "format_name": "PDF files", + "import_function": "tabulizer::extract_tables", + "export_function": "", + "note": "" + }, + { + "input": "png", + "fmt": "png", + "ext": "png", + "type": "known", + "format_name": "PNG images", + "import_function": "png::readPNG", + "export_function": "", + "note": "" + }, + { + "input": "npy", + "fmt": "npy", + "ext": "npy", + "type": "known", + "format_name": "Pickled Numpy arrays", + "import_function": "RcppCNPy::npyLoad", + "export_function": "", + "note": "" + }, + { + "input": "|", + "fmt": "psv", + "ext": "psv", + "type": "import", + "format_name": "Pipe-separated data", + "import_function": 
"data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "psv", + "fmt": "psv", + "ext": "psv", + "type": "import", + "format_name": "Pipe-separated data", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "beancount", + "fmt": "ledger", + "ext": "ledger", + "type": "enhance", + "format_name": "Plain text accounting format", + "import_function": "ledger::register", + "export_function": "", + "note": "" + }, + { + "input": "hledger", + "fmt": "ledger", + "ext": "ledger", + "type": "enhance", + "format_name": "Plain text accounting format", + "import_function": "ledger::register", + "export_function": "", + "note": "" + }, + { + "input": "ledger", + "fmt": "ledger", + "ext": "ledger", + "type": "enhance", + "format_name": "Plain text accounting format", + "import_function": "ledger::register", + "export_function": "", + "note": "" + }, + { + "input": "bean", + "fmt": "ledger", + "ext": "ledger", + "type": "enhance", + "format_name": "Plain text accounting format", + "import_function": "ledger::register", + "export_function": "", + "note": "" + }, + { + "input": "r", + "fmt": "r", + "ext": "r", + "type": "import", + "format_name": "R syntax", + "import_function": "base::dget", + "export_function": "base::dput", + "note": "" + }, + { + "input": "sas", + "fmt": "sas7bdat", + "ext": "sas7bdat", + "type": "import", + "format_name": "SAS", + "import_function": "haven::read_sas", + "export_function": "haven::write_sas", + "note": "Export is deprecated" + }, + { + "input": "sas7bdat", + "fmt": "sas7bdat", + "ext": "sas7bdat", + "type": "import", + "format_name": "SAS", + "import_function": "haven::read_sas", + "export_function": "haven::write_sas", + "note": "Export is deprecated" + }, + { + "input": "xport", + "fmt": "xpt", + "ext": "xpt", + "type": "import", + "format_name": "SAS XPORT", + "import_function": "haven::read_xpt", + "export_function": "haven::write_xpt", 
+ "note": "" + }, + { + "input": "xpt", + "fmt": "xpt", + "ext": "xpt", + "type": "import", + "format_name": "SAS XPORT", + "import_function": "haven::read_xpt", + "export_function": "haven::write_xpt", + "note": "" + }, + { + "input": "sav", + "fmt": "sav", + "ext": "sav", + "type": "import", + "format_name": "SPSS", + "import_function": "haven::read_sav", + "export_function": "haven::write_sav", + "note": "" + }, + { + "input": "spss", + "fmt": "sav", + "ext": "sav", + "type": "import", + "format_name": "SPSS", + "import_function": "haven::read_sav", + "export_function": "haven::write_sav", + "note": "" + }, + { + "input": "zsav", + "fmt": "zsav", + "ext": "zsav", + "type": "import", + "format_name": "SPSS (compressed)", + "import_function": "haven::read_sav", + "export_function": "haven::write_sav", + "note": "" + }, + { + "input": "por", + "fmt": "spss", + "ext": "spss", + "type": "import", + "format_name": "SPSS Portable", + "import_function": "haven::read_por", + "export_function": "", + "note": "" + }, + { + "input": "rda", + "fmt": "rdata", + "ext": "rdata", + "type": "import", + "format_name": "Saved R objects", + "import_function": "base::load", + "export_function": "base::save", + "note": "" + }, + { + "input": "rdata", + "fmt": "rdata", + "ext": "rdata", + "type": "import", + "format_name": "Saved R objects", + "import_function": "base::load", + "export_function": "base::save", + "note": "" + }, + { + "input": "rds", + "fmt": "rds", + "ext": "rds", + "type": "import", + "format_name": "Serialized R objects", + "import_function": "base::readRDS", + "export_function": "base::saveRDS", + "note": "" + }, + { + "input": "qs", + "fmt": "qs", + "ext": "qs", + "type": "suggest", + "format_name": "Serialized R objects (Quick)", + "import_function": "qs::qread", + "export_function": "qs::qsave", + "note": "" + }, + { + "input": "xml", + "fmt": "xml", + "ext": "xml", + "type": "suggest", + "format_name": "Shallow XML documents", + "import_function": 
"xml2::read_xml", + "export_function": "xml2::write_xml", + "note": "" + }, + { + "input": "sss", + "fmt": "sss", + "ext": "sss", + "type": "known", + "format_name": "Standard Survey Structure (Triple-S)", + "import_function": "sss::read.sss", + "export_function": "", + "note": "" + }, + { + "input": "dta", + "fmt": "dta", + "ext": "dta", + "type": "import", + "format_name": "Stata", + "import_function": "haven::read_dta", + "export_function": "haven::write_dta", + "note": "" + }, + { + "input": "stata", + "fmt": "dta", + "ext": "dta", + "type": "import", + "format_name": "Stata", + "import_function": "haven::read_dta", + "export_function": "haven::write_dta", + "note": "" + }, + { + "input": "sdmx", + "fmt": "sdmx", + "ext": "sdmx", + "type": "known", + "format_name": "Statistical Data and Metadata eXchange", + "import_function": "sdmx::readSDMX", + "export_function": "", + "note": "" + }, + { + "input": "syd", + "fmt": "syd", + "ext": "syd", + "type": "import", + "format_name": "Systat", + "import_function": "foreign::read.systat", + "export_function": "", + "note": "" + }, + { + "input": "systat", + "fmt": "syd", + "ext": "syd", + "type": "import", + "format_name": "Systat", + "import_function": "foreign::read.systat", + "export_function": "", + "note": "" + }, + { + "input": "tif", + "fmt": "tiff", + "ext": "tiff", + "type": "known", + "format_name": "TIFF images", + "import_function": "tiff::readTIFF", + "export_function": "", + "note": "" + }, + { + "input": "tiff", + "fmt": "tiff", + "ext": "tiff", + "type": "known", + "format_name": "TIFF images", + "import_function": "tiff::readTIFF", + "export_function": "", + "note": "" + }, + { + "input": "\\t", + "fmt": "tsv", + "ext": "tsv", + "type": "import", + "format_name": "Tab-separated data", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "tsv", + "fmt": "tsv", + "ext": "tsv", + "type": "import", + "format_name": "Tab-separated data", + 
"import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "txt", + "fmt": "tsv", + "ext": "tsv", + "type": "import", + "format_name": "Tab-separated data", + "import_function": "data.table::fread", + "export_function": "data.table::fwrite", + "note": "" + }, + { + "input": "dump", + "fmt": "dump", + "ext": "dump", + "type": "import", + "format_name": "Text Representations of R Objects", + "import_function": "base::dump", + "export_function": "base::source", + "note": "" + }, + { + "input": "arff", + "fmt": "arff", + "ext": "arff", + "type": "import", + "format_name": "Weka Attribute-Relation File Format", + "import_function": "foreign::read.arff", + "export_function": "foreign::write.arff", + "note": "" + }, + { + "input": "weka", + "fmt": "arff", + "ext": "arff", + "type": "import", + "format_name": "Weka Attribute-Relation File Format", + "import_function": "foreign::read.arff", + "export_function": "foreign::write.arff", + "note": "" + }, + { + "input": "dbf", + "fmt": "dbf", + "ext": "dbf", + "type": "import", + "format_name": "XBASE database files", + "import_function": "foreign::read.dbf", + "export_function": "foreign::write.dbf", + "note": "" + }, + { + "input": "yaml", + "fmt": "yml", + "ext": "yml", + "type": "suggest", + "format_name": "YAML", + "import_function": "yaml::read_yaml", + "export_function": "yaml::write_yaml", + "note": "" + }, + { + "input": "yml", + "fmt": "yml", + "ext": "yml", + "type": "suggest", + "format_name": "YAML", + "import_function": "yaml::read_yaml", + "export_function": "yaml::write_yaml", + "note": "" + }, + { + "input": "zip", + "fmt": "zip", + "ext": "zip", + "type": "archive", + "format_name": "Zip files", + "import_function": "utils::unzip", + "export_function": "utils::zip", + "note": "" + }, + { + "input": ",", + "fmt": "csv", + "ext": "csv", + "type": "import", + "format_name": "Comma-separated data", + "import_function": "data.table::fread", + 
"export_function": "data.table::fwrite", + "note": "" + } +] + diff --git a/vignettes/rio.Rmd b/vignettes/rio.Rmd index c3c7732..3529e56 100644 --- a/vignettes/rio.Rmd +++ b/vignettes/rio.Rmd @@ -33,45 +33,32 @@ install_formats() The full list of supported formats is below: -| Format | Typical Extension | Import Package | Export Package | Installed by Default | -| ------ | --------- | -------------- | -------------- | -------------------- | -| Comma-separated data | .csv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Pipe-separated data | .psv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| Tab-separated data | .tsv | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| CSVY (CSV + YAML metadata header) | .csvy | [**data.table**](https://cran.r-project.org/package=data.table) | [**data.table**](https://cran.r-project.org/package=data.table) | Yes | -| SAS | .sas7bdat | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS | .sav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS (compressed) | .zsav | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| Stata | .dta | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SAS XPORT | .xpt | [**haven**](https://cran.r-project.org/package=haven) | [**haven**](https://cran.r-project.org/package=haven) | Yes | -| SPSS Portable | .por | [**haven**](https://cran.r-project.org/package=haven) | | Yes | -| Excel | .xls | 
[**readxl**](https://cran.r-project.org/package=readxl) | | Yes | -| Excel | .xlsx | [**readxl**](https://cran.r-project.org/package=readxl) | [**openxlsx**](https://cran.r-project.org/package=openxlsx) | Yes | -| R syntax | .R | **base** | **base** | Yes | -| Saved R objects | .RData, .rda | **base** | **base** | Yes | -| Serialized R objects | .rds | **base** | **base** | Yes | -| Epiinfo | .rec | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Minitab | .mtp | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| Systat | .syd | [**foreign**](https://cran.r-project.org/package=foreign) | | Yes | -| "XBASE" database files | .dbf | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Weka Attribute-Relation File Format | .arff | [**foreign**](https://cran.r-project.org/package=foreign) | [**foreign**](https://cran.r-project.org/package=foreign) | Yes | -| Data Interchange Format | .dif | **utils** | | Yes | -| Fortran data | no recognized extension | **utils** | | Yes | -| Fixed-width format data | .fwf | **utils** | **utils** | Yes | -| gzip comma-separated data | .csv.gz | **utils** | **utils** | Yes | -| Apache Arrow (Parquet) | .parquet | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| EViews | .wf1 | [**hexView**](https://cran.r-project.org/package=hexView) | | No | -| Feather R/Python interchange format | .feather | [**arrow**](https://cran.r-project.org/package=arrow) | [**arrow**](https://cran.r-project.org/package=arrow) | No | -| Fast Storage | .fst | [**fst**](https://cran.r-project.org/package=fst) | [**fst**](https://cran.r-project.org/package=fst) | No | -| JSON | .json | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | [**jsonlite**](https://cran.r-project.org/package=jsonlite) | No | -| Matlab | .mat | 
[**rmatio**](https://cran.r-project.org/package=rmatio) | [**rmatio**](https://cran.r-project.org/package=rmatio) | No | -| OpenDocument Spreadsheet | .ods | [**readODS**](https://cran.r-project.org/package=readODS) | [**readODS**](https://cran.r-project.org/package=readODS) | No | -| HTML Tables | .html | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| Shallow XML documents | .xml | [**xml2**](https://cran.r-project.org/package=xml2) | [**xml2**](https://cran.r-project.org/package=xml2) | No | -| YAML | .yml | [**yaml**](https://cran.r-project.org/package=yaml) | [**yaml**](https://cran.r-project.org/package=yaml) | No | -| Clipboard | default is tsv | [**clipr**](https://cran.r-project.org/package=clipr) | [**clipr**](https://cran.r-project.org/package=clipr) | No | -| [Google Sheets](https://www.google.com/sheets/about/) | as Comma-separated data | | | | -| Serialized R objects | .qs | [**qs**](https://cran.r-project.org/package=qs) | [**qs**](https://cran.r-project.org/package=qs) | No |
+```{r, include = FALSE}
+suppressPackageStartupMessages(library(data.table))
+```
+
+```{r featuretable, echo = FALSE}
+# Render the supported-formats table from rio's internal format registry
+# (rio:::rio_formats) so the vignette never drifts from the package data.
+
+# Raw delimiter characters are accepted as `format =` shortcuts but are not
+# file extensions, so they are left out of the displayed table.
+delimiter_aliases <- c(",", ";", "|", "\\t")
+shown_types <- c("import", "suggest", "archive")
+rf <- data.table(rio:::rio_formats)[
+  !input %in% delimiter_aliases & type %in% shown_types
+]
+
+# One row per distinct format description, with its aliases collapsed into a
+# single "a / b" signature. Grouping on every descriptive column (rather than
+# on format_name alone followed by a join) keeps each alias next to its own
+# functions when two entries share a name but differ in import/export
+# function, instead of repeating the full alias list on every such row.
+feature_table <- rf[
+  ,
+  .(signature = paste(input, collapse = " / ")),
+  by = .(format_name, type, import_function, export_function, note)
+]
+
+# Sort on the raw type before it is relabelled below.
+setorder(feature_table, type, format_name)
+
+# Reduce "pkg::fun" to "pkg"; entries with no function become "".
+package_of <- function(fun) {
+  pkg <- stringi::stri_extract_first(fun, regex = "[a-zA-Z0-9\\.]+")
+  pkg[is.na(pkg)] <- ""
+  pkg
+}
+feature_table$import_function <- package_of(feature_table$import_function)
+feature_table$export_function <- package_of(feature_table$export_function)
+
+# Everything that is not a suggested package is labelled "Default".
+feature_table$type <- ifelse(feature_table$type %in% "suggest", "Suggest", "Default")
+
+setcolorder(
+  feature_table,
+  c("format_name", "signature", "import_function", "export_function", "type", "note")
+)
+setnames(
+  feature_table,
+  c("Format", "Extensions / \"fmt\"", "Import Package", "Export Package", "Type", "Note")
+)
+
+knitr::kable(feature_table)
+```
 Additionally, any format that is not supported by **rio** but that has a known R implementation will produce an informative error message pointing to a package and import or export function. Unrecognized formats will yield a simple "Unrecognized file format" error.