diff --git a/.gitignore b/.gitignore index 42be198..2b67fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ data/* .Renviron *.zip *.tsv.bz2 - +*.sqlite +*.sql diff --git a/data-raw/create_db.R b/data-raw/create_db.R index b2692cd..11cb4b4 100644 --- a/data-raw/create_db.R +++ b/data-raw/create_db.R @@ -1,17 +1,9 @@ -library(readr) -library(dplyr) -library(DBI) -library(R.utils) +# remotes::install_github("cboettig/arkdb") -itis <- read_tsv("data/itis.tsv.bz2") -ncbi <- read_tsv("data/ncbi.tsv.bz2") +library(arkdb) -taxa <- bind_rows(itis, ncbi) +files <- fs::dir_ls("data/", glob="*.tsv.bz2") +db <- unark(files, dbname = "data/taxa.sqlite", lines = 1e6) -db_path <- "data/taxa.sql" -con <- dbConnect(RSQLite::SQLite(), dbname=db_path) -dbListTables(con) -dbWriteTable(con, "taxa", taxa) -dbDisconnect(con) -R.utils::bzip2(db_path) +R.utils::bzip2("data/taxa.sqlite")