From 1220ea7e85229530341f66e572726c8cd34ced81 Mon Sep 17 00:00:00 2001 From: Carl Boettiger Date: Thu, 28 Jun 2018 20:48:09 +0000 Subject: [PATCH] WIP --- data-raw/fishbase.R | 43 ++++++++++++++++++++++++++++++++++--------- data-raw/gbif.R | 9 ++++----- data-raw/itis.R | 5 +++++ data-raw/tpl.R | 3 ++- data-raw/wd.R | 6 ++++++ 5 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 data-raw/wd.R diff --git a/data-raw/fishbase.R b/data-raw/fishbase.R index 46e982c..64ee127 100644 --- a/data-raw/fishbase.R +++ b/data-raw/fishbase.R @@ -1,4 +1,4 @@ -library(rfishbase) +library(rfishbase) # 3.0 library(tidyverse) fb <- as_tibble(rfishbase::load_taxa()) @@ -6,21 +6,46 @@ fb_wide <- fb %>% select( id = SpecCode, genus = Genus, species = Species, - subfamily = SubFamily, + subfamily = Subfamily, family = Family, order = Order, class = Class, - common_name = FBname) %>% + superclass = SuperClass) %>% mutate(phylum = "Chorodata", kingdom = "Animalia", - id = paste0("FB:", id)) %>% - select(id, species, genus, subfamily, - family, order, class, phylum, - kingdom, common_name) + id = paste0("FB:", id)) + +write_tsv(fb_wide, "data/fb_hierarchy.tsv.bz2") -write_tsv(fb_wide, "data/fb_wide.tsv.bz2") +species <- rfishbase:::fb_species() +synonyms <- rfishbase::synonyms(NULL) %>% + left_join(species) %>% + rename(id = SpecCode) %>% + select(id, + species = Species, + synonym, + type = Status, + syn_id = SynCode, + rank = TaxonLevel, + tsn = TSN, + col = CoL_ID, + worms = WoRMS_ID, + zoobank = ZooBank_ID) +## Consider preserving stock code? +common <- rfishbase:::fb_tbl("comnames") %>% + left_join(species) %>% + rename(id = SpecCode) %>% + select(id, + species = Species, + synonym = ComName, + language = Language) %>% + mutate(type = "common") +fb_synonyms <- +common %>% + bind_rows(synonyms) + slb <- as_tibble(rfishbase::load_taxa(server = "https://fishbase.ropensci.org/sealifebase")) slb_wide <- slb %>% @@ -36,7 +61,7 @@ slb_wide <- slb %>% select(id, species, genus, subfamily, family, order, class, common_name) -write_tsv(slb_wide, "data/slb_wide.tsv.bz2") +write_tsv(slb_wide, "data/slb_hierarchy.tsv.bz2") diff --git a/data-raw/gbif.R b/data-raw/gbif.R index 9d99f12..2e8debe 100644 --- a/data-raw/gbif.R +++ b/data-raw/gbif.R @@ -25,16 +25,15 @@ gbif_wide <- gbif_taxa %>% kingdom, phylum, class, order, family, genus, species, specific_epithet = specificEpithet, infraspecific_epithet = infraspecificEpithet, genericName, taxonomicStatus) -write_tsv(gbif_wide, "data/gbif_wide.tsv.bz2") +write_tsv(gbif_wide, "data/gbif_hierarchy.tsv.bz2") rm(gbif_wide) gbif_long <- gbif_taxa %>% - select(taxon_id, name = scientificName, rank = rank, genericName, taxonomicStatus) + select(id = taxon_id, name = scientificName, rank = rank, genericName, taxonomicStatus) write_tsv(gbif_long, "data/gbif_long.tsv.bz2") - -write_tsv(gbif_taxa, "data/gbif.tsv.bz2") -rm(gbif_taxa) +gbif_long %>% select(id, name, rank) %>% distinct() %>% +write_tsv("data/gbif_taxonid.tsv.bz2") diff --git a/data-raw/itis.R b/data-raw/itis.R index 3aa1c58..f241a61 100644 --- a/data-raw/itis.R +++ b/data-raw/itis.R @@ -117,12 +117,15 @@ system.time({ write_tsv(itis_wide, bzfile("data/itis_wide.tsv.bz2", compression=9)) }) +######################################################### ## Database prep for ITIS library(tidyverse) itis_long <- read_tsv("data/itis_long.tsv.bz2") itis_wide <- read_tsv("data/itis_wide.tsv.bz2") +fs::file_move("data/itis_wide.tsv.bz2", "data/itis_hierarchy.tsv.bz2") + ## accepted == valid ### https://www.itis.gov/submit_guidlines.html#usage @@ -136,6 +139,8 @@ itis_taxonid <- taxonid %>% filter(name_usage %in% c("accepted", "valid")) %>% s ## assert ids are unique itis_taxonid %>% pull(id) %>% duplicated() %>% any() %>% testthat::expect_false() +write_tsv(itis_taxonid, "data/itis_taxonid.tsv.bz2") + itis_paths <- itis_long %>% select(id, path_id, path, path_rank, path_rank_id) %>% distinct() %>% arrange(id) diff --git a/data-raw/tpl.R b/data-raw/tpl.R index eb20a21..2d2c766 100644 --- a/data-raw/tpl.R +++ b/data-raw/tpl.R @@ -29,7 +29,8 @@ write_tsv(tpl_wide, "data/tpl_wide.tsv.bz2") tpl_long <- tpl_wide %>% gather(path_rank, path, -id, -kewid, -ipni_id, -confidence_level) %>% - left_join(select(tpl_wide, id, name=species)) %>% mutate(rank = "species") %>% + left_join(select(tpl_wide, id, name=species)) %>% + mutate(rank = "species") %>% select(id, name, rank, path, path_rank, confidence_level, kew_id=kewid, ipni_id) write_tsv(tpl_long, "data/tpl_long.tsv.bz2") diff --git a/data-raw/wd.R b/data-raw/wd.R new file mode 100644 index 0000000..5a0aaa1 --- /dev/null +++ b/data-raw/wd.R @@ -0,0 +1,6 @@ + +# WikiData +# https://doi.org/10.5281/zenodo.1213476 +# +download.file("https://zenodo.org/record/1213477/files/wikidata-taxon-info20171227.tsv.gz", "wd-taxon.tsv.gz") +download.file("https://zenodo.org/record/1213477/files/links-globi-wd-ott.tsv.gz", "wd-links.tsv.gz") \ No newline at end of file