Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
cboettig committed Jun 28, 2018
1 parent fa5114c commit 1220ea7
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 15 deletions.
43 changes: 34 additions & 9 deletions data-raw/fishbase.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,51 @@
library(rfishbase)
library(rfishbase) # 3.0
library(tidyverse)

fb <- as_tibble(rfishbase::load_taxa())
fb_wide <- fb %>%
select( id = SpecCode,
genus = Genus,
species = Species,
subfamily = SubFamily,
subfamily = Subfamily,
family = Family,
order = Order,
class = Class,
common_name = FBname) %>%
superclass = SuperClass) %>%
mutate(phylum = "Chordata",
kingdom = "Animalia",
id = paste0("FB:", id)) %>%
select(id, species, genus, subfamily,
family, order, class, phylum,
kingdom, common_name)
id = paste0("FB:", id))

write_tsv(fb_wide, "data/fb_hierarchy.tsv.bz2")

write_tsv(fb_wide, "data/fb_wide.tsv.bz2")
# Species lookup table from rfishbase's internal helper; it is joined onto
# the synonym and common-name tables below.
species <- rfishbase:::fb_species()

# Synonym table: every record returned by synonyms(NULL), joined to the
# species table and reduced to the columns kept downstream (lower-case names).
# NOTE(review): left_join() is called without `by`, so it joins on every
# column the two tables share -- presumably only SpecCode; confirm.
synonyms <- rfishbase::synonyms(NULL) %>%
  left_join(species) %>%
  select(
    id      = SpecCode,
    species = Species,
    synonym,
    type    = Status,
    syn_id  = SynCode,
    rank    = TaxonLevel,
    tsn     = TSN,
    col     = CoL_ID,
    worms   = WoRMS_ID,
    zoobank = ZooBank_ID
  )

## Consider preserving stock code?
# Common-name table: the "comnames" table joined to the species table, kept as
# id / species / synonym / language, with every row tagged type = "common".
# NOTE(review): left_join() has no explicit `by`; it joins on all shared
# columns -- presumably only SpecCode; confirm.
common <- rfishbase:::fb_tbl("comnames") %>%
  left_join(species) %>%
  transmute(
    id       = SpecCode,
    species  = Species,
    synonym  = ComName,
    language = Language,
    type     = "common"
  )

# Stack the common-name rows on top of the synonym rows; columns present in
# only one of the two tables are filled with NA by bind_rows().
fb_synonyms <- bind_rows(common, synonyms)


slb <- as_tibble(rfishbase::load_taxa(server = "https://fishbase.ropensci.org/sealifebase"))
slb_wide <- slb %>%
Expand All @@ -36,7 +61,7 @@ slb_wide <- slb %>%
select(id, species, genus, subfamily,
family, order, class, common_name)

write_tsv(slb_wide, "data/slb_wide.tsv.bz2")
write_tsv(slb_wide, "data/slb_hierarchy.tsv.bz2")



Expand Down
9 changes: 4 additions & 5 deletions data-raw/gbif.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,15 @@ gbif_wide <- gbif_taxa %>%
kingdom, phylum, class, order, family, genus, species,
specific_epithet = specificEpithet, infraspecific_epithet = infraspecificEpithet,
genericName, taxonomicStatus)
write_tsv(gbif_wide, "data/gbif_wide.tsv.bz2")
write_tsv(gbif_wide, "data/gbif_hierarchy.tsv.bz2")
rm(gbif_wide)

gbif_long <-
gbif_taxa %>%
select(taxon_id, name = scientificName, rank = rank, genericName, taxonomicStatus)
select(id = taxon_id, name = scientificName, rank = rank, genericName, taxonomicStatus)
write_tsv(gbif_long, "data/gbif_long.tsv.bz2")


write_tsv(gbif_taxa, "data/gbif.tsv.bz2")
rm(gbif_taxa)
# Write the distinct (id, name, rank) rows of gbif_long as the taxon-id table.
write_tsv(
  distinct(select(gbif_long, id, name, rank)),
  "data/gbif_taxonid.tsv.bz2"
)


5 changes: 5 additions & 0 deletions data-raw/itis.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,15 @@ system.time({
write_tsv(itis_wide, bzfile("data/itis_wide.tsv.bz2", compression=9))
})

#########################################################

## Database prep for ITIS
# Reload the long and wide ITIS tables from their compressed TSV files so this
# section can run on its own.
library(tidyverse)
itis_long <- read_tsv("data/itis_long.tsv.bz2")
itis_wide <- read_tsv("data/itis_wide.tsv.bz2")

# Rename the wide table on disk to the "*_hierarchy" file name. This runs
# after the read above, so itis_wide is already in memory; the old path no
# longer exists afterwards.
fs::file_move("data/itis_wide.tsv.bz2", "data/itis_hierarchy.tsv.bz2")

## accepted == valid
### https://www.itis.gov/submit_guidlines.html#usage

Expand All @@ -136,6 +139,8 @@ itis_taxonid <- taxonid %>% filter(name_usage %in% c("accepted", "valid")) %>% s
## assert ids are unique
itis_taxonid %>% pull(id) %>% duplicated() %>% any() %>% testthat::expect_false()

write_tsv(itis_taxonid, "data/itis_taxonid.tsv.bz2")


itis_paths <- itis_long %>% select(id, path_id, path, path_rank, path_rank_id) %>%
distinct() %>% arrange(id)
Expand Down
3 changes: 2 additions & 1 deletion data-raw/tpl.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ write_tsv(tpl_wide, "data/tpl_wide.tsv.bz2")

tpl_long <- tpl_wide %>%
gather(path_rank, path, -id, -kewid, -ipni_id, -confidence_level) %>%
left_join(select(tpl_wide, id, name=species)) %>% mutate(rank = "species") %>%
left_join(select(tpl_wide, id, name=species)) %>%
mutate(rank = "species") %>%
select(id, name, rank, path, path_rank, confidence_level, kew_id=kewid, ipni_id)

write_tsv(tpl_long, "data/tpl_long.tsv.bz2")
Expand Down
6 changes: 6 additions & 0 deletions data-raw/wd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# WikiData
# https://doi.org/10.5281/zenodo.1213476
#
# Fetch the two Zenodo archives into the working directory: the taxon-info
# dump and the GloBI/WikiData/OTT links table. Each URL is paired with its
# local file name; invisible() keeps the returned status list from printing.
invisible(Map(
  download.file,
  url = c(
    "https://zenodo.org/record/1213477/files/wikidata-taxon-info20171227.tsv.gz",
    "https://zenodo.org/record/1213477/files/links-globi-wd-ott.tsv.gz"
  ),
  destfile = c(
    "wd-taxon.tsv.gz",
    "wd-links.tsv.gz"
  )
))

0 comments on commit 1220ea7

Please sign in to comment.