Skip to content

Commit

Permalink
some more examples
Browse files Browse the repository at this point in the history
  • Loading branch information
cboettig committed Dec 14, 2018
1 parent e07d7a9 commit 046d070
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 28 deletions.
30 changes: 6 additions & 24 deletions data-raw/itis.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## apt-get -y install mariadb-client postgresql-client
library(taxizedb)
#library(taxizedb)
library(tidyverse)

library(piggyback)
#itis_store <- db_download_itis()
## Need to fix locale issue
#db_load_itis(itis_store, user = "postgres", pwd = "password", host = "postgres")
Expand Down Expand Up @@ -96,25 +96,9 @@ itis_long <- itis %>%
path_rank, path_id, path_rank_id, name_usage, update_date) %>%
mutate(update_date = as_date(update_date))


## encode language with common name(?)

## Some language names have the same language code.
## ISOcodes puts duplicates in same column, we need a tidy look-up table
iso <- ISOcodes::ISO_639_2 %>%
select(language = Name, code = Alpha_2) %>%
na.omit() %>%
separate(language, c("name", "name2", "name3", "name4", "name5"),
sep = ";", extra="warn", fill = "right") %>%
gather(key, language, -code) %>%
select(-key) %>%
na.omit()

itis_for_rdf <-
itis_long %>%
left_join(iso) %>%
unite("common_name", common_name, code, sep = "@")

system.time({
write_tsv(itis_long, bzfile("data/itis_long.tsv.bz2", compression=9))
})


#itis_long <- read_tsv("data/itis_long.tsv.bz2")
Expand All @@ -127,9 +111,7 @@ itis_hierarchy <-
spread(path_rank, path)


system.time({
write_tsv(itis_long, bzfile("data/itis_long.tsv.bz2", compression=9))
})

## write at compression 9 for best compression
system.time({
write_tsv(itis_hierarchy, bzfile("data/itis_hierarchy.tsv.bz2", compression=9))
Expand Down
91 changes: 87 additions & 4 deletions notebook/taxa-rdf.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,41 @@
remotes::install_github("cboettig/virtuoso")
remotes::install_github("cboettig/rdftools")


library(virtuoso)
library(readr)
library(rdftools)
library(dplyr)
library(taxald)
library(ISOcodes)


itis_long <- read_tsv("data/itis_long.tsv.bz2")

## Some language names share the same language code.
## ISOcodes puts such alternative names in the same `Name` cell, separated by
## semicolons, so we build a tidy look-up table with one row per
## (code, language) pair.
iso <- ISOcodes::ISO_639_2 %>%
  ## keep only the language name(s) and the `Alpha_2` code
  select(language = Name, code = Alpha_2) %>%
  ## drop rows where either the name or the code is missing
  na.omit() %>%
  ## Split alternative names into separate columns. The separator also
  ## swallows whitespace following the semicolon: entries look like
  ## "Spanish; Castilian", and splitting on ";" alone would leave
  ## " Castilian" with a leading space, which can never match when the
  ## table is later joined on `language`.
  separate(language, c("name", "name2", "name3", "name4", "name5"),
           sep = ";\\s*", extra = "warn", fill = "right") %>%
  ## stack the name columns back into a single `language` column
  gather(key, language, -code) %>%
  select(-key) %>%
  ## remove the padding rows created by `fill = "right"`
  na.omit()

## Attach the language code from `iso` to each record of `itis_long`, then
## merge `common_name` and `code` into one `common_name` column joined by "@".
## NOTE(review): left_join() is given no `by`, so it joins on whichever
## columns the two tables share -- presumably `language`; confirm.
itis_for_rdf <-
itis_long %>%
left_join(iso) %>%
unite("common_name", common_name, code, sep = "@")

## Split the long table into two de-duplicated tables: one of per-taxon
## fields and one of classification-path fields (keeping `id` as `species_id`).
## NOTE(review): both are built from `itis_long`, not from `itis_for_rdf`
## above -- confirm that is intended.
species <- itis_long %>% select(id, name, rank, common_name, update_date) %>% distinct()
classif <- itis_long %>% select(path_id, path, path_rank, path_rank_id, species_id = id) %>% distinct()
#readr::write_tsv(species, "species.tsv")
#readr::write_tsv(classif, "classif.tsv")
## Write each table out via rdftools::write_nquads() to a ".nq.gz" file,
## using the "taxald:" prefix and the given column as the key.
rdftools::write_nquads(species, "itis_species.nq.gz", prefix = "taxald:", key = "id")
rdftools::write_nquads(classif, "itis_classif.nq.gz", prefix = "taxald:", key = "path_id")



taxa_tbl("itis", "taxonid") %>%
collect() %>%
Expand Down Expand Up @@ -42,6 +72,8 @@ taxa_tbl("wd", "taxonid") %>% collect() %>%
key_column = "id",
prefix = "taxald:")
#virtuoso:::vos_delete_db()

library(virtuoso)
vos_start()
con <- vos_connect()
files <- fs::dir_ls(glob="*.nq.gz")
Expand All @@ -65,18 +97,69 @@ vos_query(con,
")

vos_query(con,
'SELECT ?id ?rank
'SELECT ?id ?rank ?common
WHERE {
?id <taxald:name> "Homo sapiens" .
?id <taxald:rank> ?rank
?id <taxald:rank> ?rank .
OPTIONAL { ?id <taxald:common_name> ?common . }
}
')

vos_query(con,
'SELECT ?id ?species ?rank
WHERE {
?id <taxald:name> ?species .
?id <taxald:rank> ?rank .
?id <taxald:common_name> "Human" .
}
')

vos_query(con, 'SELECT ?id ?species ?rank
WHERE {
?id <taxald:name> ?species .
?id <taxald:rank> ?rank .
?id <taxald:common_name> ?common_name .
?id <taxald:common_name> "Human" .
FILTER langMatches(lang(?common_name), "en")
}
')

vos_query(con, 'SELECT ?id ?species ?common_name ?rank
WHERE {
?id <taxald:name> ?species .
?id <taxald:rank> ?rank .
?id <taxald:common_name> ?common_name .
FILTER( ?common_name LIKE "%Cod%" ).
}
')


vos_query(con,
'SELECT ?path ?rank
WHERE {
?path_id <taxald:path> ?path .
?path_id <taxald:path_rank> ?rank
} LIMIT 20
')

vos_query(con,
'SELECT ?id ?name
'SELECT ?id ?name ?common ?rank ?update_date
WHERE {
?id <taxald:name> ?name .
?id <taxald:rank> "kingdom"
?id <taxald:common_name> ?common .
?id <taxald:rank> ?rank .
?id <taxald:update_date> ?update_date
} LIMIT 20
')


vos_query(con,
'SELECT DISTINCT ?p
FROM <rdflib>
WHERE {
?s ?p ?o
}
LIMIT 20
')

0 comments on commit 046d070

Please sign in to comment.