Skip to content

Commit 9f9d748

Browse files
committed
#215 fix to inat - changed to work with newer API - bump patch version, add fixture for inat max records limit
add docs to occ() fxn for inat limits and where to get more data
1 parent f9d8418 commit 9f9d748

File tree

10 files changed

+16104
-1659
lines changed

10 files changed

+16104
-1659
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Description: A programmatic interface to many species occurrence data sources,
88
System ('OBIS'), and Atlas of Living Australia ('ALA'). Includes
99
functionality for retrieving species occurrence data, and combining
1010
those data.
11-
Version: 0.9.0.9811
11+
Version: 0.9.1.9100
1212
License: MIT + file LICENSE
1313
Authors@R:
1414
c(person(given = "Scott",

R/inat.R

+38-33
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1-
spocc_inat_obs <- function(query=NULL, taxon = NULL, quality=NULL, geo=TRUE,
1+
# API docs: https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
2+
spocc_inat_obs <- function(taxon_name=NULL, quality=NULL, geo=TRUE,
23
year=NULL, month=NULL, day=NULL, bounds=NULL, date_start = NULL,
34
date_end = NULL, maxresults=100, page=NULL, callopts) {
45

56
# input parameter checks
6-
if (!is.null(quality)) quality <- match.arg(quality, c("casual","research"))
7+
if (!is.null(quality)) quality <- match.arg(quality, c("casual","research","needs_id"))
78
if (!is.null(year)) {
89
if (length(year) > 1) {
910
stop("can only filter results by 1 year; enter only 1 value for year",
1011
call. = FALSE)
1112
}
1213
}
14+
assert(geo, "logical")
1315
if (!is.null(month)) {
1416
month <- as.numeric(month)
1517
if (is.na(month)) {
@@ -37,62 +39,63 @@ spocc_inat_obs <- function(query=NULL, taxon = NULL, quality=NULL, geo=TRUE,
3739
if (day < 1 || day > 31) stop("Please enter a valid day between 1 and 31",
3840
call. = FALSE)
3941
}
42+
43+
args <- sc(list(taxon_name = taxon_name, quality_grade = quality,
44+
geo = geo, year = year, month = month, day = day,
45+
d1 = date_start, d2 = date_end))
46+
4047
if (!is.null(bounds)) {
4148
if (length(bounds) != 4) {
4249
stop("bounding box specifications must have 4 coordinates", call. = FALSE)
4350
}
51+
bounds <- list(swlat = bounds[1], swlng = bounds[2], nelat = bounds[3],
52+
nelng = bounds[4])
53+
args <- sc(c(args, bounds))
4454
}
4555

46-
args <- sc(list(q = query, quality_grade = quality, taxon_name = taxon,
47-
`has[]` = if (!is.null(geo) && geo) "geo" else NULL,
48-
year = year, month = month, day = day,
49-
d1 = date_start, d2 = date_end))
50-
bounds <- list(swlat = bounds[1], swlng = bounds[2], nelat = bounds[3],
51-
nelng = bounds[4])
52-
args <- sc(c(args, bounds))
53-
54-
q_path <- "observations.csv"
55-
ping_path <- "observations.json"
56-
5756
if (!is.null(page)) {
5857
page_query <- c(args, per_page = maxresults, page = page)
59-
cli <- crul::HttpClient$new(url = inat_base_url(), opts = callopts)
60-
res <- cli$get(path = q_path, query = page_query)
58+
cli <- crul::HttpClient$new(url = inat_base_url, opts = callopts)
59+
res <- cli$get(path = inat_path, query = page_query)
6160

62-
total_res <- as.numeric(res$headers$`x-total-entries`)
6361
res <- spocc_inat_handle(res)
64-
data_out <- if (is.na(res)) NA else utils::read.csv(textConnection(res),
65-
stringsAsFactors = FALSE)
62+
tmp <- jsonlite::fromJSON(res, flatten = TRUE)
63+
data_out <- tmp$results
64+
total_res <- tmp$total_results
6665
} else {
6766
ping_query <- c(args, page = 1, per_page = 1)
68-
cli <- crul::HttpClient$new(url = inat_base_url(), opts = callopts)
69-
out <- cli$get(path = ping_path, query = ping_query)
67+
cli <- crul::HttpClient$new(url = inat_base_url, opts = callopts)
68+
out <- cli$get(path = inat_path, query = ping_query)
7069
out$raise_for_status()
71-
total_res <- as.numeric(out$response_headers$`x-total-entries`)
72-
70+
71+
total_res <- jsonlite::fromJSON(spocc_inat_handle(out),
72+
flatten = TRUE)$total_results
7373
if (total_res == 0) {
7474
stop("no results; either no records or entered an invalid search",
7575
call. = FALSE)
7676
}
7777

7878
page_query <- c(args, per_page = 200, page = 1)
79-
data <- cli$get(path = ping_path, query = page_query)
79+
data <- cli$get(path = inat_path, query = page_query)
8080
data <- spocc_inat_handle(data)
81-
data_out <- jsonlite::fromJSON(data, flatten = TRUE)
82-
data_out$tag_list <- sapply(data_out$tag_list, function(x) {
81+
data_out <- jsonlite::fromJSON(data, flatten = TRUE)$results
82+
data_out$tags <- sapply(data_out$tags, function(x) {
8383
if (length(x) == 0) "" else paste0(x, collapse = ", ")
8484
})
8585

8686
if (total_res < maxresults) maxresults <- total_res
8787
if (maxresults > 200) {
88+
testing_out <- list()
8889
for (i in 2:ceiling(maxresults / 200)) {
90+
cat(i, "\n")
8991
page_query <- c(args, per_page = 200, page = i)
90-
data <- cli$get(path = ping_path, query = page_query)
92+
data <- cli$get(path = inat_path, query = page_query)
9193
data <- spocc_inat_handle(data)
92-
data_out2 <- jsonlite::fromJSON(data, flatten = TRUE)
93-
data_out2$tag_list <- sapply(data_out2$tag_list, function(x) {
94+
data_out2 <- jsonlite::fromJSON(data, flatten = TRUE)$results
95+
data_out2$tags <- sapply(data_out2$tags, function(x) {
9496
if (length(x) == 0) "" else paste0(x, collapse = ", ")
9597
})
98+
testing_out[[i]] <- data_out2
9699
data_out <- rbindl(list(data_out, data_out2))
97100
}
98101
}
@@ -118,12 +121,13 @@ spocc_inat_handle <- function(x){
118121
if (!x$response_headers$`content-type` ==
119122
"application/json; charset=utf-8") {
120123
warning(
121-
"Conent type incorrect, should be 'application/json; charset=utf-8'")
124+
"Content type incorrect, should be 'application/json; charset=utf-8'")
122125
NA
123126
}
124127
if (x$status_code > 202) {
125-
warning(sprintf("Error: HTTP Status %s", data$status_code))
126-
NA
128+
parsed <- jsonlite::fromJSON(x$parse("UTF-8"))
129+
if ("error" %in% names(parsed)) stop(parsed$error)
130+
x$raise_for_status()
127131
}
128132
if (nchar(res) == 0) {
129133
warning("No data found")
@@ -136,10 +140,11 @@ spocc_inat_handle <- function(x){
136140

137141
spocc_get_inat_obs_id <- function(id, callopts = list()) {
138142
q_path <- paste("observations/", as.character(id), ".json", sep = "")
139-
cli <- crul::HttpClient$new(url = inat_base_url(), opts = callopts)
143+
cli <- crul::HttpClient$new(url = inat_base_url, opts = callopts)
140144
res <- cli$get(path = q_path)
141145
res$raise_for_status()
142146
jsonlite::fromJSON(res$parse("UTF-8"))
143147
}
144148

145-
inat_base_url <- function() "https://www.inaturalist.org/"
149+
# Base URL of the iNaturalist API; passed as `url` to crul::HttpClient$new()
inat_base_url <- "https://api.inaturalist.org"
150+
# Path of the v1 observations route; passed as `path` to the client's get()
inat_path <- "v1/observations"

R/plugins.r

+1-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ foo_inat <- function(sources, query, limit, page, geometry, has_coords,
237237
opts <- limit_alias(opts, "inat")
238238
opts$geo <- has_coords
239239
time <- now()
240-
opts$query <- query
240+
opts$taxon_name <- query
241241
if (!"maxresults" %in% names(opts)) opts$maxresults <- limit
242242
if (!"page" %in% names(opts)) opts$page <- page
243243
if (!is.null(geometry)) {

R/zzz.r

+2
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,5 @@ assert <- function(x, y) {
116116
}
117117
}
118118
}
119+
120+
# Null-default operator: yields `x` unless it is NULL, in which case `y`
`%||%` <- function(x, y) {
  if (!is.null(x)) {
    return(x)
  }
  y
}

man-roxygen/occ_egs.R

+7
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
#' one$gbif
2424
#' two$gbif
2525
#'
26+
#' # iNaturalist limits: they allow at most 10,000; query through GBIF to get
27+
#' # more than 10,000
28+
#' # See https://www.gbif.org/dataset/50c9509d-22c7-4a22-a47d-8c48425ef4a7
29+
#' # x <- occ(query = 'Danaus plexippus', from = 'gbif', limit = 10100,
30+
#' # gbifopts = list(datasetKey = "50c9509d-22c7-4a22-a47d-8c48425ef4a7"))
31+
#' # x$gbif
32+
#'
2633
#' # Date range searches across data sources
2734
#' ## Not possible for ebird
2835
#' ## bison

man-roxygen/occtemp.r

+22
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,28 @@
155155
#' [rbison::bison_solr()] by default as it's more flexible. If you pass a value to the
156156
#' `geometry` parameter we use [rbison::bison()]. We'd prefer to just use one function
157157
#' to simplify things, but [rbison::bison_solr()] doesn't support geometry queries.
158+
#'
159+
#' @section iNaturalist notes:
160+
#' We're using the iNaturalist API, docs at
161+
#' https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
162+
#'
163+
#' API rate limits: max of 100 requests per minute, though they ask that you try to keep it
164+
#' to 60 requests per minute or lower. If they notice usage that has serious impact on their
165+
#' performance they may institute blocks without notification.
166+
#'
167+
#' There is a hard limit of 10,000 observations with the iNaturalist API. We do paging
168+
#' internally so you may not see this aspect, but for example, if you request 12,000
169+
#' records, you won't be able to get that many. The API will error at anything more than
170+
#' 10,000. We now error if you request more than 10,000 from iNaturalist. There are
171+
#' some alternatives:
172+
#'
173+
#' - Consider exporting data while logged in
174+
#' to your iNaturalist account, or the iNaturalist research grade observations within
175+
#' GBIF - see https://www.gbif.org/dataset/50c9509d-22c7-4a22-a47d-8c48425ef4a7 - at
176+
#' time of this writing it has 8.5 million observations.
177+
#' - Search for iNaturalist data within GBIF. e.g., the following searches for iNaturalist
178+
#' data within GBIF and allows more than 10,000 records:
179+
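#' `occ(query = 'Danaus plexippus', from = 'gbif', limit = 10100, gbifopts = list(datasetKey = "50c9509d-22c7-4a22-a47d-8c48425ef4a7"))`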
158180
#'
159181
#' @section limit parameter:
160182
#' The `limit` parameter is set to a default of 25. This means that you will get **up to**

man/occ.Rd

+32
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)