diff --git a/NAMESPACE b/NAMESPACE index 7302ed1..8465607 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(arc_geo) export(arc_reverse_geo) export(arcgeocoder_check_access) importFrom(utils,URLencode) diff --git a/R/arc_geo.R b/R/arc_geo.R index 389e54a..6fdb710 100644 --- a/R/arc_geo.R +++ b/R/arc_geo.R @@ -1,11 +1,133 @@ -arc_geo_single <- function(address, - lat = "lat", - long = "lon", - limit = 1, - full_results = TRUE, - return_addresses = TRUE, - verbose = TRUE, - custom_query = list()) { +#' Geocoding using the ArcGIS REST API +#' +#' @description +#' Geocodes addresses given as character values. This +#' function returns the \CRANpkg{tibble} associated with the query. +#' +#' @param address character with single line address +#' (`"1600 Pennsylvania Ave NW, Washington"`) or a vector of addresses +#' (`c("Madrid", "Barcelona")`). +#' @param lat latitude column name in the output data (default `"lat"`). +#' @param long longitude column name in the output data (default `"lon"`). +#' @param limit maximum number of results to return per input address. Note +#' that each query returns a maximum of 50 results. +#' @param full_results returns all available data from the API service. This +#' is a shorthand of `outFields=*`. See **References**. +#' If `FALSE` (default) only the default values of the API would be returned. +#' See also `return_addresses`. +#' @param return_addresses return input addresses with results if `TRUE`. +#' @param sourcecountry Limits the candidates returned to the specified country +#' or countries. Acceptable values include the three-character country code. +#' You can specify multiple country codes to limit results to more than one +#' country. +#' +#' @inheritParams arc_reverse_geo +#' +#' +#' @references +#' [ArcGIS REST +#' `findAddressCandidates`](https://developers.arcgis.com/rest/geocode/api-reference/geocoding-find-address-candidates.htm) +#' +#' @return A \CRANpkg{tibble} with the results. +#' +#' @details +#' More info and valid values in the [ArcGIS REST +#' docs](https://developers.arcgis.com/rest/geocode/api-reference/geocoding-find-address-candidates.htm) +#' +#' ## `outsr` +#' +#' The spatial reference can be specified as either a well-known ID (WKID). If +#' not specified, the spatial reference of the output locations is the same as +#' that of the service ( WGS84, i.e. WKID = 4326)). +#' +#' +#' @examplesIf arcgeocoder_check_access() +#' \donttest{ +#' arc_geo("Madrid, Spain") +#' +#' # Several addresses with additional output fields +#' with_params <- arc_geo(c("Madrid", "Barcelona"), +#' full_results = TRUE, +#' custom_query = list(outFields = "LongLabel") +#' ) +#' +#' with_params[, c("lat", "lon", "LongLabel")] +#' +#' # With options: restrict search to USA +#' with_params_usa <- arc_geo(c("Madrid", "Barcelona"), +#' full_results = TRUE, +#' sourcecountry = "USA", +#' custom_query = list(outFields = "LongLabel") +#' ) +#' +#' with_params_usa[, c("lat", "lon", "LongLabel")] +#' } +#' @export +#' +#' @seealso [tidygeocoder::geo()] +#' @family geocoding +arc_geo <- function(address, lat = "lat", long = "lon", limit = 1, + full_results = FALSE, return_addresses = TRUE, + verbose = FALSE, progressbar = TRUE, + outsr = NULL, langcode = NULL, sourcecountry = NULL, + custom_query = list()) { + if (limit > 50) { + message(paste( + "ArcGIS REST API provides 50 results as a maximum. ", + "Your query may be incomplete" + )) + limit <- min(50, limit) + } + + # Dedupe for query + init_key <- dplyr::tibble(query = address) + key <- unique(address) + + # Set progress bar + ntot <- length(key) + # Set progress bar if n > 1 + progressbar <- all(progressbar, ntot > 1) + if (progressbar) { + pb <- txtProgressBar(min = 0, max = ntot, width = 50, style = 3) + } + seql <- seq(1, ntot, 1) + + # Add additional parameters to the custom query + if (isTRUE(full_results)) { + # This will override the outFields param provided in the custom_query + custom_query$outFields <- "*" + } + + custom_query$sourceCountry <- sourcecountry + custom_query$outSR <- outsr + custom_query$langCode <- langcode + + + all_res <- lapply(seql, function(x) { + ad <- key[x] + if (progressbar) { + setTxtProgressBar(pb, x) + } + arc_geo_single( + address = ad, lat, long, limit, full_results, return_addresses, + verbose, custom_query, singleline = TRUE + ) + }) + if (progressbar) close(pb) + + all_res <- dplyr::bind_rows(all_res) + all_res <- dplyr::left_join(init_key, all_res, by = "query") + + all_res[all_res == ""] <- NA + return(all_res) +} + + + +arc_geo_single <- function(address, lat = "lat", long = "lon", limit = 1, + full_results = TRUE, return_addresses = TRUE, + verbose = TRUE, custom_query = list(), + singleline = TRUE) { # Step 1: Download ---- api <- paste0( "https://geocode.arcgis.com/arcgis/rest/", @@ -13,14 +135,10 @@ arc_geo_single <- function(address, ) # Compose url - url <- paste0(api, "SingleLine=", address, "&f=json&maxLocations=", limit) + if (singleline) ad_q <- paste0("SingleLine=", address) + url <- paste0(api, ad_q, "&f=json&maxLocations=", limit) - # Add options - - if (isTRUE(full_results)) { - custom_query$outFields <- "*" - } url <- add_custom_query(custom_query, url) @@ -57,14 +175,12 @@ arc_geo_single <- function(address, result_end <- dplyr::bind_cols(tbl_query, result_unn) result_end$lat <- as.double(result_unn$y) result_end$lon <- as.double(result_unn$x) - return(result_end) - - # Keep names - result_out <- keep_names_rev(result, - address = address, - # Return coords here always FALSE, check that in the top-level query - return_coords = FALSE, - full_results = full_results + + # Keep names in the right order + + result_out <- keep_names( + result_end, lat, long, full_results, + return_addresses ) return(result_out) diff --git a/R/arc_reverse_geo.R b/R/arc_reverse_geo.R index e139c5b..defb96f 100644 --- a/R/arc_reverse_geo.R +++ b/R/arc_reverse_geo.R @@ -151,10 +151,10 @@ arc_reverse_geo <- function(x, y, address = "address", full_results = FALSE, # Add additional parameters to the custom query - custom_query$outsr <- outsr - custom_query$langcode <- langcode - custom_query$featuretypes <- featuretypes - custom_query$locationtype <- locationtype + custom_query$outSR <- outsr + custom_query$langCode <- langcode + custom_query$featureTypes <- featuretypes + custom_query$locationType <- locationtype all_res <- lapply(seql, function(x) { if (progressbar) { @@ -257,8 +257,6 @@ arc_reverse_geo_single <- function(lat_cap, # Keep names result_out <- keep_names_rev(result, address = address, - # Return coords here always FALSE, check that in the top-level query - return_coords = FALSE, full_results = full_results ) diff --git a/R/utils.R b/R/utils.R index c329859..a045bb1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -27,6 +27,7 @@ is_named <- function(x) { return(TRUE) } +# Specific ---- unnest_reverse <- function(x) { x_add <- x$address lngths <- vapply(x_add, length, FUN.VALUE = numeric(1)) @@ -81,13 +82,12 @@ unnest_geo <- function(x) { return(endobj) } -keep_names_rev <- function(x, address = "address", return_coords = FALSE, +keep_names_rev <- function(x, address = "address", full_results = FALSE, colstokeep = address) { names(x) <- gsub("address", address, names(x)) out_cols <- colstokeep - if (return_coords) out_cols <- c(out_cols, "lat", "lon") if (full_results) out_cols <- c(out_cols, "lat", "lon", names(x)) out_cols <- unique(out_cols) @@ -96,6 +96,25 @@ keep_names_rev <- function(x, address = "address", return_coords = FALSE, return(out) } +keep_names <- function(x, lat = "lat", lon = "lon", + full_results = TRUE, + return_addresses = TRUE, + colstokeep = c("query", lat, lon)) { + names(x) <- gsub("^lon$", lon, names(x)) + names(x) <- gsub("^lat$", lat, names(x)) + + out_cols <- colstokeep + out_cols <- c(out_cols, names(x)) + + if (!return_addresses) out_cols <- colstokeep + if (full_results) out_cols <- c(out_cols, names(x)) + + out_cols <- unique(out_cols) + out <- x[, out_cols] + + return(out) +} + empty_tbl_rev <- function(x, address) { init_nm <- names(x) x <- dplyr::as_tibble(x) diff --git a/codemeta.json b/codemeta.json index 5d859bd..c1c5593 100644 --- a/codemeta.json +++ b/codemeta.json @@ -130,7 +130,7 @@ }, "applicationCategory": "cartography", "keywords": ["r", "geocoding", "arcgis", "address", "reverse-geocoding", "rstats", "r-package", "api-wrapper", "api-rest", "arcgis-api", "gis"], - "fileSize": "129.577KB", + "fileSize": "141.444KB", "citation": [ { "@type": "SoftwareSourceCode", diff --git a/man/arc_geo.Rd b/man/arc_geo.Rd new file mode 100644 index 0000000..b8b7389 --- /dev/null +++ b/man/arc_geo.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arc_geo.R +\name{arc_geo} +\alias{arc_geo} +\title{Geocoding using the ArcGIS REST API} +\usage{ +arc_geo( + address, + lat = "lat", + long = "lon", + limit = 1, + full_results = FALSE, + return_addresses = TRUE, + verbose = FALSE, + progressbar = TRUE, + outsr = NULL, + langcode = NULL, + sourcecountry = NULL, + custom_query = list() +) +} +\arguments{ +\item{address}{character with single line address +(\code{"1600 Pennsylvania Ave NW, Washington"}) or a vector of addresses +(\code{c("Madrid", "Barcelona")}).} + +\item{lat}{latitude column name in the output data (default \code{"lat"}).} + +\item{long}{longitude column name in the output data (default \code{"lon"}).} + +\item{limit}{maximum number of results to return per input address. Note +that each query returns a maximum of 50 results.} + +\item{full_results}{returns all available data from the API service. This +is a shorthand of \verb{outFields=*}. See \strong{References}. +If \code{FALSE} (default) only the default values of the API would be returned. +See also \code{return_addresses}.} + +\item{return_addresses}{return input addresses with results if \code{TRUE}.} + +\item{verbose}{if \code{TRUE} then detailed logs are output to the console.} + +\item{progressbar}{Logical. If \code{TRUE} displays a progress bar to indicate +the progress of the function.} + +\item{outsr}{The spatial reference of the \verb{x,y} coordinates returned by a +geocode request. By default is \code{NULL} (i.e. the parameter won't be used in +the query). See \strong{Details}.} + +\item{langcode}{Sets the language in which reverse-geocoded addresses are +returned.} + +\item{sourcecountry}{Limits the candidates returned to the specified country +or countries. Acceptable values include the three-character country code. +You can specify multiple country codes to limit results to more than one +country.} + +\item{custom_query}{API-specific parameters to be used, passed as a named +list.} +} +\value{ +A \CRANpkg{tibble} with the results. +} +\description{ +Geocodes addresses given as character values. This +function returns the \CRANpkg{tibble} associated with the query. +} +\details{ +More info and valid values in the \href{https://developers.arcgis.com/rest/geocode/api-reference/geocoding-find-address-candidates.htm}{ArcGIS REST docs} +\subsection{\code{outsr}}{ + +The spatial reference can be specified as either a well-known ID (WKID). If +not specified, the spatial reference of the output locations is the same as +that of the service ( WGS84, i.e. WKID = 4326)). +} +} +\examples{ +\dontshow{if (arcgeocoder_check_access()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\donttest{ +arc_geo("Madrid, Spain") + +# Several addresses with additional output fields +with_params <- arc_geo(c("Madrid", "Barcelona"), + full_results = TRUE, + custom_query = list(outFields = "LongLabel") +) + +with_params[, c("lat", "lon", "LongLabel")] + +# With options: restrict search to USA +with_params_usa <- arc_geo(c("Madrid", "Barcelona"), + full_results = TRUE, + sourcecountry = "USA", + custom_query = list(outFields = "LongLabel") +) + +with_params_usa[, c("lat", "lon", "LongLabel")] +} +\dontshow{\}) # examplesIf} +} +\references{ +\href{https://developers.arcgis.com/rest/geocode/api-reference/geocoding-find-address-candidates.htm}{ArcGIS REST \code{findAddressCandidates}} +} +\seealso{ +\code{\link[tidygeocoder:geo]{tidygeocoder::geo()}} +} +\concept{geocoding} diff --git a/tests/testthat/_snaps/arc_geo.md b/tests/testthat/_snaps/arc_geo.md new file mode 100644 index 0000000..a5c0c69 --- /dev/null +++ b/tests/testthat/_snaps/arc_geo.md @@ -0,0 +1,19 @@ +# Messages + + Code + out <- arc_geo("Madrid", limit = 200) + Message + ArcGIS REST API provides 50 results as a maximum. Your query may be incomplete + +--- + + Code + out <- arc_geo("Madrid", verbose = TRUE) + Message + + URL: https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/findAddressCandidates? + Parameters: + - SingleLine=Madrid + - f=json + - maxLocations=1 + diff --git a/tests/testthat/test-arc_geo.R b/tests/testthat/test-arc_geo.R new file mode 100644 index 0000000..5a9c79d --- /dev/null +++ b/tests/testthat/test-arc_geo.R @@ -0,0 +1,179 @@ +test_that("Returning empty query", { + skip_on_cran() + skip_if_api_server() + + expect_message( + obj <- arc_geo("alsksjdhfg 561bata lorem ipsum"), + "No results for" + ) + + expect_true(nrow(obj) == 1) + expect_true(obj$query == "alsksjdhfg 561bata lorem ipsum") + expect_s3_class(obj, "tbl") + expect_identical(names(obj), c("query", "lat", "lon")) + expect_true(all( + vapply(obj, class, FUN.VALUE = character(1)) + == c("character", rep("numeric", 2)) + )) + expect_true(is.na(obj$lat)) + expect_true(is.na(obj$lon)) + + expect_message( + obj_renamed <- arc_geo("alsksjdhfg 561bata lorem ipsum", + lat = "lata", + long = "longa" + ), + "No results for" + ) + + expect_identical(names(obj_renamed), c("query", "lata", "longa")) + + names(obj_renamed) <- names(obj) + + expect_identical(obj, obj_renamed) +}) + +test_that("Messages", { + skip_on_cran() + skip_if_api_server() + skip_if_offline() + + + expect_snapshot( + out <- arc_geo("Madrid", limit = 200) + ) + + + expect_snapshot(out <- arc_geo("Madrid", verbose = TRUE)) +}) + +test_that("Data format", { + skip_on_cran() + skip_if_api_server() + skip_if_offline() + + obj <- arc_geo("Madrid") + expect_s3_class(obj, "tbl") +}) + + + +test_that("Checking query", { + skip_on_cran() + skip_if_api_server() + skip_if_offline() + + + + obj <- arc_geo("Madrid", + long = "ong", lat = "at", + full_results = FALSE, + return_addresses = FALSE + ) + expect_identical(names(obj), c("query", "at", "ong")) + + obj1 <- arc_geo("Madrid", + long = "ong", lat = "at", + full_results = FALSE, + return_addresses = TRUE + ) + nobj1 <- ncol(obj1) + obj2 <- arc_geo("Madrid", + long = "ong", lat = "at", + full_results = TRUE, + return_addresses = TRUE + ) + nobj2 <- ncol(obj2) + expect_gt(nobj2, nobj1) + + # Try with outfields + obj3 <- arc_geo("Madrid", + long = "ong", lat = "at", + full_results = FALSE, + return_addresses = TRUE, + custom_query = list(outFields = "PlaceName") + ) + + expect_equal(ncol(obj3) - nobj1, 1) + expect_equal(setdiff(names(obj3), names(obj1)), "PlaceName") + + obj <- arc_geo("Madrid", + long = "ong", lat = "at", + full_results = TRUE, + return_addresses = FALSE + ) + + expect_identical(names(obj)[1:4], c("query", "at", "ong", "address")) + expect_gt(ncol(obj), 4) + + # Boosting with parameters + + query <- arc_geo("Burger King", + limit = 10, + full_results = TRUE, + sourcecountry = "ES" + ) + expect_gt(nrow(query), 4) + + # Should be in Spain + expect_true(any(query$Country == "ESP")) + + # And different than + query2 <- arc_geo("Burger King", + limit = 10, + full_results = TRUE + ) + + expect_false(any(query$lon == query2$lon)) + + # Select with other outsr + query3 <- arc_geo("Burger King", + limit = 10, + full_results = TRUE, + outsr = 102100 + ) + + expect_false(any(query3$lon == query2$lon)) + expect_true(all(query2$LongLabel == query3$LongLabel)) +}) + + +test_that("Dedupe", { + skip_on_cran() + skip_if_api_server() + skip_if_offline() + + # Dupes + expect_silent( + dup <- arc_geo(rep(c("Pentagon", "Barcelona"), 50), + limit = 1, + progressbar = FALSE, + verbose = FALSE + ) + ) + + expect_equal(nrow(dup), 100) + expect_equal(as.character(dup$query), rep(c("Pentagon", "Barcelona"), 50)) + + # Check deduping + dedup <- dplyr::distinct(dup) + + expect_equal(nrow(dedup), 2) + expect_equal(as.character(dedup$query), rep(c("Pentagon", "Barcelona"), 1)) +}) + + +test_that("Progress bar", { + skip_on_cran() + skip_if_api_server() + skip_if_offline() + # No pbar + expect_silent(arc_geo("Madrid")) + expect_silent(arc_geo("Madrid", progressbar = TRUE)) + + # Get a pbar + expect_output(aa <- arc_geo(c("Madrid", "Barcelona"))) + + # Not + expect_silent(aa <- arc_geo(c("Madrid", "Barcelona"), progressbar = FALSE)) +})