Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(r): Improve vctr class integration #79

Merged
merged 15 commits into from
Dec 2, 2023
19 changes: 19 additions & 0 deletions r/geoarrow/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method("[",geoarrow_vctr)
S3method("[<-",geoarrow_vctr)
S3method("[[<-",geoarrow_vctr)
S3method(as.character,geoarrow_vctr)
S3method(as_geoarrow_array,character)
S3method(as_geoarrow_array,default)
S3method(as_geoarrow_array,nanoarrow_array)
Expand All @@ -12,10 +15,20 @@ S3method(as_geoarrow_array_stream,default)
S3method(as_geoarrow_array_stream,geoarrow_vctr)
S3method(as_geoarrow_array_stream,nanoarrow_array_stream)
S3method(as_nanoarrow_array,sfc)
S3method(as_nanoarrow_array_extension,geoarrow_extension_spec)
S3method(as_nanoarrow_array_stream,geoarrow_vctr)
S3method(as_nanoarrow_schema,geoarrow_vctr)
S3method(convert_array,geoarrow_vctr)
S3method(convert_array,sfc)
S3method(convert_array,wk_wkb)
S3method(convert_array,wk_wkt)
S3method(convert_array,wk_xy)
S3method(convert_array_extension,geoarrow_extension_spec)
S3method(format,geoarrow_vctr)
S3method(infer_geoarrow_schema,default)
S3method(infer_geoarrow_schema,nanoarrow_array)
S3method(infer_geoarrow_schema,nanoarrow_array_stream)
S3method(infer_nanoarrow_ptype_extension,geoarrow_extension_spec)
S3method(infer_nanoarrow_schema,geoarrow_vctr)
S3method(infer_nanoarrow_schema,sfc)
S3method(infer_nanoarrow_schema,wk_wkb)
Expand All @@ -26,6 +39,7 @@ S3method(wk_handle,geoarrow_vctr)
S3method(wk_is_geodesic,geoarrow_vctr)
export(as_geoarrow_array)
export(as_geoarrow_array_stream)
export(as_geoarrow_vctr)
export(geoarrow_handle)
export(geoarrow_schema_parse)
export(geoarrow_writer)
Expand All @@ -36,7 +50,12 @@ export(na_extension_large_wkt)
export(na_extension_wkb)
export(na_extension_wkt)
importFrom(nanoarrow,as_nanoarrow_array)
importFrom(nanoarrow,as_nanoarrow_array_extension)
importFrom(nanoarrow,as_nanoarrow_array_stream)
importFrom(nanoarrow,as_nanoarrow_schema)
importFrom(nanoarrow,convert_array)
importFrom(nanoarrow,convert_array_extension)
importFrom(nanoarrow,infer_nanoarrow_ptype_extension)
importFrom(nanoarrow,infer_nanoarrow_schema)
importFrom(wk,wk_crs)
importFrom(wk,wk_handle)
Expand Down
11 changes: 11 additions & 0 deletions r/geoarrow/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,14 @@ as_validity_buffer <- function(x) {

list(null_count == null_count, buffer = x$buffers[[2]])
}

# This really needs a helper in nanoarrow, but for now, we need a way to drop
# the extension type and convert storage only for testing
force_array_storage <- function(array) {
schema <- infer_nanoarrow_schema(array)
schema$metadata[["ARROW:extension:name"]] <- NULL
array_shallow <- nanoarrow::nanoarrow_allocate_array()
nanoarrow::nanoarrow_pointer_export(array, array_shallow)
nanoarrow::nanoarrow_array_set_schema(array_shallow, schema, validate = FALSE)
}

42 changes: 42 additions & 0 deletions r/geoarrow/R/nanoarrow-compat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

register_geoarrow_extension <- function() {
for (ext_name in geoarrow_extension_name_all()) {
nanoarrow::register_nanoarrow_extension(
ext_name,
nanoarrow::nanoarrow_extension_spec(subclass = "geoarrow_extension_spec")
)
}
}

#' @importFrom nanoarrow infer_nanoarrow_ptype_extension
#' @export
infer_nanoarrow_ptype_extension.geoarrow_extension_spec <- function(extension_spec, x, ...) {
new_geoarrow_vctr(list(), x, integer())
}

#' @importFrom nanoarrow convert_array_extension
#' @export
convert_array_extension.geoarrow_extension_spec <- function(extension_spec,
array, to, ...) {
# For the default, this will dispatch to convert_array.geoarrow_vctr().
# This gets called if to is a base R type (e.g., integer())
stop(
sprintf(
"Can't convert geoarrow extension array to object of class '%s'",
class(to)[1]
)
)
}

#' @importFrom nanoarrow as_nanoarrow_array_extension
#' @export
as_nanoarrow_array_extension.geoarrow_extension_spec <- function(
extension_spec, x, ..., schema = NULL) {
as_geoarrow_array(x, schema = schema)
}

#' @importFrom nanoarrow convert_array
#' @export
convert_array.geoarrow_vctr <- function(array, to, ...) {
as_geoarrow_vctr(array, schema = as_nanoarrow_schema(to))
}
12 changes: 12 additions & 0 deletions r/geoarrow/R/sf-compat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@

# exported in zzz.R
st_as_sfc.geoarrow_vctr <- function(x, ..., promote_multi = FALSE) {
sfc <- wk::wk_handle(x, wk::sfc_writer(promote_multi))
sf::st_set_crs(sfc, sf::st_crs(wk::wk_crs(x)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we prefer wk::wk_set_crs(sfc, wk::wk_crs(x)) here?

In theory, this change reduces geoarrow's knowledge of sf by deferring that responsibility to wk. In reality, it's the same code and still requires sf to be installed, so maybe it doesn't matter.

}

#' @export
convert_array.sfc <- function(array, to, ..., sfc_promote_multi = FALSE) {
vctr <- as_geoarrow_vctr(array)
st_as_sfc.geoarrow_vctr(vctr, promote_multi = sfc_promote_multi)
}

#' @importFrom nanoarrow infer_nanoarrow_schema
#' @export
infer_nanoarrow_schema.sfc <- function(x, ...) {
Expand Down
6 changes: 6 additions & 0 deletions r/geoarrow/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ na_extension_geoarrow <- function(geometry_type, dimensions = "XY",
na_extension_geoarrow_internal(type_id, crs = crs, edges = edges)
}

geoarrow_extension_name_all <- function() {
c("geoarrow.wkt", "geoarrow.wkb", "geoarrow.point", "geoarrow.linestring",
"geoarrow.polygon", "geoarrow.multipoint", "geoarrow.mutlilinestring",
"geoarrow.multipolygon")
}

#' Inspect a GeoArrow schema
#'
#' @param schema A [nanoarrow_schema][nanoarrow::as_nanoarrow_schema]
Expand Down
79 changes: 78 additions & 1 deletion r/geoarrow/R/vctr.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@

#' GeoArrow encoded arrays as R vectors
#'
#' @param x An object that works with [as_geoarrow_array_stream()]. Most
#' spatial objects in R already work with this method.
#' @param ... Passed to [as_geoarrow_array_stream()]
#' @param schema An optional `schema` (e.g., [na_extension_geoarrow()]).
#'
#' @return A vctr of class 'geoarrow_vctr'
#' @export
#'
#' @examples
#' as_geoarrow_vctr("POINT (0 1)")
#'
as_geoarrow_vctr <- function(x, ..., schema = NULL) {
if (inherits(x, "geoarrow_vctr") && is.null(schema)) {
return(x)
Expand Down Expand Up @@ -28,16 +41,80 @@ new_geoarrow_vctr <- function(chunks, schema, indices = NULL) {
`[.geoarrow_vctr` <- function(x, i) {
attrs <- attributes(x)
x <- NextMethod()
# Assert slice?

if (is.null(vctr_as_slice(x))) {
stop(
"Can't subset geoarrow_vctr with non-slice (e.g., only i:j indexing is supported)"
)
}

attributes(x) <- attrs
x
}

#' @export
`[<-.geoarrow_vctr` <- function(x, i, value) {
stop("subset assignment for geoarrow_vctr is not supported")
}

#' @export
`[[<-.geoarrow_vctr` <- function(x, i, value) {
stop("subset assignment for geoarrow_vctr is not supported")
}

#' @export
format.geoarrow_vctr <- function(x, ..., width = NULL, digits = NULL) {
if (is.null(width)) {
width <- getOption("width", 100L)
}

width <- max(width, 20)

if (is.null(digits)) {
digits <- getOption("digits", 7L)
}

digits <- max(digits, 0)

formatted_array <- geoarrow_kernel_call_scalar(
"format_wkt",
x,
options = c(
max_element_size_bytes = width - 10L,
precision = digits
),
n = length(attr(x, "chunks"))
paleolimbot marked this conversation as resolved.
Show resolved Hide resolved
)

formatted_chr <- nanoarrow::convert_array_stream(
formatted_array,
character(),
size = length(x)
)

sprintf("<%s>", formatted_chr)
}

# Because RStudio's viewer uses this, we want to use the potentially abbreviated
# WKT from the format method
#' @export
as.character.geoarrow_vctr <- function(x, ...) {
format(x, ...)
}

#' @export
infer_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
attr(x, "schema")
}

# Because zero-length vctrs are R's way of communicating "type", implement
# as_nanoarrow_schema() here so that it works in places that expect a type
#' @importFrom nanoarrow as_nanoarrow_schema
#' @export
as_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
attr(x, "schema")
}

#' @export
as_geoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) {
as_nanoarrow_array_stream.geoarrow_vctr(x, ..., schema = NULL)
Expand Down
54 changes: 54 additions & 0 deletions r/geoarrow/R/wk-compat.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,60 @@ infer_nanoarrow_schema.wk_xy <- function(x, ...) {
wk_geoarrow_schema(x, na_extension_geoarrow, "POINT", dimensions = dims)
}

#' @export
convert_array.wk_wkt <- function(array, to, ...) {
schema <- infer_nanoarrow_schema(array)
geo <- geoarrow_schema_parse(schema)
vctr <- as_geoarrow_vctr(array)

if (geo$extension_name == "geoarrow.wkt") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a future pr, I think we could extract some of this into as_wkt, as_wkb, as_xy methods.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then these could be skeletons. Or the reverse, and as_ methods are just calling these methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They almost certainly should be optimized! I'm pretty sure they just go through the handler right now (but at least they work!):

library(wk)
library(geoarrow)

as_xy(as_geoarrow_vctr("POINT (0 1)"))
#> <wk_xy[1]>
#> [1] (0 1)
as_wkt(as_geoarrow_vctr("POINT (0 1)"))
#> <wk_wkt[1]>
#> [1] POINT (0 1)
as_wkb(as_geoarrow_vctr("POINT (0 1)"))
#> <wk_wkb[1]>
#> [1] <POINT (0 1)>

Created on 2023-12-01 with reprex v2.0.2

out <- wk::new_wk_wkt(convert_array(force_array_storage(array)))
} else {
out <- wk::wk_handle(vctr, wk::wkt_writer())
}

wk::wk_crs(out) <- wk::wk_crs_output(vctr, to)
wk::wk_is_geodesic_output(vctr, to)
out
paleolimbot marked this conversation as resolved.
Show resolved Hide resolved
}

#' @export
convert_array.wk_wkb <- function(array, to, ...) {
schema <- infer_nanoarrow_schema(array)
geo <- geoarrow_schema_parse(schema)
vctr <- as_geoarrow_vctr(array)

if (geo$extension_name == "geoarrow.wkb") {
storage <- convert_array(force_array_storage(array))
# Comes back as a blob::blob
attributes(storage) <- NULL
out <- wk::new_wk_wkb(storage)
} else {
out <- wk::wk_handle(vctr, wk::wkb_writer())
}

wk::wk_crs(out) <- wk::wk_crs_output(vctr, to)
wk::wk_is_geodesic_output(vctr, to)
paleolimbot marked this conversation as resolved.
Show resolved Hide resolved
out
}

#' @export
convert_array.wk_xy <- function(array, to, ...) {
schema <- infer_nanoarrow_schema(array)
geo <- geoarrow_schema_parse(schema)
vctr <- as_geoarrow_vctr(array)

if (geo$extension_name == "geoarrow.point") {
out <- wk::as_xy(convert_array(force_array_storage(array)))
} else {
out <- wk::wk_handle(vctr, wk::xy_writer())
}

wk::wk_crs(out) <- wk::wk_crs_output(vctr, to)
wk::wk_is_geodesic(out) <- wk::wk_is_geodesic_output(vctr, to)
wk::as_xy(out, dims = names(unclass(to)))
}

wk_geoarrow_schema <- function(x, type_constructor, ...) {
if (inherits(x, c("nanoarrow_array", "nanoarrow_array_stream"))) {
schema <- nanoarrow::infer_nanoarrow_schema(x)
Expand Down
71 changes: 71 additions & 0 deletions r/geoarrow/R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

# nocov start
.onLoad <- function(...) {
register_geoarrow_extension()

s3_register("sf::st_as_sfc", "geoarrow_vctr")
}

# From the `vctrs` package (this function is intended to be copied
# without attribution or license requirements to avoid a hard dependency on
# vctrs:
# https://github.com/r-lib/vctrs/blob/c2a7710fe55e3a2249c4fdfe75bbccbafcf38804/R/register-s3.R#L25-L31
s3_register <- function(generic, class, method = NULL) {
stopifnot(is.character(generic), length(generic) == 1)
stopifnot(is.character(class), length(class) == 1)

pieces <- strsplit(generic, "::")[[1]]
stopifnot(length(pieces) == 2)
package <- pieces[[1]]
generic <- pieces[[2]]

caller <- parent.frame()

get_method_env <- function() {
top <- topenv(caller)
if (isNamespace(top)) {
asNamespace(environmentName(top))
} else {
caller
}
}
get_method <- function(method, env) {
if (is.null(method)) {
get(paste0(generic, ".", class), envir = get_method_env())
} else {
method
}
}

register <- function(...) {
envir <- asNamespace(package)

# Refresh the method each time, it might have been updated by
# `devtools::load_all()`
method_fn <- get_method(method)
stopifnot(is.function(method_fn))


# Only register if generic can be accessed
if (exists(generic, envir)) {
registerS3method(generic, class, method_fn, envir = envir)
} else if (identical(Sys.getenv("NOT_CRAN"), "true")) {
warning(sprintf(
"Can't find generic `%s` in package %s to register S3 method.",
generic,
package
))
}
}

# Always register hook in case package is later unloaded & reloaded
setHook(packageEvent(package, "onLoad"), register)

# Avoid registration failures during loading (pkgload or regular)
if (isNamespaceLoaded(package)) {
register()
}

invisible()
}
# nocov end
26 changes: 26 additions & 0 deletions r/geoarrow/man/as_geoarrow_vctr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading