Skip to content

Commit

Permalink
Merge pull request #2238 from r-spatial/paleolimbot-stream-reading
Browse files Browse the repository at this point in the history
Paleolimbot stream reading
  • Loading branch information
edzer authored Oct 1, 2023
2 parents 9f302ee + 63afb30 commit 55f4cc8
Show file tree
Hide file tree
Showing 21 changed files with 345 additions and 68 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Imports:
utils
Suggests:
blob,
nanoarrow,
covr,
dplyr (>= 0.8-3),
ggplot2,
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f
.Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
}

CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width) {
.Call(`_sf_CPL_read_gdal_stream`, stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width)
}

CPL_gdalinfo <- function(obj, options, oo, co) {
.Call(`_sf_CPL_gdalinfo`, obj, options, oo, co)
}
Expand Down
12 changes: 6 additions & 6 deletions R/cast_sfg.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ ClosePol <- function(mtrx) {
#' @examples
#' # example(st_read)
#' nc = st_read(system.file("shape/nc.shp", package="sf"))
#' mpl <- nc$geometry[[4]]
#' mpl <- st_geometry(nc)[[4]]
#' #st_cast(x) ## error 'argument "to" is missing, with no default'
#' cast_all <- function(xg) {
#' lapply(c("MULTIPOLYGON", "MULTILINESTRING", "MULTIPOINT", "POLYGON", "LINESTRING", "POINT"),
Expand Down Expand Up @@ -81,7 +81,7 @@ st_cast.MULTIPOLYGON <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' mls <- st_cast(nc$geometry[[4]], "MULTILINESTRING")
#' mls <- st_cast(st_geometry(nc)[[4]], "MULTILINESTRING")
#' st_sfc(cast_all(mls))
st_cast.MULTILINESTRING <- function(x, to, ...) {
switch(to,
Expand All @@ -108,7 +108,7 @@ st_cast.MULTILINESTRING <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' mpt <- st_cast(nc$geometry[[4]], "MULTIPOINT")
#' mpt <- st_cast(st_geometry(nc)[[4]], "MULTIPOINT")
#' st_sfc(cast_all(mpt))
st_cast.MULTIPOINT <- function(x, to, ...) {
switch(to,
Expand All @@ -135,7 +135,7 @@ st_cast.MULTIPOINT <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' pl <- st_cast(nc$geometry[[4]], "POLYGON")
#' pl <- st_cast(st_geometry(nc)[[4]], "POLYGON")
#' st_sfc(cast_all(pl))
st_cast.POLYGON <- function(x, to, ...) {
switch(to,
Expand All @@ -156,7 +156,7 @@ st_cast.POLYGON <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' ls <- st_cast(nc$geometry[[4]], "LINESTRING")
#' ls <- st_cast(st_geometry(nc)[[4]], "LINESTRING")
#' st_sfc(cast_all(ls))
st_cast.LINESTRING <- function(x, to, ...) {
switch(to,
Expand All @@ -173,7 +173,7 @@ st_cast.LINESTRING <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' pt <- st_cast(nc$geometry[[4]], "POINT")
#' pt <- st_cast(st_geometry(nc)[[4]], "POINT")
#' ## st_sfc(cast_all(pt)) ## Error: cannot create MULTIPOLYGON from POINT
#' st_sfc(lapply(c("POINT", "MULTIPOINT"), function(x) st_cast(pt, x)))
st_cast.POINT <- function(x, to, ...) {
Expand Down
96 changes: 82 additions & 14 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ set_utf8 = function(x) {
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
#' all to the Multi variety; defaults to \code{TRUE}
#' @param stringsAsFactors logical; logical: should character vectors be
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
#' \code{default.stringsAsFactors()}
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
Expand Down Expand Up @@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) {
}

process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
geometry_column = 1, as_tibble = FALSE, optional = FALSE) {

which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
Expand All @@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,

# in case no geometry is present:
if (length(which.geom) == 0) {
if (! quiet)
if (! quiet)
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
x = if (!as_tibble) {
if (any(sapply(x, is.list)))
Expand Down Expand Up @@ -192,8 +192,13 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
for (i in seq_along(lc.other))
x[[ nm.lc[i] ]] = list.cols[[i]]

for (i in seq_along(geom))
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
for (i in seq_along(geom)) {
if (is.null(attr(geom[[i]], "bbox"))) {
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
} else {
x[[ nm[i] ]] = geom[[i]]
}
}

x = st_as_sf(x, ...,
sf_column_name = if (is.character(geometry_column)) geometry_column else nm[geometry_column],
Expand All @@ -204,20 +209,72 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
x
}

# Allow setting the default to TRUE to make it easier to run existing tests
# of st_read() through the stream interface
default_st_read_use_stream = function() {
getOption(
"sf.st_read_use_stream",
identical(Sys.getenv("R_SF_ST_READ_USE_STREAM"), "true")
)
}

process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name,
crs = NULL, ...) {
is_geometry_column = vapply(
x$get_schema()$children,
function(s) identical(s$metadata[["ARROW:extension:name"]], "ogc.wkb"),
logical(1)
)

crs = if (is.null(crs)) st_crs(default_crs) else st_crs(crs)
if (num_features == -1) {
num_features = NULL
}
df = suppressWarnings(nanoarrow::convert_array_stream(x, size = num_features))

df[is_geometry_column] = lapply(df[is_geometry_column], function(x) {
class(x) <- "WKB"
x <- st_as_sfc(x)
st_set_crs(x, crs)
})

# # Prefer "geometry" as the geometry column name
# if (any(is_geometry_column) && !("geometry" %in% names(df))) {
# names(df)[which(is_geometry_column)[1]] = "geometry"
# }

# Rename OGC_FID to fid_column_name and move to end
if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) {
df <- df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")]
names(df)[names(df) == "OGC_FID"] = fid_column_name
}

# Move geometry to the end
# if ("geometry" %in% names(df)) {
# df <- df[c(setdiff(names(df), "geometry"), "geometry")]
# }
gc1 = which(is_geometry_column)[1]
df = df[c(setdiff(seq_along(df), gc1), gc1)]

process_cpl_read_ogr(df, ...)
}

#' @name st_read
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
#' @param drivers character; limited set of driver short names to be tried (default: try all)
#' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples
#' @param optional logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE}
#' @param use_stream Use TRUE to use the experimental columnar interface introduced in GDAL 3.6.
#' @note The use of \code{system.file} in examples make sure that examples run regardless where R is installed:
#' typical users will not use \code{system.file} but give the file name directly, either with full path or relative
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
#' that reside in the same directory, only one of them having extension \code{.shp}.
#' @export
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
drivers = character(0), wkt_filter = character(0), optional = FALSE) {
drivers = character(0), wkt_filter = character(0), optional = FALSE,
use_stream = default_st_read_use_stream()) {

layer = if (missing(layer))
character(0)
Expand All @@ -233,11 +290,22 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
if (length(promote_to_multi) > 1)
stop("`promote_to_multi' should have length one, and applies to all geometry columns")

x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)


if (use_stream) {
stream = nanoarrow::nanoarrow_allocate_array_stream()
info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width"))
process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]],
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...)
} else {
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))

process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)
}
}

#' @name st_read
Expand Down Expand Up @@ -606,7 +674,7 @@ print.sf_layers = function(x, ...) {
#' @param options character; driver dependent dataset open options, multiple options supported.
#' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver
#' @name st_layers
#' @return list object of class \code{sf_layers} with elements
#' @return list object of class \code{sf_layers} with elements
#' \describe{
#' \item{name}{name of the layer}
#' \item{geomtype}{list with for each layer the geometry types}
Expand Down Expand Up @@ -751,7 +819,7 @@ check_append_delete <- function(append, delete) {

#' @name st_write
#' @export
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' omitted; it returns TRUE on success, FALSE on failure, invisibly.
st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) {
invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0)
Expand Down
7 changes: 2 additions & 5 deletions R/tidyverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ mutate.sf <- function(.data, ..., .dots) {
#' @name tidyverse
#' @examples
#' if (require(dplyr, quietly = TRUE)) {
#' nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class()
#' nc %>% transmute(AREA = AREA/10) %>% class()
#' }
transmute.sf <- function(.data, ..., .dots) {
Expand All @@ -144,9 +143,7 @@ transmute.sf <- function(.data, ..., .dots) {
#' @examples
#' if (require(dplyr, quietly = TRUE)) {
#' nc %>% select(SID74, SID79) %>% names()
#' nc %>% select(SID74, SID79, geometry) %>% names()
#' nc %>% select(SID74, SID79) %>% class()
#' nc %>% select(SID74, SID79, geometry) %>% class()
#' }
#' @details \code{select} keeps the geometry regardless whether it is selected or not; to deselect it, first pipe through \code{as.data.frame} to let dplyr's own \code{select} drop it.
select.sf <- function(.data, ...) {
Expand Down Expand Up @@ -391,7 +388,7 @@ distinct.sf <- function(.data, ..., .keep_all = FALSE) {
#' @param na.rm see original function docs
#' @param factor_key see original function docs
#' @examples
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
#' nc %>% select(SID74, SID79) %>% gather("VAR", "SID", -geometry) %>% summary()
#' }
gather.sf <- function(data, key, value, ..., na.rm = FALSE, convert = FALSE, factor_key = FALSE) {
Expand Down Expand Up @@ -527,7 +524,7 @@ pivot_wider.sf = function(data,
#' @param fill see original function docs
#' @param drop see original function docs
#' @examples
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
#' nc$row = 1:100 # needed for spread to work
#' nc %>% select(SID74, SID79, geometry, row) %>%
#' gather("VAR", "SID", -geometry, -row) %>%
Expand Down
1 change: 1 addition & 0 deletions R/transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ chk_mpol = function(x) {
sanity_check = function(x) {
d = st_dimension(x) # flags empty geoms as NA
if (any(d == 2, na.rm = TRUE)) { # the polygon stuff
x = st_cast(x[d == 2]) # convert GEOMETRY to POLYGON or MULTIPOLYGON, if possible
if (inherits(x, "sfc_POLYGON"))
st_sfc(lapply(x, chk_pol), crs = st_crs(x))
else if (inherits(x, "sfc_MULTIPOLYGON"))
Expand Down
12 changes: 6 additions & 6 deletions man/st_cast.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/st_layers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/st_read.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/st_write.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions man/tidyverse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 55f4cc8

Please sign in to comment.