Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental support for GDAL 3.6 columnar API #2036

Merged
merged 22 commits into from
Oct 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Imports:
utils
Suggests:
blob,
nanoarrow,
covr,
dplyr (>= 0.8-3),
ggplot2,
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f
.Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
}

# R-level entry point for the compiled routine `_sf_CPL_read_gdal_stream`.
# All twelve arguments are handed to .Call() unchanged and in the same order;
# the value returned by the native routine is returned as-is.
# NOTE(review): this lives in R/RcppExports.R and looks like generated glue --
# manual edits are presumably overwritten when the exports are regenerated.
CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query,
                                 options, quiet, drivers, wkt_filter,
                                 dsn_exists, dsn_isdb, fid_column, width) {
  .Call(
    `_sf_CPL_read_gdal_stream`,
    stream_xptr, datasource, layer, query,
    options, quiet, drivers, wkt_filter,
    dsn_exists, dsn_isdb, fid_column, width
  )
}

# R-level entry point for the compiled routine `_sf_CPL_gdalinfo`.
# The four arguments are forwarded to .Call() unchanged and in order, and the
# native routine's result is returned as-is.
# NOTE(review): part of R/RcppExports.R, presumably generated -- confirm before
# editing by hand.
CPL_gdalinfo <- function(obj, options, oo, co) {
  .Call(
    `_sf_CPL_gdalinfo`,
    obj, options, oo, co
  )
}
Expand Down
94 changes: 80 additions & 14 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ set_utf8 = function(x) {
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
#' all to the Multi variety; defaults to \code{TRUE}
#' @param stringsAsFactors logical; should character vectors be
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
#' \code{default.stringsAsFactors()}
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
Expand Down Expand Up @@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) {
}

process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
geometry_column = 1, as_tibble = FALSE, optional = FALSE) {

which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
Expand All @@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,

# in case no geometry is present:
if (length(which.geom) == 0) {
if (! quiet)
if (! quiet)
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
x = if (!as_tibble) {
if (any(sapply(x, is.list)))
Expand Down Expand Up @@ -192,8 +192,13 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
for (i in seq_along(lc.other))
x[[ nm.lc[i] ]] = list.cols[[i]]

for (i in seq_along(geom))
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
for (i in seq_along(geom)) {
if (is.null(attr(geom[[i]], "bbox"))) {
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
} else {
x[[ nm[i] ]] = geom[[i]]
}
}

x = st_as_sf(x, ...,
sf_column_name = if (is.character(geometry_column)) geometry_column else nm[geometry_column],
Expand All @@ -204,20 +209,70 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
x
}

# Default for the `use_stream` argument of st_read().
#
# The R option "sf.st_read_use_stream" wins when it is set; otherwise the
# default is TRUE only if the environment variable R_SF_ST_READ_USE_STREAM is
# exactly "true" (case-sensitive), and FALSE in every other case. Being able to
# flip the default from outside makes it easy to run the existing st_read()
# tests through the stream interface.
#
# Returns the option value unchanged when the option is set, else TRUE/FALSE.
default_st_read_use_stream = function() {
  env_requests_stream = identical(Sys.getenv("R_SF_ST_READ_USE_STREAM"), "true")
  getOption("sf.st_read_use_stream", default = env_requests_stream)
}

# Turn an array stream filled by CPL_read_gdal_stream() into the input that
# process_cpl_read_ogr() expects, and return whatever that function returns.
#
# x               array stream; must support x$get_schema() and
#                 nanoarrow::convert_array_stream()
# default_crs     CRS used for the geometry columns when `crs` is NULL
# num_features    size hint for the conversion; -1 is replaced by NULL
#                 (no size hint)
# fid_column_name when of length one, the new name for an "OGC_FID" column
# crs             optional CRS overriding `default_crs`
# ...             forwarded untouched to process_cpl_read_ogr()
process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name,
    crs = NULL, ...) {
  # A schema child is a geometry column when its metadata carries the
  # "ogc.wkb" extension name.
  schema_fields = x$get_schema()$children
  is_geometry_column = vapply(
    schema_fields,
    function(field) identical(field$metadata[["ARROW:extension:name"]], "ogc.wkb"),
    logical(1)
  )

  crs = st_crs(if (is.null(crs)) default_crs else crs)

  size_hint = if (num_features == -1) NULL else num_features
  # NOTE(review): every warning raised during conversion is silenced here;
  # presumably these are type-conversion notices -- confirm.
  df = suppressWarnings(nanoarrow::convert_array_stream(x, size = size_hint))

  # WKB payloads -> sfc geometry columns carrying the resolved CRS
  wkb_to_sfc = function(wkb) {
    class(wkb) = "WKB"
    st_set_crs(st_as_sfc(wkb), crs)
  }
  df[is_geometry_column] = lapply(df[is_geometry_column], wkb_to_sfc)

  # Select all columns except `nm`, followed by `nm` itself
  move_to_end = function(d, nm) {
    d[c(setdiff(names(d), nm), nm)]
  }

  # Prefer "geometry" as the geometry column name: the first geometry column
  # takes that name unless some column already has it.
  if (any(is_geometry_column) && !("geometry" %in% names(df))) {
    first_geometry = match(TRUE, is_geometry_column)
    names(df)[first_geometry] = "geometry"
  }

  # The feature id column goes last and takes the caller-supplied name
  if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) {
    df = move_to_end(df, "OGC_FID")
    names(df)[names(df) == "OGC_FID"] = fid_column_name
  }

  # ... and the geometry column is moved behind it, to the very end
  if ("geometry" %in% names(df)) {
    df = move_to_end(df, "geometry")
  }

  process_cpl_read_ogr(df, ...)
}

#' @name st_read
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
#' @param drivers character; limited set of driver short names to be tried (default: try all)
#' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples
#' @param optional logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE}
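#' @param use_stream logical; if \code{TRUE}, read the layer through the experimental columnar (Arrow stream) interface introduced in GDAL 3.6, which uses the suggested package \pkg{nanoarrow}. The default is taken from option \code{sf.st_read_use_stream}; when that option is unset it is \code{TRUE} only if environment variable \code{R_SF_ST_READ_USE_STREAM} equals \code{"true"}.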
#' @note The use of \code{system.file} in examples makes sure that examples run regardless of where R is installed:
#' typical users will not use \code{system.file} but give the file name directly, either with full path or relative
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
#' that reside in the same directory, only one of them having extension \code{.shp}.
#' @export
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
drivers = character(0), wkt_filter = character(0), optional = FALSE) {
drivers = character(0), wkt_filter = character(0), optional = FALSE,
use_stream = default_st_read_use_stream()) {

layer = if (missing(layer))
character(0)
Expand All @@ -233,11 +288,22 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
if (length(promote_to_multi) > 1)
stop("`promote_to_multi' should have length one, and applies to all geometry columns")

x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)


if (use_stream) {
stream = nanoarrow::nanoarrow_allocate_array_stream()
info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width"))
process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]],
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...)
} else {
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))

process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)
}
}

#' @name st_read
Expand Down Expand Up @@ -606,7 +672,7 @@ print.sf_layers = function(x, ...) {
#' @param options character; driver dependent dataset open options, multiple options supported.
#' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver
#' @name st_layers
#' @return list object of class \code{sf_layers} with elements
#' @return list object of class \code{sf_layers} with elements
#' \describe{
#' \item{name}{name of the layer}
#' \item{geomtype}{list with for each layer the geometry types}
Expand Down Expand Up @@ -751,7 +817,7 @@ check_append_delete <- function(append, delete) {

#' @name st_write
#' @export
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' omitted; it returns TRUE on success, FALSE on failure, invisibly.
st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) {
invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0)
Expand Down
2 changes: 1 addition & 1 deletion man/st_layers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/st_read.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/st_write.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,28 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// CPL_read_gdal_stream
// Glue between R's .Call() interface and the C++ function CPL_read_gdal_stream().
// The wrapper receives twelve SEXP arguments, converts each one to the C++ type
// named in the forward declaration below, calls the C++ function and returns its
// result wrapped as a SEXP. It is listed in CallEntries with an argument count
// of 12, which must stay in sync with this signature.
// NOTE(review): this file is src/RcppExports.cpp and the block looks generated
// (Rcpp attributes) -- hand edits are presumably lost on regeneration.
Rcpp::List CPL_read_gdal_stream(Rcpp::RObject stream_xptr, Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, bool dsn_exists, bool dsn_isdb, Rcpp::CharacterVector fid_column, int width);
RcppExport SEXP _sf_CPL_read_gdal_stream(SEXP stream_xptrSEXP, SEXP datasourceSEXP, SEXP layerSEXP, SEXP querySEXP, SEXP optionsSEXP, SEXP quietSEXP, SEXP driversSEXP, SEXP wkt_filterSEXP, SEXP dsn_existsSEXP, SEXP dsn_isdbSEXP, SEXP fid_columnSEXP, SEXP widthSEXP) {
// BEGIN_RCPP / END_RCPP bracket the body; presumably they turn C++ exceptions
// into R errors -- confirm against the Rcpp macro definitions.
BEGIN_RCPP
// Holds the value handed back to R.
Rcpp::RObject rcpp_result_gen;
// NOTE(review): presumably synchronises R's RNG state around the call
// (standard Rcpp glue) -- confirm in the Rcpp documentation.
Rcpp::RNGScope rcpp_rngScope_gen;
// One converter per argument, in the same order as the C++ signature.
Rcpp::traits::input_parameter< Rcpp::RObject >::type stream_xptr(stream_xptrSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type datasource(datasourceSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type layer(layerSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type query(querySEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type options(optionsSEXP);
Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type drivers(driversSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type wkt_filter(wkt_filterSEXP);
Rcpp::traits::input_parameter< bool >::type dsn_exists(dsn_existsSEXP);
Rcpp::traits::input_parameter< bool >::type dsn_isdb(dsn_isdbSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type fid_column(fid_columnSEXP);
Rcpp::traits::input_parameter< int >::type width(widthSEXP);
// Call the implementation and convert its Rcpp::List result for R.
rcpp_result_gen = Rcpp::wrap(CPL_read_gdal_stream(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width));
return rcpp_result_gen;
END_RCPP
}
// CPL_gdalinfo
Rcpp::CharacterVector CPL_gdalinfo(Rcpp::CharacterVector obj, Rcpp::CharacterVector options, Rcpp::CharacterVector oo, Rcpp::CharacterVector co);
RcppExport SEXP _sf_CPL_gdalinfo(SEXP objSEXP, SEXP optionsSEXP, SEXP ooSEXP, SEXP coSEXP) {
Expand Down Expand Up @@ -1442,6 +1464,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_sf_CPL_gdal_linestring_sample", (DL_FUNC) &_sf_CPL_gdal_linestring_sample, 2},
{"_sf_CPL_get_layers", (DL_FUNC) &_sf_CPL_get_layers, 3},
{"_sf_CPL_read_ogr", (DL_FUNC) &_sf_CPL_read_ogr, 14},
{"_sf_CPL_read_gdal_stream", (DL_FUNC) &_sf_CPL_read_gdal_stream, 12},
{"_sf_CPL_gdalinfo", (DL_FUNC) &_sf_CPL_gdalinfo, 4},
{"_sf_CPL_ogrinfo", (DL_FUNC) &_sf_CPL_ogrinfo, 4},
{"_sf_CPL_gdaladdo", (DL_FUNC) &_sf_CPL_gdaladdo, 8},
Expand Down
62 changes: 47 additions & 15 deletions src/gdal_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,14 +288,14 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
// POSIXlt: sec min hour mday mon year wday yday isdst ...
Rcpp::List dtlst =
Rcpp::List::create(
Rcpp::_["sec"] = (double) Second,
Rcpp::_["sec"] = (double) Second,
Rcpp::_["min"] = (int) Minute,
Rcpp::_["hour"] = (int) Hour,
Rcpp::_["mday"] = (int) Day,
Rcpp::_["mon"] = (int) Month - 1,
Rcpp::_["year"] = (int) Year - 1900,
Rcpp::_["wday"] = NA_INTEGER,
Rcpp::_["yday"] = NA_INTEGER,
Rcpp::_["wday"] = NA_INTEGER,
Rcpp::_["yday"] = NA_INTEGER,
Rcpp::_["isdst"] = NA_INTEGER,
Rcpp::_["zone"] = tzone,
Rcpp::_["gmtoff"] = NA_INTEGER);
Expand Down Expand Up @@ -502,22 +502,25 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
return out;
}

// [[Rcpp::export]]
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
// Finalizer for the external pointer that wraps an open GDALDataset.
// CPL_ogr_layer_setup() registers it with R_RegisterCFinalizer() on the dataset
// external pointer. If the pointer still holds a dataset address the dataset is
// closed with GDALClose(); a null address means there is nothing to close
// (CPL_read_ogr() sets the address to nullptr after closing the dataset itself).
static void finalize_dataset_xptr(SEXP dataset_xptr) {
	GDALDataset *dataset = static_cast<GDALDataset *>(R_ExternalPtrAddr(dataset_xptr));
	if (dataset == nullptr)
		return; // nothing left to close

	GDALClose(dataset);
}

Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
Rcpp::CharacterVector query,
Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser,
Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers,
Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers,
Rcpp::CharacterVector wkt_filter,
bool promote_to_multi = true, bool int64_as_string = false,
bool dsn_exists = true,
bool dsn_isdb = false,
int width = 80) {

// adapted from the OGR tutorial @ www.gdal.org
bool dsn_exists,
bool dsn_isdb,
int width) {
// adapted from the OGR tutorial @ www.gdal.org
std::vector <char *> open_options = create_options(options, quiet);
std::vector <char *> drivers_v = create_options(drivers, quiet);
GDALDataset *poDS;
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY,
drivers.size() ? drivers_v.data() : NULL, open_options.data(), NULL );
if( poDS == NULL ) {
// could not open dsn
Expand All @@ -533,6 +536,11 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
Rcpp::stop("Cannot open %s; The file doesn't seem to exist.", datasource);
}

// Will close the dataset if some early return/exception prevents GDALClose() from being
// called/allows the result to be accessed by the caller.
Rcpp::RObject dataset_xptr = R_MakeExternalPtr(poDS, R_NilValue, R_NilValue);
R_RegisterCFinalizer(dataset_xptr, &finalize_dataset_xptr);

if (layer.size() == 0 && Rcpp::CharacterVector::is_na(query[0])) { // no layer specified
switch (poDS->GetLayerCount()) {
case 0: { // error:
Expand Down Expand Up @@ -596,7 +604,7 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
Rcpp::Rcout << "Reading layer `" << layer[0] << "' from data source ";
// if (LENGTH(datasource[0]) > (width - (34 + LENGTH(layer[0]))))
Rcpp::String ds(datasource(0));
if (layer.size()) {
if (layer.size()) {
Rcpp::String la(layer(0));
if (strlen(ds.get_cstring()) > (width - (34 + strlen(la.get_cstring()))))
Rcpp::Rcout << std::endl << " ";
Expand All @@ -607,6 +615,29 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
Rcpp::Rcout << "using driver `" << poDS->GetDriverName() << "'" << std::endl; // #nocov
}

// Keeps the dataset external pointer alive as long as the layer external pointer is alive
Rcpp::RObject layer_xptr = R_MakeExternalPtr(poLayer, R_NilValue, dataset_xptr);

return Rcpp::List::create(dataset_xptr, layer_xptr);
}

// [[Rcpp::export]]
Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
Rcpp::CharacterVector query,
Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser,
Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers,
Rcpp::CharacterVector wkt_filter,
bool promote_to_multi = true, bool int64_as_string = false,
bool dsn_exists = true,
bool dsn_isdb = false,
int width = 80) {
Rcpp::List prep = CPL_ogr_layer_setup(datasource, layer, query, options,
quiet, drivers,
wkt_filter,
dsn_exists, dsn_isdb, width);
OGRDataSource* poDS = (OGRDataSource*)(R_ExternalPtrAddr(prep[0]));
OGRLayer* poLayer = (OGRLayer*)R_ExternalPtrAddr(prep[1]);

Rcpp::List out = sf_from_ogrlayer(poLayer, quiet, int64_as_string, toTypeUser, fid_column_name,
promote_to_multi);

Expand All @@ -615,5 +646,6 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector
poDS->ReleaseResultSet(poLayer);

GDALClose(poDS);
R_SetExternalPtrAddr(prep[0], nullptr);
return out;
}
12 changes: 11 additions & 1 deletion src/gdal_read.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,

Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string,
Rcpp::NumericVector toTypeUser, Rcpp::CharacterVector fid_column, bool promote_to_multi);

Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer,
Rcpp::CharacterVector query,
Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers,
Rcpp::CharacterVector wkt_filter,
bool dsn_exists,
bool dsn_isdb,
int width);

Rcpp::List CPL_read_gdal(Rcpp::CharacterVector fname, Rcpp::CharacterVector options, Rcpp::CharacterVector driver,
bool read_data, Rcpp::NumericVector NA_value, Rcpp::List RasterIO_parameters);
Loading
Loading