Skip to content

Commit

Permalink
Merge branch 'paleolimbot-use-promote-multi'
Browse files Browse the repository at this point in the history
  • Loading branch information
edzer committed Oct 30, 2023
2 parents 538ebc0 + 3fb8cd7 commit 1bc3af4
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 37 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Suggests:
tidyselect (>= 1.0.0),
tmap (>= 2.0),
vctrs,
wk
wk (>= 0.9.0)
LinkingTo:
Rcpp
VignetteBuilder:
Expand Down
61 changes: 34 additions & 27 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,44 +218,49 @@ default_st_read_use_stream = function() {
)
}

process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name,
crs = NULL, ...) {
is_geometry_column = vapply(
process_cpl_read_ogr_stream = function(x, geom_column_info, num_features, fid_column_name,
crs = NULL, promote_to_multi = TRUE, ...) {
is_geometry_column = vapply(
x$get_schema()$children,
function(s) identical(s$metadata[["ARROW:extension:name"]], "ogc.wkb"),
logical(1)
)

crs = if (is.null(crs)) st_crs(default_crs) else st_crs(crs)
if (num_features == -1) {

geom_column_info$index = which(is_geometry_column)

if (num_features == -1) {
num_features = NULL
}

# Suppress warnings about extension type conversion (since we want the
# default behaviour of converting the storage type)
df = suppressWarnings(nanoarrow::convert_array_stream(x, size = num_features))

df[is_geometry_column] = lapply(df[is_geometry_column], function(x) {
class(x) <- "WKB"
x <- st_as_sfc(x)
st_set_crs(x, crs)
})

# # Prefer "geometry" as the geometry column name
# if (any(is_geometry_column) && !("geometry" %in% names(df))) {
# names(df)[which(is_geometry_column)[1]] = "geometry"
# }

for (i in seq_len(nrow(geom_column_info))) {
crs = if (is.null(crs)) st_crs(geom_column_info$crs[[i]]) else st_crs(crs)
name = geom_column_info$name[[i]]
index = geom_column_info$index[[i]]

column_wkb = df[[index]]
attributes(column_wkb) = NULL
column_sfc = wk::wk_handle(
wk::new_wk_wkb(column_wkb),
wk::sfc_writer(promote_multi = promote_to_multi)
)

df[[index]] = st_set_crs(column_sfc, crs)
names(df)[index] = name
}

# Rename OGC_FID to fid_column_name and move to end
if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) {
df <- df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")]
df = df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")]
names(df)[names(df) == "OGC_FID"] = fid_column_name
}

# Move geometry to the end
# if ("geometry" %in% names(df)) {
# df <- df[c(setdiff(names(df), "geometry"), "geometry")]
# }
gc1 = which(is_geometry_column)[1]
df = df[c(setdiff(seq_along(df), gc1), gc1)]

# All geometry columns to the end
df = df[c(setdiff(seq_along(df), geom_column_info$index), geom_column_info$index)]

process_cpl_read_ogr(df, ...)
}

Expand Down Expand Up @@ -296,8 +301,10 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
stream = nanoarrow::nanoarrow_allocate_array_stream()
info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width"))
process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]],
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...)
geom_column_info = data.frame(name = info[[1]], crs = info[[2]], stringsAsFactors = FALSE)
process_cpl_read_ogr_stream(stream, geom_column_info, num_features = info[[3]],
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet,
promote_to_multi = promote_to_multi, ...)
} else {
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
Expand Down
39 changes: 30 additions & 9 deletions src/gdal_read_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,32 @@ Rcpp::List CPL_read_gdal_stream(
auto stream_out = reinterpret_cast<struct ArrowArrayStream*>(
R_ExternalPtrAddr(stream_xptr));

OGRSpatialReference* crs = poLayer->GetSpatialRef();

Rcpp::String wkt_str = NA_STRING;
if (crs != nullptr) {
char* wkt_out;
crs->exportToWkt(&wkt_out);
wkt_str = wkt_out;
CPLFree(wkt_out);
OGRFeatureDefn* poFDefn = poLayer->GetLayerDefn();
Rcpp::CharacterVector geom_field_name(poFDefn->GetGeomFieldCount());
Rcpp::CharacterVector geom_field_crs(poFDefn->GetGeomFieldCount());

for (int i = 0; i < poFDefn->GetGeomFieldCount(); i++) {
OGRGeomFieldDefn* poGFDefn = poFDefn->GetGeomFieldDefn(i);
if (poGFDefn == nullptr) {
Rcpp::stop("GeomFieldDefn error"); // #nocov
}

const char* name = poGFDefn->GetNameRef();
if (strlen(name) == 0) {
name = "geometry";
}

const OGRSpatialReference* crs = poGFDefn->GetSpatialRef();
Rcpp::String wkt_str = NA_STRING;
if (crs != nullptr) {
char* wkt_out;
crs->exportToWkt(&wkt_out);
wkt_str = wkt_out;
CPLFree(wkt_out);
}

geom_field_name[i] = name;
geom_field_crs[i] = wkt_str;
}

struct ArrowArrayStream stream_temp;
Expand All @@ -122,7 +140,10 @@ Rcpp::List CPL_read_gdal_stream(
num_features = -1;
}

return Rcpp::List::create(wkt_str, Rcpp::NumericVector::create(num_features));
return Rcpp::List::create(
geom_field_name,
geom_field_crs,
Rcpp::NumericVector::create(num_features));

#else

Expand Down

0 comments on commit 1bc3af4

Please sign in to comment.