Skip to content

Commit

Permalink
ENH: Add support for dataset read options (#233)
Browse files Browse the repository at this point in the history
  • Loading branch information
brendan-ward authored Apr 2, 2023
1 parent 60cd0e9 commit d371dde
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 61 deletions.
6 changes: 4 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

### Improvements

- Add automatic detection of 2.5D geometries in write_dataframe (#223, #229)
- Add "driver" property to read_info result (#224)
- Add automatic detection of 2.5D geometries in `write_dataframe` (#223, #229)
- Add "driver" property to `read_info` result (#224)
- Add support for dataset open options to `read`, `read_dataframe`, and
`read_info` (#233)

## 0.5.1 (2023-01-26)

Expand Down
153 changes: 96 additions & 57 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,29 @@ cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
# return 0


# ported from fiona::_shim22.pyx::gdal_open_vector
cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
cdef char** dict_to_options(object values):
"""Convert a python dictionary into name / value pairs (stored in a char**)
Parameters
----------
values: dict
Returns
-------
char**
"""
cdef char **options = NULL

for k, v in values.items():
k = k.encode('UTF-8')
v = v.encode('UTF-8')
options = CSLAddNameValue(options, <const char *>k, <const char *>v)

return options


cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
cdef void* ogr_dataset = NULL
cdef char **ogr_drivers = NULL
cdef void* ogr_driver = NULL
cdef char **open_opts = NULL

# Force linear approximations in all cases
OGRSetNonLinearGeometriesEnabledFlag(0)
Expand All @@ -125,12 +142,14 @@ cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
else:
flags |= GDAL_OF_READONLY

open_opts = CSLAddNameValue(open_opts, "VALIDATE_OPEN_OPTIONS", "NO")

try:
# WARNING: GDAL logs warnings about invalid open options to stderr
# instead of raising an error
ogr_dataset = exc_wrap_pointer(
GDALOpenEx(path_c, flags, <const char *const *>ogr_drivers, <const char *const *>open_opts, NULL)
GDALOpenEx(path_c, flags, NULL, <const char *const *>options, NULL)
)

return ogr_dataset

except NullPointerError:
Expand All @@ -139,10 +158,6 @@ cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
except CPLE_BaseError as exc:
raise DataSourceError(str(exc))

finally:
CSLDestroy(ogr_drivers)
CSLDestroy(open_opts)


cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
"""Open OGR layer by index or name.
Expand Down Expand Up @@ -266,7 +281,7 @@ cdef str get_crs(OGRLayerH ogr_layer):

cdef get_driver(OGRDataSourceH ogr_dataset):
"""Get the driver for a dataset.
Parameters
----------
ogr_dataset : pointer to open OGR dataset
Expand Down Expand Up @@ -858,6 +873,7 @@ cdef get_bounds(

def ogr_read(
str path,
object dataset_kwargs,
object layer=None,
object encoding=None,
int read_geometry=True,
Expand All @@ -870,11 +886,11 @@ def ogr_read(
object fids=None,
str sql=None,
str sql_dialect=None,
int return_fids=False,
**kwargs):
int return_fids=False):

cdef int err = 0
cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef const char *where_c = NULL
cdef const char *field_c = NULL
cdef char **fields_c = NULL
Expand All @@ -897,8 +913,10 @@ def ogr_read(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

ogr_dataset = ogr_open(path_c, 0, kwargs)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
Expand Down Expand Up @@ -991,6 +1009,10 @@ def ogr_read(
}

finally:
if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
if sql is not None:
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
Expand All @@ -1008,6 +1030,7 @@ def ogr_read(

def ogr_read_arrow(
str path,
dataset_kwargs,
object layer=None,
object encoding=None,
int read_geometry=True,
Expand All @@ -1020,11 +1043,11 @@ def ogr_read_arrow(
object fids=None,
str sql=None,
str sql_dialect=None,
int return_fids=False,
**kwargs):
int return_fids=False):

cdef int err = 0
cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef const char *where_c = NULL
cdef OGRDataSourceH ogr_dataset = NULL
cdef OGRLayerH ogr_layer = NULL
Expand All @@ -1051,8 +1074,10 @@ def ogr_read_arrow(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

ogr_dataset = ogr_open(path_c, 0, kwargs)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
Expand Down Expand Up @@ -1128,10 +1153,16 @@ def ogr_read_arrow(
}

finally:

CSLDestroy(options)
if fields_c != NULL:
CSLDestroy(fields_c)
fields_c = NULL

if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
if sql is not None:
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
Expand All @@ -1152,8 +1183,7 @@ def ogr_read_bounds(
int skip_features=0,
int max_features=0,
object where=None,
tuple bbox=None,
**kwargs):
tuple bbox=None):

cdef int err = 0
cdef const char *path_c = NULL
Expand All @@ -1170,7 +1200,7 @@ def ogr_read_bounds(
if layer is None:
layer = 0

ogr_dataset = ogr_open(path_c, 0, kwargs)
ogr_dataset = ogr_open(path_c, 0, NULL)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Apply the attribute filter
Expand All @@ -1187,8 +1217,14 @@ def ogr_read_bounds(
return get_bounds(ogr_layer, skip_features, num_features)


def ogr_read_info(str path, object layer=None, object encoding=None, **kwargs):
def ogr_read_info(
str path,
dataset_kwargs,
object layer=None,
object encoding=None):

cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef OGRDataSourceH ogr_dataset = NULL
cdef OGRLayerH ogr_layer = NULL

Expand All @@ -1199,37 +1235,44 @@ def ogr_read_info(str path, object layer=None, object encoding=None, **kwargs):
if layer is None:
layer = 0

ogr_dataset = ogr_open(path_c, 0, kwargs)
ogr_layer = get_ogr_layer(ogr_dataset, layer)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Encoding is derived from the user, from the dataset capabilities / type,
# or from the system locale
encoding = (
encoding
or detect_encoding(ogr_dataset, ogr_layer)
or locale.getpreferredencoding()
)
# Encoding is derived from the user, from the dataset capabilities / type,
# or from the system locale
encoding = (
encoding
or detect_encoding(ogr_dataset, ogr_layer)
or locale.getpreferredencoding()
)

fields = get_fields(ogr_layer, encoding)

fields = get_fields(ogr_layer, encoding)

meta = {
'crs': get_crs(ogr_layer),
'encoding': encoding,
'fields': fields[:,2], # return only names
'dtypes': fields[:,3],
'geometry_type': get_geometry_type(ogr_layer),
'features': OGR_L_GetFeatureCount(ogr_layer, 1),
'driver': get_driver(ogr_dataset),
"capabilities": {
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead),
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex),
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter),
meta = {
'crs': get_crs(ogr_layer),
'encoding': encoding,
'fields': fields[:,2], # return only names
'dtypes': fields[:,3],
'geometry_type': get_geometry_type(ogr_layer),
'features': OGR_L_GetFeatureCount(ogr_layer, 1),
'driver': get_driver(ogr_dataset),
"capabilities": {
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead),
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex),
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter),
}
}
}

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL
finally:
if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL

return meta

Expand All @@ -1243,7 +1286,7 @@ def ogr_list_layers(str path):
path_b = path.encode('utf-8')
path_c = path_b

ogr_dataset = ogr_open(path_c, 0, None)
ogr_dataset = ogr_open(path_c, 0, NULL)

layer_count = GDALDatasetGetLayerCount(ogr_dataset)

Expand Down Expand Up @@ -1419,7 +1462,7 @@ def ogr_write(
layer_exists = False
if os.path.exists(path):
try:
ogr_dataset = ogr_open(path_c, 1, None)
ogr_dataset = ogr_open(path_c, 1, NULL)

for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
Expand All @@ -1444,11 +1487,7 @@ def ogr_write(

# either it didn't exist or could not open it in write mode
if ogr_dataset == NULL:
for k, v in dataset_kwargs.items():
k = k.encode('UTF-8')
v = v.encode('UTF-8')
dataset_options = CSLAddNameValue(dataset_options, <const char *>k, <const char *>v)

dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_create(path_c, driver_c, dataset_options)

# if we are not appending to an existing layer, we need to create
Expand All @@ -1466,7 +1505,7 @@ def ogr_write(
OGRReleaseDataSource(ogr_dataset)
ogr_dataset = NULL
if dataset_options != NULL:
CSLDestroy(<char**>dataset_options)
CSLDestroy(dataset_options)
dataset_options = NULL
raise exc

Expand Down
12 changes: 10 additions & 2 deletions pyogrio/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pyogrio._env import GDALEnv
from pyogrio.raw import _preprocess_options_key_value
from pyogrio.util import get_vsi_path


Expand Down Expand Up @@ -145,7 +146,7 @@ def read_bounds(
return result


def read_info(path_or_buffer, /, layer=None, encoding=None):
def read_info(path_or_buffer, /, layer=None, encoding=None, **kwargs):
"""Read information about an OGR data source.
``crs`` and ``geometry`` will be ``None`` and ``features`` will be 0 for a
Expand All @@ -160,6 +161,9 @@ def read_info(path_or_buffer, /, layer=None, encoding=None):
If present, will be used as the encoding for reading string values from
the data source, unless encoding can be inferred directly from the data
source.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.
Returns
-------
Expand All @@ -178,8 +182,12 @@ def read_info(path_or_buffer, /, layer=None, encoding=None):
"""
path, buffer = get_vsi_path(path_or_buffer)

dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}

try:
result = ogr_read_info(path, layer=layer, encoding=encoding)
result = ogr_read_info(
path, layer=layer, encoding=encoding, dataset_kwargs=dataset_kwargs
)
finally:
if buffer is not None:
remove_virtual_file(path)
Expand Down
5 changes: 5 additions & 0 deletions pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def read_dataframe(
sql_dialect=None,
fid_as_index=False,
use_arrow=False,
**kwargs,
):
"""Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame.
If the data source does not have a geometry column or ``read_geometry`` is False,
Expand Down Expand Up @@ -121,6 +122,9 @@ def read_dataframe(
Whether to use Arrow as the transfer mechanism of the read data
from GDAL to Python (requires GDAL >= 3.6 and `pyarrow` to be
installed). When enabled, this provides a further speed-up.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.
Returns
-------
Expand Down Expand Up @@ -157,6 +161,7 @@ def read_dataframe(
sql=sql,
sql_dialect=sql_dialect,
return_fids=fid_as_index,
**kwargs,
)

if use_arrow:
Expand Down
Loading

0 comments on commit d371dde

Please sign in to comment.