Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add support for dataset read options #233

Merged
merged 2 commits into from
Apr 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

### Improvements

- Add automatic detection of 2.5D geometries in write_dataframe (#223, #229)
- Add "driver" property to read_info result (#224)
- Add automatic detection of 2.5D geometries in `write_dataframe` (#223, #229)
- Add "driver" property to `read_info` result (#224)
- Add support for dataset open options to `read`, `read_dataframe`, and
`read_info` (#233)

## 0.5.1 (2023-01-26)

Expand Down
153 changes: 96 additions & 57 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,29 @@ cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
# return 0


# ported from fiona::_shim22.pyx::gdal_open_vector
cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
cdef char** dict_to_options(object values):
    """Convert a python dictionary into name / value pairs (stored in a char**)

    Parameters
    ----------
    values: dict or None
        Option names mapped to option values.  Both keys and values are
        encoded with ``.encode('UTF-8')`` and are therefore expected to be
        ``str``.  ``None`` is treated the same as an empty dictionary.

    Returns
    -------
    char**
        ``NULL`` if ``values`` is None or empty; otherwise the string list
        built up by ``CSLAddNameValue``.  Callers in this module release a
        non-NULL result with ``CSLDestroy``.
    """
    cdef char **options = NULL

    # No options supplied: NULL is the "no options" value callers pass on.
    if values is None:
        return NULL

    for k, v in values.items():
        # Rebinding k / v keeps the encoded bytes objects alive while their
        # char* buffers are handed to CSLAddNameValue.
        k = k.encode('UTF-8')
        v = v.encode('UTF-8')
        # CSLAddNameValue returns the list pointer to use from now on.
        options = CSLAddNameValue(options, <const char *>k, <const char *>v)

    return options


cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
cdef void* ogr_dataset = NULL
cdef char **ogr_drivers = NULL
cdef void* ogr_driver = NULL
cdef char **open_opts = NULL

# Force linear approximations in all cases
OGRSetNonLinearGeometriesEnabledFlag(0)
Expand All @@ -125,12 +142,14 @@ cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
else:
flags |= GDAL_OF_READONLY

open_opts = CSLAddNameValue(open_opts, "VALIDATE_OPEN_OPTIONS", "NO")

try:
# WARNING: GDAL logs warnings about invalid open options to stderr
# instead of raising an error
ogr_dataset = exc_wrap_pointer(
GDALOpenEx(path_c, flags, <const char *const *>ogr_drivers, <const char *const *>open_opts, NULL)
GDALOpenEx(path_c, flags, NULL, <const char *const *>options, NULL)
)

return ogr_dataset

except NullPointerError:
Expand All @@ -139,10 +158,6 @@ cdef void* ogr_open(const char* path_c, int mode, options) except NULL:
except CPLE_BaseError as exc:
raise DataSourceError(str(exc))

finally:
CSLDestroy(ogr_drivers)
CSLDestroy(open_opts)


cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
"""Open OGR layer by index or name.
Expand Down Expand Up @@ -266,7 +281,7 @@ cdef str get_crs(OGRLayerH ogr_layer):

cdef get_driver(OGRDataSourceH ogr_dataset):
"""Get the driver for a dataset.

Parameters
----------
ogr_dataset : pointer to open OGR dataset
Expand Down Expand Up @@ -858,6 +873,7 @@ cdef get_bounds(

def ogr_read(
str path,
object dataset_kwargs,
object layer=None,
object encoding=None,
int read_geometry=True,
Expand All @@ -870,11 +886,11 @@ def ogr_read(
object fids=None,
str sql=None,
str sql_dialect=None,
int return_fids=False,
**kwargs):
int return_fids=False):

cdef int err = 0
cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef const char *where_c = NULL
cdef const char *field_c = NULL
cdef char **fields_c = NULL
Expand All @@ -897,8 +913,10 @@ def ogr_read(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

ogr_dataset = ogr_open(path_c, 0, kwargs)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
Expand Down Expand Up @@ -991,6 +1009,10 @@ def ogr_read(
}

finally:
if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
if sql is not None:
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
Expand All @@ -1008,6 +1030,7 @@ def ogr_read(

def ogr_read_arrow(
str path,
dataset_kwargs,
object layer=None,
object encoding=None,
int read_geometry=True,
Expand All @@ -1020,11 +1043,11 @@ def ogr_read_arrow(
object fids=None,
str sql=None,
str sql_dialect=None,
int return_fids=False,
**kwargs):
int return_fids=False):

cdef int err = 0
cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef const char *where_c = NULL
cdef OGRDataSourceH ogr_dataset = NULL
cdef OGRLayerH ogr_layer = NULL
Expand All @@ -1051,8 +1074,10 @@ def ogr_read_arrow(
if sql is not None and layer is not None:
raise ValueError("'sql' paramater cannot be combined with 'layer'")

ogr_dataset = ogr_open(path_c, 0, kwargs)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)

if sql is None:
# layer defaults to index 0
if layer is None:
Expand Down Expand Up @@ -1128,10 +1153,16 @@ def ogr_read_arrow(
}

finally:

CSLDestroy(options)
if fields_c != NULL:
CSLDestroy(fields_c)
fields_c = NULL

if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
if sql is not None:
GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)
Expand All @@ -1152,8 +1183,7 @@ def ogr_read_bounds(
int skip_features=0,
int max_features=0,
object where=None,
tuple bbox=None,
**kwargs):
tuple bbox=None):

cdef int err = 0
cdef const char *path_c = NULL
Expand All @@ -1170,7 +1200,7 @@ def ogr_read_bounds(
if layer is None:
layer = 0

ogr_dataset = ogr_open(path_c, 0, kwargs)
ogr_dataset = ogr_open(path_c, 0, NULL)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Apply the attribute filter
Expand All @@ -1187,8 +1217,14 @@ def ogr_read_bounds(
return get_bounds(ogr_layer, skip_features, num_features)


def ogr_read_info(str path, object layer=None, object encoding=None, **kwargs):
def ogr_read_info(
str path,
dataset_kwargs,
object layer=None,
object encoding=None):

cdef const char *path_c = NULL
cdef char **dataset_options = NULL
cdef OGRDataSourceH ogr_dataset = NULL
cdef OGRLayerH ogr_layer = NULL

Expand All @@ -1199,37 +1235,44 @@ def ogr_read_info(str path, object layer=None, object encoding=None, **kwargs):
if layer is None:
layer = 0

ogr_dataset = ogr_open(path_c, 0, kwargs)
ogr_layer = get_ogr_layer(ogr_dataset, layer)
try:
dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_open(path_c, 0, dataset_options)
ogr_layer = get_ogr_layer(ogr_dataset, layer)

# Encoding is derived from the user, from the dataset capabilities / type,
# or from the system locale
encoding = (
encoding
or detect_encoding(ogr_dataset, ogr_layer)
or locale.getpreferredencoding()
)
# Encoding is derived from the user, from the dataset capabilities / type,
# or from the system locale
encoding = (
encoding
or detect_encoding(ogr_dataset, ogr_layer)
or locale.getpreferredencoding()
)

fields = get_fields(ogr_layer, encoding)

fields = get_fields(ogr_layer, encoding)

meta = {
'crs': get_crs(ogr_layer),
'encoding': encoding,
'fields': fields[:,2], # return only names
'dtypes': fields[:,3],
'geometry_type': get_geometry_type(ogr_layer),
'features': OGR_L_GetFeatureCount(ogr_layer, 1),
'driver': get_driver(ogr_dataset),
"capabilities": {
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead),
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex),
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter),
meta = {
'crs': get_crs(ogr_layer),
'encoding': encoding,
'fields': fields[:,2], # return only names
'dtypes': fields[:,3],
'geometry_type': get_geometry_type(ogr_layer),
'features': OGR_L_GetFeatureCount(ogr_layer, 1),
'driver': get_driver(ogr_dataset),
"capabilities": {
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead),
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex),
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter),
}
}
}

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL
finally:
if dataset_options != NULL:
CSLDestroy(dataset_options)
dataset_options = NULL

if ogr_dataset != NULL:
GDALClose(ogr_dataset)
ogr_dataset = NULL

return meta

Expand All @@ -1243,7 +1286,7 @@ def ogr_list_layers(str path):
path_b = path.encode('utf-8')
path_c = path_b

ogr_dataset = ogr_open(path_c, 0, None)
ogr_dataset = ogr_open(path_c, 0, NULL)

layer_count = GDALDatasetGetLayerCount(ogr_dataset)

Expand Down Expand Up @@ -1419,7 +1462,7 @@ def ogr_write(
layer_exists = False
if os.path.exists(path):
try:
ogr_dataset = ogr_open(path_c, 1, None)
ogr_dataset = ogr_open(path_c, 1, NULL)

for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
Expand All @@ -1444,11 +1487,7 @@ def ogr_write(

# either it didn't exist or could not open it in write mode
if ogr_dataset == NULL:
for k, v in dataset_kwargs.items():
k = k.encode('UTF-8')
v = v.encode('UTF-8')
dataset_options = CSLAddNameValue(dataset_options, <const char *>k, <const char *>v)

dataset_options = dict_to_options(dataset_kwargs)
ogr_dataset = ogr_create(path_c, driver_c, dataset_options)

# if we are not appending to an existing layer, we need to create
Expand All @@ -1466,7 +1505,7 @@ def ogr_write(
OGRReleaseDataSource(ogr_dataset)
ogr_dataset = NULL
if dataset_options != NULL:
CSLDestroy(<char**>dataset_options)
CSLDestroy(dataset_options)
dataset_options = NULL
raise exc

Expand Down
12 changes: 10 additions & 2 deletions pyogrio/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pyogrio._env import GDALEnv
from pyogrio.raw import _preprocess_options_key_value
from pyogrio.util import get_vsi_path


Expand Down Expand Up @@ -145,7 +146,7 @@ def read_bounds(
return result


def read_info(path_or_buffer, /, layer=None, encoding=None):
def read_info(path_or_buffer, /, layer=None, encoding=None, **kwargs):
"""Read information about an OGR data source.

``crs`` and ``geometry`` will be ``None`` and ``features`` will be 0 for a
Expand All @@ -160,6 +161,9 @@ def read_info(path_or_buffer, /, layer=None, encoding=None):
If present, will be used as the encoding for reading string values from
the data source, unless encoding can be inferred directly from the data
source.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.

Returns
-------
Expand All @@ -178,8 +182,12 @@ def read_info(path_or_buffer, /, layer=None, encoding=None):
"""
path, buffer = get_vsi_path(path_or_buffer)

dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}

try:
result = ogr_read_info(path, layer=layer, encoding=encoding)
result = ogr_read_info(
path, layer=layer, encoding=encoding, dataset_kwargs=dataset_kwargs
)
finally:
if buffer is not None:
remove_virtual_file(path)
Expand Down
5 changes: 5 additions & 0 deletions pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def read_dataframe(
sql_dialect=None,
fid_as_index=False,
use_arrow=False,
**kwargs,
):
"""Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame.
If the data source does not have a geometry column or ``read_geometry`` is False,
Expand Down Expand Up @@ -121,6 +122,9 @@ def read_dataframe(
Whether to use Arrow as the transfer mechanism of the read data
from GDAL to Python (requires GDAL >= 3.6 and `pyarrow` to be
installed). When enabled, this provides a further speed-up.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.

Returns
-------
Expand Down Expand Up @@ -157,6 +161,7 @@ def read_dataframe(
sql=sql,
sql_dialect=sql_dialect,
return_fids=fid_as_index,
**kwargs,
)

if use_arrow:
Expand Down
Loading