From 8a090dc0bdd359009ad73f9c97bc88d33b8e205b Mon Sep 17 00:00:00 2001 From: theroggy Date: Thu, 14 Apr 2022 16:43:59 +0200 Subject: [PATCH 1/9] Add option to read_dataframe with sql stmt --- pyogrio/_io.pyx | 57 +++++++++++++++++++++-- pyogrio/_ogr.pxd | 6 +++ pyogrio/geopandas.py | 4 ++ pyogrio/raw.py | 4 ++ pyogrio/tests/test_geopandas_io.py | 73 ++++++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 3 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 59d5fe67..d60f3268 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -171,6 +171,43 @@ cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: return ogr_layer +cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, sql, sql_dialect) except NULL: + """Execute an SQL statement on a dataset. + + Parameters + ---------- + ogr_dataset : pointer to open OGR dataset + sql : str + The sql statement to execute + sql_dialect : str + The sql dialect the sql statement is written in + + Returns + ------- + pointer to OGR layer + """ + cdef OGRLayerH ogr_layer = NULL + + try: + sql_b = sql.encode('utf-8') + sql_c = sql_b + if sql_dialect is not None: + sql_dialect_b = sql_dialect.encode('utf-8') + sql_dialect_c = sql_dialect_b + ogr_layer = exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c)) + else: + ogr_layer = exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)) + + # GDAL does not always raise exception messages in this case + except NullPointerError: + raise DataLayerError(f"Error executing sql '{sql}'") from None + + except CPLE_BaseError as exc: + raise DataLayerError(str(exc)) + + return ogr_layer + + cdef str get_crs(OGRLayerH ogr_layer): """Read CRS from layer as EPSG: if available or WKT. @@ -738,6 +775,8 @@ def ogr_read( object where=None, tuple bbox=None, object fids=None, + object sql=None, + object sql_dialect=None, int return_fids=False, **kwargs): @@ -757,15 +796,24 @@ def ogr_read( layer = 0 if fids is not None: - if where is not None or bbox is not None or skip_features or max_features: + if where is not None or bbox is not None or sql is not None or skip_features or max_features: raise ValueError( - "cannot set both 'fids' and any of 'where', 'bbox', " + "cannot set both 'fids' and any of 'where', 'bbox', 'sql', " "'skip_features' or 'max_features'" ) fids = np.asarray(fids, dtype=np.intc) + if sql is not None: + if where is not None or bbox is not None or fids is not None: # or skip_features or max_features: + raise ValueError( + "cannot set both 'sql' and any of 'where', 'bbox', 'fids'" + ) + ogr_dataset = ogr_open(path_c, 0, kwargs) - ogr_layer = get_ogr_layer(ogr_dataset, layer) + if sql is None: + ogr_layer = get_ogr_layer(ogr_dataset, layer) + else: + ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect) crs = get_crs(ogr_layer) @@ -834,6 +882,9 @@ def ogr_read( } if ogr_dataset != NULL: + # TODO: shouldn't this happen in an exception handler? + if sql is not None: + GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer) GDALClose(ogr_dataset) ogr_dataset = NULL diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index be94c340..bfc4a5f6 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -324,6 +324,12 @@ cdef extern from "gdal.h": int GDALDatasetGetLayerCount(GDALDatasetH ds) OGRLayerH GDALDatasetGetLayer(GDALDatasetH ds, int iLayer) OGRLayerH GDALDatasetGetLayerByName(GDALDatasetH ds, char * pszName) + OGRLayerH GDALDatasetExecuteSQL( + GDALDatasetH ds, + const char* pszStatement, + OGRGeometryH hSpatialFilter, + const char* pszDialect) + void GDALDatasetReleaseResultSet(GDALDatasetH, OGRLayerH) OGRErr GDALDatasetStartTransaction(GDALDatasetH ds, int bForce) OGRErr GDALDatasetCommitTransaction(GDALDatasetH ds) OGRErr GDALDatasetRollbackTransaction(GDALDatasetH ds) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 281954b8..b1f286c9 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -32,6 +32,8 @@ def read_dataframe( where=None, bbox=None, fids=None, + sql=None, + sql_dialect=None, fid_as_index=False, ): """Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame. @@ -115,6 +117,8 @@ def read_dataframe( where=where, bbox=bbox, fids=fids, + sql=sql, + sql_dialect=sql_dialect, return_fids=fid_as_index, ) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index fb585b47..533ccc1b 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -30,6 +30,8 @@ def read( where=None, bbox=None, fids=None, + sql=None, + sql_dialect=None, return_fids=False, ): """Read OGR data source. @@ -133,6 +135,8 @@ def read( where=where, bbox=bbox, fids=fids, + sql=sql, + sql_dialect=sql_dialect, return_fids=return_fids, ) finally: diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 5a34b397..1437bc8f 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -186,6 +186,79 @@ def test_read_fids_force_2d(test_fgdb_vsi): assert not df.iloc[0].geometry.has_z +#@pytest.mark.filterwarnings("ignore: Layer") +def test_read_sql(naturalearth_lowres): + # empty filter should return full set of records + df = read_dataframe(naturalearth_lowres, sql=None) + assert len(df) == 177 + assert len(df.columns) == 6 + + # should return singular item + sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'" + df = read_dataframe(naturalearth_lowres, sql=sql) + assert len(df) == 1 + assert len(df.columns) == 6 + assert df.iloc[0].iso_a3 == "CAN" + area_canada = df.iloc[0].geometry.area + + # use spatialite function (needs SQLITE dialect) + sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3 + FROM naturalearth_lowres + WHERE ISO_A3 = 'CAN'""" + df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE") + assert len(df) == 1 + assert df.iloc[0].geometry.area > area_canada + + # the geometry column doesn't need/cannot be specified when using the + # default OGRSQL dialect but is returned nonetheless, so 4 columns + sql = """SELECT name, pop_est, iso_a3 + FROM naturalearth_lowres + WHERE iso_a3 IN ('CAN', 'USA', 'MEX') + ORDER BY name""" + df = read_dataframe(naturalearth_lowres, sql=sql) + assert len(df.columns) == 4 + assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 + names_found = df['name'].to_list() + #assert len(names_found) == 4 + + # the geometry column doesn't need/cannot be specified when using the + # default OGRSQL dialect but is returned nonetheless, so 4 columns + sql = """SELECT name, pop_est, iso_a3 + FROM naturalearth_lowres + WHERE iso_a3 IN ('CAN', 'USA', 'MEX') + ORDER BY name""" + df = read_dataframe(naturalearth_lowres, sql=sql, skip_features=1, max_features=1) + assert len(df.columns) == 4 + assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 + names_found_skip_max = df['name'].to_list() + assert len(names_found_skip_max) == 1 + assert names_found_skip_max[0] == names_found[1] + + # should return items within range + sql = """SELECT name, pop_est, iso_a3 + FROM naturalearth_lowres + WHERE POP_EST >= 10000000 AND POP_EST < 100000000""" + df = read_dataframe(naturalearth_lowres, sql=sql) + assert len(df) == 75 + assert len(df.columns) == 4 + assert df.pop_est.min() >= 10000000 + assert df.pop_est.max() < 100000000 + + # should match no items + sql = """SELECT name, pop_est, iso_a3 + FROM naturalearth_lowres + WHERE ISO_A3 = 'INVALID'""" + df = read_dataframe(naturalearth_lowres, sql=sql) + assert len(df) == 0 + + +def test_read_sql_invalid(naturalearth_lowres): + with pytest.raises(Exception, match="SQL Expression Parsing Error"): + read_dataframe(naturalearth_lowres, sql="invalid") + with pytest.raises(ValueError, match="cannot set both 'sql' and any of"): + read_dataframe(naturalearth_lowres, sql="whatever", where="test") + + @pytest.mark.parametrize( "driver,ext", [ From e583c006bb52a1dc9ddc51b6b34ad8a06f943f9d Mon Sep 17 00:00:00 2001 From: theroggy Date: Sun, 17 Apr 2022 23:11:12 +0200 Subject: [PATCH 2/9] Add tests on gpkg and geojson fgb not possible yet, because the test file contains a combination of polygons and multipolygons --- pyogrio/tests/conftest.py | 18 +++++++++++++----- pyogrio/tests/test_geopandas_io.py | 12 +++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index ac507a10..d0d8399a 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -4,7 +4,7 @@ import pytest from pyogrio import __gdal_version_string__, __version__, list_drivers - +import pyogrio _data_dir = Path(__file__).parent.resolve() / "fixtures" @@ -23,10 +23,18 @@ def data_dir(): return _data_dir -@pytest.fixture(scope="session") -def naturalearth_lowres(): - return _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") - +@pytest.fixture(scope="function") +def naturalearth_lowres(tmp_path, suffix: str = ".shp"): #request): + shp_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") + if suffix.lower() == ".shp": + return shp_path + else: + suffix_path = tmp_path / f"{shp_path.stem}{suffix}" + if suffix_path.exists(): + return suffix_path + gdf = pyogrio.read_dataframe(shp_path) + pyogrio.write_dataframe(gdf, suffix_path) + return suffix_path @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 1437bc8f..c07dfd19 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -186,8 +186,9 @@ def test_read_fids_force_2d(test_fgdb_vsi): assert not df.iloc[0].geometry.has_z -#@pytest.mark.filterwarnings("ignore: Layer") -def test_read_sql(naturalearth_lowres): +@pytest.mark.parametrize( + "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) +def test_read_sql(naturalearth_lowres, suffix): # empty filter should return full set of records df = read_dataframe(naturalearth_lowres, sql=None) assert len(df) == 177 @@ -215,7 +216,7 @@ def test_read_sql(naturalearth_lowres): FROM naturalearth_lowres WHERE iso_a3 IN ('CAN', 'USA', 'MEX') ORDER BY name""" - df = read_dataframe(naturalearth_lowres, sql=sql) + df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="OGRSQL") assert len(df.columns) == 4 assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 names_found = df['name'].to_list() @@ -227,7 +228,8 @@ def test_read_sql(naturalearth_lowres): FROM naturalearth_lowres WHERE iso_a3 IN ('CAN', 'USA', 'MEX') ORDER BY name""" - df = read_dataframe(naturalearth_lowres, sql=sql, skip_features=1, max_features=1) + df = read_dataframe( + naturalearth_lowres, sql=sql, skip_features=1, max_features=1, sql_dialect="OGRSQL") assert len(df.columns) == 4 assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 names_found_skip_max = df['name'].to_list() @@ -238,7 +240,7 @@ def test_read_sql(naturalearth_lowres): sql = """SELECT name, pop_est, iso_a3 FROM naturalearth_lowres WHERE POP_EST >= 10000000 AND POP_EST < 100000000""" - df = read_dataframe(naturalearth_lowres, sql=sql) + df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="OGRSQL") assert len(df) == 75 assert len(df.columns) == 4 assert df.pop_est.min() >= 10000000 From 92fd31b7257b6d66a9d70891f71ca179374e0b55 Mon Sep 17 00:00:00 2001 From: theroggy Date: Sun, 17 Apr 2022 23:38:36 +0200 Subject: [PATCH 3/9] Remove other commit --- pyogrio/_io.pyx | 57 ++------------------ pyogrio/_ogr.pxd | 6 --- pyogrio/geopandas.py | 4 -- pyogrio/raw.py | 4 -- pyogrio/tests/test_geopandas_io.py | 84 +++--------------------------- 5 files changed, 9 insertions(+), 146 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index d60f3268..59d5fe67 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -171,43 +171,6 @@ cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: return ogr_layer -cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, sql, sql_dialect) except NULL: - """Execute an SQL statement on a dataset. - - Parameters - ---------- - ogr_dataset : pointer to open OGR dataset - sql : str - The sql statement to execute - sql_dialect : str - The sql dialect the sql statement is written in - - Returns - ------- - pointer to OGR layer - """ - cdef OGRLayerH ogr_layer = NULL - - try: - sql_b = sql.encode('utf-8') - sql_c = sql_b - if sql_dialect is not None: - sql_dialect_b = sql_dialect.encode('utf-8') - sql_dialect_c = sql_dialect_b - ogr_layer = exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c)) - else: - ogr_layer = exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)) - - # GDAL does not always raise exception messages in this case - except NullPointerError: - raise DataLayerError(f"Error executing sql '{sql}'") from None - - except CPLE_BaseError as exc: - raise DataLayerError(str(exc)) - - return ogr_layer - - cdef str get_crs(OGRLayerH ogr_layer): """Read CRS from layer as EPSG: if available or WKT. @@ -775,8 +738,6 @@ def ogr_read( object where=None, tuple bbox=None, object fids=None, - object sql=None, - object sql_dialect=None, int return_fids=False, **kwargs): @@ -796,24 +757,15 @@ def ogr_read( layer = 0 if fids is not None: - if where is not None or bbox is not None or sql is not None or skip_features or max_features: + if where is not None or bbox is not None or skip_features or max_features: raise ValueError( - "cannot set both 'fids' and any of 'where', 'bbox', 'sql', " + "cannot set both 'fids' and any of 'where', 'bbox', " "'skip_features' or 'max_features'" ) fids = np.asarray(fids, dtype=np.intc) - if sql is not None: - if where is not None or bbox is not None or fids is not None: # or skip_features or max_features: - raise ValueError( - "cannot set both 'sql' and any of 'where', 'bbox', 'fids'" - ) - ogr_dataset = ogr_open(path_c, 0, kwargs) - if sql is None: - ogr_layer = get_ogr_layer(ogr_dataset, layer) - else: - ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect) + ogr_layer = get_ogr_layer(ogr_dataset, layer) crs = get_crs(ogr_layer) @@ -882,9 +834,6 @@ def ogr_read( } if ogr_dataset != NULL: - # TODO: shouldn't this happen in an exception handler? - if sql is not None: - GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer) GDALClose(ogr_dataset) ogr_dataset = NULL diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index bfc4a5f6..be94c340 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -324,12 +324,6 @@ cdef extern from "gdal.h": int GDALDatasetGetLayerCount(GDALDatasetH ds) OGRLayerH GDALDatasetGetLayer(GDALDatasetH ds, int iLayer) OGRLayerH GDALDatasetGetLayerByName(GDALDatasetH ds, char * pszName) - OGRLayerH GDALDatasetExecuteSQL( - GDALDatasetH ds, - const char* pszStatement, - OGRGeometryH hSpatialFilter, - const char* pszDialect) - void GDALDatasetReleaseResultSet(GDALDatasetH, OGRLayerH) OGRErr GDALDatasetStartTransaction(GDALDatasetH ds, int bForce) OGRErr GDALDatasetCommitTransaction(GDALDatasetH ds) OGRErr GDALDatasetRollbackTransaction(GDALDatasetH ds) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index b1f286c9..281954b8 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -32,8 +32,6 @@ def read_dataframe( where=None, bbox=None, fids=None, - sql=None, - sql_dialect=None, fid_as_index=False, ): """Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame. @@ -117,8 +115,6 @@ def read_dataframe( where=where, bbox=bbox, fids=fids, - sql=sql, - sql_dialect=sql_dialect, return_fids=fid_as_index, ) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 533ccc1b..fb585b47 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -30,8 +30,6 @@ def read( where=None, bbox=None, fids=None, - sql=None, - sql_dialect=None, return_fids=False, ): """Read OGR data source. @@ -135,8 +133,6 @@ def read( where=where, bbox=bbox, fids=fids, - sql=sql, - sql_dialect=sql_dialect, return_fids=return_fids, ) finally: diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index c07dfd19..a3bafaf0 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -116,7 +116,9 @@ def test_read_fid_as_index(naturalearth_lowres): @pytest.mark.filterwarnings("ignore: Layer") -def test_read_where(naturalearth_lowres): +@pytest.mark.parametrize( + "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) +def test_read_where(naturalearth_lowres, suffix): # empty filter should return full set of records df = read_dataframe(naturalearth_lowres, where="") assert len(df) == 177 @@ -142,8 +144,9 @@ def test_read_where(naturalearth_lowres): df = read_dataframe(naturalearth_lowres, where="ISO_A3 = 'INVALID'") assert len(df) == 0 - -def test_read_where_invalid(naturalearth_lowres): +@pytest.mark.parametrize( + "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) +def test_read_where_invalid(naturalearth_lowres, suffix): with pytest.raises(ValueError, match="Invalid SQL"): read_dataframe(naturalearth_lowres, where="invalid") @@ -186,81 +189,6 @@ def test_read_fids_force_2d(test_fgdb_vsi): assert not df.iloc[0].geometry.has_z -@pytest.mark.parametrize( - "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_sql(naturalearth_lowres, suffix): - # empty filter should return full set of records - df = read_dataframe(naturalearth_lowres, sql=None) - assert len(df) == 177 - assert len(df.columns) == 6 - - # should return singular item - sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'" - df = read_dataframe(naturalearth_lowres, sql=sql) - assert len(df) == 1 - assert len(df.columns) == 6 - assert df.iloc[0].iso_a3 == "CAN" - area_canada = df.iloc[0].geometry.area - - # use spatialite function (needs SQLITE dialect) - sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3 - FROM naturalearth_lowres - WHERE ISO_A3 = 'CAN'""" - df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE") - assert len(df) == 1 - assert df.iloc[0].geometry.area > area_canada - - # the geometry column doesn't need/cannot be specified when using the - # default OGRSQL dialect but is returned nonetheless, so 4 columns - sql = """SELECT name, pop_est, iso_a3 - FROM naturalearth_lowres - WHERE iso_a3 IN ('CAN', 'USA', 'MEX') - ORDER BY name""" - df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="OGRSQL") - assert len(df.columns) == 4 - assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 - names_found = df['name'].to_list() - #assert len(names_found) == 4 - - # the geometry column doesn't need/cannot be specified when using the - # default OGRSQL dialect but is returned nonetheless, so 4 columns - sql = """SELECT name, pop_est, iso_a3 - FROM naturalearth_lowres - WHERE iso_a3 IN ('CAN', 'USA', 'MEX') - ORDER BY name""" - df = read_dataframe( - naturalearth_lowres, sql=sql, skip_features=1, max_features=1, sql_dialect="OGRSQL") - assert len(df.columns) == 4 - assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 - names_found_skip_max = df['name'].to_list() - assert len(names_found_skip_max) == 1 - assert names_found_skip_max[0] == names_found[1] - - # should return items within range - sql = """SELECT name, pop_est, iso_a3 - FROM naturalearth_lowres - WHERE POP_EST >= 10000000 AND POP_EST < 100000000""" - df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="OGRSQL") - assert len(df) == 75 - assert len(df.columns) == 4 - assert df.pop_est.min() >= 10000000 - assert df.pop_est.max() < 100000000 - - # should match no items - sql = """SELECT name, pop_est, iso_a3 - FROM naturalearth_lowres - WHERE ISO_A3 = 'INVALID'""" - df = read_dataframe(naturalearth_lowres, sql=sql) - assert len(df) == 0 - - -def test_read_sql_invalid(naturalearth_lowres): - with pytest.raises(Exception, match="SQL Expression Parsing Error"): - read_dataframe(naturalearth_lowres, sql="invalid") - with pytest.raises(ValueError, match="cannot set both 'sql' and any of"): - read_dataframe(naturalearth_lowres, sql="whatever", where="test") - - @pytest.mark.parametrize( "driver,ext", [ From fbf6e7267029ebe26913655d6ea0451e2e028f1a Mon Sep 17 00:00:00 2001 From: theroggy Date: Wed, 20 Apr 2022 00:16:56 +0200 Subject: [PATCH 4/9] Remove commented code --- pyogrio/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index d0d8399a..4497b27c 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -24,7 +24,7 @@ def data_dir(): @pytest.fixture(scope="function") -def naturalearth_lowres(tmp_path, suffix: str = ".shp"): #request): +def naturalearth_lowres(tmp_path, suffix: str = ".shp"): shp_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") if suffix.lower() == ".shp": return shp_path From bda4d3781306d085f0a1d74f67d6901e90ad8c0c Mon Sep 17 00:00:00 2001 From: theroggy Date: Wed, 20 Apr 2022 00:37:53 +0200 Subject: [PATCH 5/9] Use request object to get paramter values --- pyogrio/tests/conftest.py | 18 +++++++++++------- pyogrio/tests/test_geopandas_io.py | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 4497b27c..61d8ba34 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -24,17 +24,21 @@ def data_dir(): @pytest.fixture(scope="function") -def naturalearth_lowres(tmp_path, suffix: str = ".shp"): +def naturalearth_lowres(tmp_path, request): + ext = request._pyfuncitem.callspec.params.get("ext") + if ext is None: + ext = ".shp" + shp_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") - if suffix.lower() == ".shp": + if ext.lower() == ".shp": return shp_path else: - suffix_path = tmp_path / f"{shp_path.stem}{suffix}" - if suffix_path.exists(): - return suffix_path + ext_path = tmp_path / f"{shp_path.stem}{ext}" + if ext_path.exists(): + return ext_path gdf = pyogrio.read_dataframe(shp_path) - pyogrio.write_dataframe(gdf, suffix_path) - return suffix_path + pyogrio.write_dataframe(gdf, ext_path) + return ext_path @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index a3bafaf0..cc49044e 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -117,8 +117,8 @@ def test_read_fid_as_index(naturalearth_lowres): @pytest.mark.filterwarnings("ignore: Layer") @pytest.mark.parametrize( - "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_where(naturalearth_lowres, suffix): + "ext", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) +def test_read_where(naturalearth_lowres, ext): # empty filter should return full set of records df = read_dataframe(naturalearth_lowres, where="") assert len(df) == 177 From 95154a6d05ff71a1a41c8e113d44fa55e0013a76 Mon Sep 17 00:00:00 2001 From: theroggy Date: Wed, 20 Apr 2022 02:09:23 +0200 Subject: [PATCH 6/9] Split fixture to standard vs _ext(ended) --- pyogrio/tests/conftest.py | 11 +++++++++-- pyogrio/tests/test_geopandas_io.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 61d8ba34..71cd913f 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -24,8 +24,15 @@ def data_dir(): @pytest.fixture(scope="function") -def naturalearth_lowres(tmp_path, request): - ext = request._pyfuncitem.callspec.params.get("ext") +def naturalearth_lowres(): + return _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") + + +@pytest.fixture(scope="function") +def naturalearth_lowres_ext(tmp_path, request): + ext = None + if "callspec" in dir(request._pyfuncitem): + ext = request._pyfuncitem.callspec.params.get("ext") if ext is None: ext = ".shp" diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index cc49044e..77a1f0f7 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -118,37 +118,37 @@ def test_read_fid_as_index(naturalearth_lowres): @pytest.mark.filterwarnings("ignore: Layer") @pytest.mark.parametrize( "ext", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_where(naturalearth_lowres, ext): +def test_read_where(naturalearth_lowres_ext, ext): # empty filter should return full set of records - df = read_dataframe(naturalearth_lowres, where="") + df = read_dataframe(naturalearth_lowres_ext, where="") assert len(df) == 177 # should return singular item - df = read_dataframe(naturalearth_lowres, where="iso_a3 = 'CAN'") + df = read_dataframe(naturalearth_lowres_ext, where="iso_a3 = 'CAN'") assert len(df) == 1 assert df.iloc[0].iso_a3 == "CAN" - df = read_dataframe(naturalearth_lowres, where="iso_a3 IN ('CAN', 'USA', 'MEX')") + df = read_dataframe(naturalearth_lowres_ext, where="iso_a3 IN ('CAN', 'USA', 'MEX')") assert len(df) == 3 assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 # should return items within range df = read_dataframe( - naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000" + naturalearth_lowres_ext, where="POP_EST >= 10000000 AND POP_EST < 100000000" ) assert len(df) == 75 assert df.pop_est.min() >= 10000000 assert df.pop_est.max() < 100000000 # should match no items - df = read_dataframe(naturalearth_lowres, where="ISO_A3 = 'INVALID'") + df = read_dataframe(naturalearth_lowres_ext, where="ISO_A3 = 'INVALID'") assert len(df) == 0 @pytest.mark.parametrize( "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_where_invalid(naturalearth_lowres, suffix): +def test_read_where_invalid(naturalearth_lowres_ext, suffix): with pytest.raises(ValueError, match="Invalid SQL"): - read_dataframe(naturalearth_lowres, where="invalid") + read_dataframe(naturalearth_lowres_ext, where="invalid") @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)]) From e141df1b8232bac2a810e055a99957d5f2987927 Mon Sep 17 00:00:00 2001 From: theroggy Date: Thu, 21 Apr 2022 12:53:06 +0200 Subject: [PATCH 7/9] Make naturalearth_lowres configurable+add all_ext --- pyogrio/tests/conftest.py | 43 ++++++++-------- pyogrio/tests/test_geopandas_io.py | 80 +++++++++++++++++------------- 2 files changed, 68 insertions(+), 55 deletions(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 71cd913f..a87e1004 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -18,34 +18,36 @@ def pytest_report_header(config): ) +def prepare_testfile(testfile_path, dst_dir, ext): + if ext == testfile_path.suffix: + return testfile_path + else: + dst_path = dst_dir / f"{testfile_path.stem}{ext}" + if dst_path.exists(): + return dst_path + gdf = pyogrio.read_dataframe(testfile_path) + pyogrio.write_dataframe(gdf, dst_path) + return dst_path + + @pytest.fixture(scope="session") def data_dir(): return _data_dir @pytest.fixture(scope="function") -def naturalearth_lowres(): - return _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") +def naturalearth_lowres(tmp_path, request): + ext = request.param \ + if ("param" in dir(request) and request.param is not None) else ".shp" + testfile_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") + return prepare_testfile(testfile_path, tmp_path, ext) + + +@pytest.fixture(scope="function", params=[".shp", ".gpkg", ".json"]) +def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request): + return prepare_testfile(naturalearth_lowres, tmp_path, request.param) -@pytest.fixture(scope="function") -def naturalearth_lowres_ext(tmp_path, request): - ext = None - if "callspec" in dir(request._pyfuncitem): - ext = request._pyfuncitem.callspec.params.get("ext") - if ext is None: - ext = ".shp" - - shp_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") - if ext.lower() == ".shp": - return shp_path - else: - ext_path = tmp_path / f"{shp_path.stem}{ext}" - if ext_path.exists(): - return ext_path - gdf = pyogrio.read_dataframe(shp_path) - pyogrio.write_dataframe(gdf, ext_path) - return ext_path @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): @@ -63,4 +65,3 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): @pytest.fixture(scope="session") def test_fgdb_vsi(): return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip" - diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 77a1f0f7..cc2f005d 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -21,8 +21,8 @@ pytestmark = pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available") -def test_read_dataframe(naturalearth_lowres): - df = read_dataframe(naturalearth_lowres) +def test_read_dataframe(naturalearth_lowres_all_ext): + df = read_dataframe(naturalearth_lowres_all_ext) assert isinstance(df, gp.GeoDataFrame) @@ -45,8 +45,19 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi): assert len(df) == 177 -def test_read_no_geometry(naturalearth_lowres): - df = read_dataframe(naturalearth_lowres, read_geometry=False) +@pytest.mark.parametrize( + "naturalearth_lowres, expected_ext", + [(".gpkg", ".gpkg"), (None, ".shp")], + indirect=["naturalearth_lowres"]) +def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext): + # Test the fixture with "indirect" parameter + assert naturalearth_lowres.suffix == expected_ext + df = read_dataframe(naturalearth_lowres) + assert len(df) == 177 + + +def test_read_no_geometry(naturalearth_lowres_all_ext): + df = read_dataframe(naturalearth_lowres_all_ext, read_geometry=False) assert isinstance(df, pd.DataFrame) assert not isinstance(df, gp.GeoDataFrame) @@ -82,9 +93,9 @@ def test_read_layer(test_fgdb_vsi): assert "RIVER_MILE" in df.columns -def test_read_layer_invalid(naturalearth_lowres): +def test_read_layer_invalid(naturalearth_lowres_all_ext): with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"): - read_dataframe(naturalearth_lowres, layer="wrong") + read_dataframe(naturalearth_lowres_all_ext, layer="wrong") @pytest.mark.filterwarnings("ignore: Measured") @@ -98,7 +109,7 @@ def test_read_null_values(test_fgdb_vsi): # make sure that Null values are preserved assert df.SEGMENT_NAME.isnull().max() == True - assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] == None + assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None def test_read_fid_as_index(naturalearth_lowres): @@ -116,39 +127,41 @@ def test_read_fid_as_index(naturalearth_lowres): @pytest.mark.filterwarnings("ignore: Layer") -@pytest.mark.parametrize( - "ext", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_where(naturalearth_lowres_ext, ext): +def test_read_where(naturalearth_lowres_all_ext): # empty filter should return full set of records - df = read_dataframe(naturalearth_lowres_ext, where="") + df = read_dataframe(naturalearth_lowres_all_ext, where="") assert len(df) == 177 # should return singular item - df = read_dataframe(naturalearth_lowres_ext, where="iso_a3 = 'CAN'") + df = read_dataframe(naturalearth_lowres_all_ext, where="iso_a3 = 'CAN'") assert len(df) == 1 assert df.iloc[0].iso_a3 == "CAN" - df = read_dataframe(naturalearth_lowres_ext, where="iso_a3 IN ('CAN', 'USA', 'MEX')") + df = read_dataframe(naturalearth_lowres_all_ext, where="iso_a3 IN ('CAN', 'USA', 'MEX')") assert len(df) == 3 assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0 # should return items within range df = read_dataframe( - naturalearth_lowres_ext, where="POP_EST >= 10000000 AND POP_EST < 100000000" + naturalearth_lowres_all_ext, where="POP_EST >= 10000000 AND POP_EST < 100000000" ) assert len(df) == 75 assert df.pop_est.min() >= 10000000 assert df.pop_est.max() < 100000000 # should match no items - df = read_dataframe(naturalearth_lowres_ext, where="ISO_A3 = 'INVALID'") + df = read_dataframe(naturalearth_lowres_all_ext, where="ISO_A3 = 'INVALID'") assert len(df) == 0 -@pytest.mark.parametrize( - "suffix", [".shp", ".json", ".gpkg"] ) #, ".fgb"] ) -def test_read_where_invalid(naturalearth_lowres_ext, suffix): - with pytest.raises(ValueError, match="Invalid SQL"): - read_dataframe(naturalearth_lowres_ext, where="invalid") + +def test_read_where_invalid(naturalearth_lowres_all_ext): + if naturalearth_lowres_all_ext.suffix in [".gpkg"]: + # Geopackage doesn't raise, but returns empty df? + gdf = read_dataframe(naturalearth_lowres_all_ext, where="invalid") + assert len(gdf) == 0 + else: + with pytest.raises(ValueError, match="Invalid SQL"): + read_dataframe(naturalearth_lowres_all_ext, where="invalid") @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)]) @@ -241,27 +254,27 @@ def test_write_empty_dataframe(tmpdir, driver, ext): assert_geodataframe_equal(df, expected) -def test_write_dataframe_gdalparams(tmpdir, naturalearth_lowres): +def test_write_dataframe_gdalparams(tmp_path, naturalearth_lowres): original_df = read_dataframe(naturalearth_lowres) - - test_noindex_filename = os.path.join(str(tmpdir), f"test_gdalparams_noindex.shp") + + test_noindex_filename = tmp_path / "test_gdalparams_noindex.shp" write_dataframe(original_df, test_noindex_filename, SPATIAL_INDEX="NO") - assert os.path.exists(test_noindex_filename) is True - test_noindex_index_filename = os.path.join(str(tmpdir), f"test_gdalparams_noindex.qix") - assert os.path.exists(test_noindex_index_filename) is False - - test_withindex_filename = os.path.join(str(tmpdir), f"test_gdalparams_withindex.shp") + assert test_noindex_filename.exists() is True + test_noindex_index_filename = tmp_path / "test_gdalparams_noindex.qix" + assert test_noindex_index_filename.exists() is False + + test_withindex_filename = tmp_path / "test_gdalparams_withindex.shp" write_dataframe(original_df, test_withindex_filename, SPATIAL_INDEX="YES") - assert os.path.exists(test_withindex_filename) is True - test_withindex_index_filename = os.path.join(str(tmpdir), f"test_gdalparams_withindex.qix") - assert os.path.exists(test_withindex_index_filename) is True + assert test_withindex_filename.exists() is True + test_withindex_index_filename = tmp_path / "test_gdalparams_withindex.qix" + assert test_withindex_index_filename.exists() is True @pytest.mark.filterwarnings( "ignore: You will likely lose important projection information" ) -def test_custom_crs_io(tmpdir, naturalearth_lowres): - df = read_dataframe(naturalearth_lowres) +def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext): + df = read_dataframe(naturalearth_lowres_all_ext) # project Belgium to a custom Albers Equal Area projection expected = df.loc[df.name == "Belgium"].to_crs( "+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3" @@ -278,4 +291,3 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres): assert crs["lat_2"] == 51.5 assert crs["lon_0"] == 4.3 assert df.crs.equals(expected.crs) - From 26dbaccab4fb6fc592d0669115b60d153e05dbb2 Mon Sep 17 00:00:00 2001 From: theroggy Date: Thu, 21 Apr 2022 23:29:17 +0200 Subject: [PATCH 8/9] Small improvements --- pyogrio/tests/conftest.py | 15 +++++++-------- pyogrio/tests/test_geopandas_io.py | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index a87e1004..019f0015 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -21,13 +21,13 @@ def pytest_report_header(config): def prepare_testfile(testfile_path, dst_dir, ext): if ext == testfile_path.suffix: return testfile_path - else: - dst_path = dst_dir / f"{testfile_path.stem}{ext}" - if dst_path.exists(): - return dst_path - gdf = pyogrio.read_dataframe(testfile_path) - pyogrio.write_dataframe(gdf, dst_path) + + dst_path = dst_dir / f"{testfile_path.stem}{ext}" + if dst_path.exists(): return dst_path + gdf = pyogrio.read_dataframe(testfile_path) + pyogrio.write_dataframe(gdf, dst_path) + return dst_path @pytest.fixture(scope="session") @@ -37,8 +37,7 @@ def data_dir(): @pytest.fixture(scope="function") def naturalearth_lowres(tmp_path, request): - ext = request.param \ - if ("param" in dir(request) and request.param is not None) else ".shp" + ext = getattr(request, "param", ".shp") testfile_path = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp") return prepare_testfile(testfile_path, tmp_path, ext) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index cc2f005d..4421a121 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -47,7 +47,7 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi): @pytest.mark.parametrize( "naturalearth_lowres, expected_ext", - [(".gpkg", ".gpkg"), (None, ".shp")], + [(".gpkg", ".gpkg"), (".shp", ".shp")], indirect=["naturalearth_lowres"]) def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext): # Test the fixture with "indirect" parameter From a798333075d2ef0687d28d795cd8e780d3762239 Mon Sep 17 00:00:00 2001 From: theroggy Date: Fri, 22 Apr 2022 02:06:46 +0200 Subject: [PATCH 9/9] Use _all_ext fixture where relevant + fix tests --- pyogrio/tests/test_geopandas_io.py | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 4421a121..116b7cb9 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -112,18 +112,23 @@ def test_read_null_values(test_fgdb_vsi): assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None -def test_read_fid_as_index(naturalearth_lowres): +def test_read_fid_as_index(naturalearth_lowres_all_ext): kwargs = {"skip_features": 2, "max_features": 2} # default is to not set FIDs as index - df = read_dataframe(naturalearth_lowres, **kwargs) + df = read_dataframe(naturalearth_lowres_all_ext, **kwargs) assert_index_equal(df.index, pd.RangeIndex(0, 2)) - df = read_dataframe(naturalearth_lowres, fid_as_index=False, **kwargs) + df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs) assert_index_equal(df.index, pd.RangeIndex(0, 2)) - df = read_dataframe(naturalearth_lowres, fid_as_index=True, **kwargs) - assert_index_equal(df.index, pd.Index([2, 3], name="fid")) + df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs) + if naturalearth_lowres_all_ext.suffix in ['.gpkg']: + # File format where fid starts at 1 + assert_index_equal(df.index, pd.Index([3, 4], name="fid")) + else: + # File format where fid starts at 0 + assert_index_equal(df.index, pd.Index([2, 3], name="fid")) @pytest.mark.filterwarnings("ignore: Layer") @@ -165,26 +170,26 @@ def test_read_where_invalid(naturalearth_lowres_all_ext): @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)]) -def test_read_bbox_invalid(naturalearth_lowres, bbox): +def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox): with pytest.raises(ValueError, match="Invalid bbox"): - read_dataframe(naturalearth_lowres, bbox=bbox) + read_dataframe(naturalearth_lowres_all_ext, bbox=bbox) -def test_read_bbox(naturalearth_lowres): +def test_read_bbox(naturalearth_lowres_all_ext): # should return no features with pytest.warns(UserWarning, match="does not have any features to read"): - df = read_dataframe(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001)) + df = read_dataframe(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001)) assert len(df) == 0 - df = read_dataframe(naturalearth_lowres, bbox=(-140, 20, -100, 40)) + df = read_dataframe(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 40)) assert len(df) == 2 assert np.array_equal(df.iso_a3, ["USA", "MEX"]) -def test_read_fids(naturalearth_lowres): +def test_read_fids(naturalearth_lowres_all_ext): # ensure keyword is properly passed through - fids = np.array([0, 10, 5], dtype=np.int64) - df = read_dataframe(naturalearth_lowres, fids=fids, fid_as_index=True) + fids = np.array([1, 10, 5], dtype=np.int64) + df = read_dataframe(naturalearth_lowres_all_ext, fids=fids, fid_as_index=True) assert len(df) == 3 assert np.array_equal(fids, df.index.values)