Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create fixture for test files of other file types #74

Merged
merged 9 commits into from
Apr 22, 2022
29 changes: 24 additions & 5 deletions pyogrio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from pyogrio import __gdal_version_string__, __version__, list_drivers

import pyogrio

_data_dir = Path(__file__).parent.resolve() / "fixtures"

Expand All @@ -18,14 +18,34 @@ def pytest_report_header(config):
)


def prepare_testfile(testfile_path, dst_dir, ext):
    """Return a path to ``testfile_path`` in the file format given by ``ext``.

    Parameters
    ----------
    testfile_path : pathlib.Path
        Existing test dataset to start from.
    dst_dir : pathlib.Path
        Directory in which a converted copy is looked up or created.
    ext : str
        Requested file extension, compared against ``testfile_path.suffix``
        (so it includes the leading dot, e.g. ``".gpkg"``).

    Returns
    -------
    pathlib.Path
        ``testfile_path`` itself when it already has the requested
        extension, otherwise ``dst_dir / "<stem><ext>"``.
    """
    if testfile_path.suffix == ext:
        # Already in the requested format: hand back the source untouched.
        return testfile_path

    converted_path = dst_dir / f"{testfile_path.stem}{ext}"
    if not converted_path.exists():
        # No converted copy yet: read the source into a dataframe and write
        # it back out under the new name.
        frame = pyogrio.read_dataframe(testfile_path)
        pyogrio.write_dataframe(frame, converted_path)
    return converted_path


@pytest.fixture(scope="session")
def data_dir():
    """Session-scoped fixture exposing the module-level ``_data_dir`` path."""
    fixtures_dir = _data_dir
    return fixtures_dir


@pytest.fixture(scope="session")
def naturalearth_lowres():
return _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
@pytest.fixture(scope="function")
def naturalearth_lowres(tmp_path, request):
    """Path to the naturalearth_lowres test dataset, a shapefile by default.

    When the fixture is parametrized (e.g. via ``indirect``), the parameter
    is used as the file extension and the dataset is obtained through
    ``prepare_testfile`` with ``tmp_path`` as destination directory.
    """
    shapefile = _data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
    # ``request.param`` is only read when present; otherwise fall back to
    # the extension of the source shapefile.
    requested_ext = getattr(request, "param", ".shp")

    return prepare_testfile(shapefile, tmp_path, requested_ext)


@pytest.fixture(scope="function", params=[".shp", ".gpkg", ".json"])
brendan-ward marked this conversation as resolved.
Show resolved Hide resolved
def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request):
return prepare_testfile(naturalearth_lowres, tmp_path, request.param)


@pytest.fixture(scope="function")
Expand All @@ -44,4 +64,3 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres):
@pytest.fixture(scope="session")
def test_fgdb_vsi():
    """Session-scoped fixture: ``/vsizip/`` path to ``test_fgdb.gdb.zip``
    inside ``_data_dir``, returned as a string."""
    vsi_path = f"/vsizip/{_data_dir}/test_fgdb.gdb.zip"
    return vsi_path

106 changes: 63 additions & 43 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
pytestmark = pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")


def test_read_dataframe(naturalearth_lowres):
df = read_dataframe(naturalearth_lowres)
def test_read_dataframe(naturalearth_lowres_all_ext):
df = read_dataframe(naturalearth_lowres_all_ext)

assert isinstance(df, gp.GeoDataFrame)

Expand All @@ -45,8 +45,19 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi):
assert len(df) == 177


def test_read_no_geometry(naturalearth_lowres):
df = read_dataframe(naturalearth_lowres, read_geometry=False)
@pytest.mark.parametrize(
    "naturalearth_lowres, expected_ext",
    [
        (".gpkg", ".gpkg"),
        (".shp", ".shp"),
    ],
    indirect=["naturalearth_lowres"],
)
def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext):
    """Check the ``naturalearth_lowres`` fixture when parametrized indirectly.

    The first value of each pair is passed to the fixture (``indirect``);
    the second is the suffix the resulting path is expected to have.
    """
    actual_ext = naturalearth_lowres.suffix
    assert actual_ext == expected_ext

    # The file must also be readable and hold all 177 records.
    frame = read_dataframe(naturalearth_lowres)
    assert len(frame) == 177


def test_read_no_geometry(naturalearth_lowres_all_ext):
df = read_dataframe(naturalearth_lowres_all_ext, read_geometry=False)
assert isinstance(df, pd.DataFrame)
assert not isinstance(df, gp.GeoDataFrame)

Expand Down Expand Up @@ -82,9 +93,9 @@ def test_read_layer(test_fgdb_vsi):
assert "RIVER_MILE" in df.columns


def test_read_layer_invalid(naturalearth_lowres):
def test_read_layer_invalid(naturalearth_lowres_all_ext):
with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"):
read_dataframe(naturalearth_lowres, layer="wrong")
read_dataframe(naturalearth_lowres_all_ext, layer="wrong")


@pytest.mark.filterwarnings("ignore: Measured")
Expand All @@ -98,77 +109,87 @@ def test_read_null_values(test_fgdb_vsi):

# make sure that Null values are preserved
assert df.SEGMENT_NAME.isnull().max() == True
assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] == None
assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None


def test_read_fid_as_index(naturalearth_lowres):
def test_read_fid_as_index(naturalearth_lowres_all_ext):
kwargs = {"skip_features": 2, "max_features": 2}

# default is to not set FIDs as index
df = read_dataframe(naturalearth_lowres, **kwargs)
df = read_dataframe(naturalearth_lowres_all_ext, **kwargs)
assert_index_equal(df.index, pd.RangeIndex(0, 2))

df = read_dataframe(naturalearth_lowres, fid_as_index=False, **kwargs)
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
assert_index_equal(df.index, pd.RangeIndex(0, 2))

df = read_dataframe(naturalearth_lowres, fid_as_index=True, **kwargs)
assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs)
if naturalearth_lowres_all_ext.suffix in ['.gpkg']:
# File format where fid starts at 1
assert_index_equal(df.index, pd.Index([3, 4], name="fid"))
else:
# File format where fid starts at 0
assert_index_equal(df.index, pd.Index([2, 3], name="fid"))


@pytest.mark.filterwarnings("ignore: Layer")
def test_read_where(naturalearth_lowres):
def test_read_where(naturalearth_lowres_all_ext):
# empty filter should return full set of records
df = read_dataframe(naturalearth_lowres, where="")
df = read_dataframe(naturalearth_lowres_all_ext, where="")
assert len(df) == 177

# should return singular item
df = read_dataframe(naturalearth_lowres, where="iso_a3 = 'CAN'")
df = read_dataframe(naturalearth_lowres_all_ext, where="iso_a3 = 'CAN'")
assert len(df) == 1
assert df.iloc[0].iso_a3 == "CAN"

df = read_dataframe(naturalearth_lowres, where="iso_a3 IN ('CAN', 'USA', 'MEX')")
df = read_dataframe(naturalearth_lowres_all_ext, where="iso_a3 IN ('CAN', 'USA', 'MEX')")
assert len(df) == 3
assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0

# should return items within range
df = read_dataframe(
naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
naturalearth_lowres_all_ext, where="POP_EST >= 10000000 AND POP_EST < 100000000"
)
assert len(df) == 75
assert df.pop_est.min() >= 10000000
assert df.pop_est.max() < 100000000

# should match no items
df = read_dataframe(naturalearth_lowres, where="ISO_A3 = 'INVALID'")
df = read_dataframe(naturalearth_lowres_all_ext, where="ISO_A3 = 'INVALID'")
assert len(df) == 0


def test_read_where_invalid(naturalearth_lowres):
with pytest.raises(ValueError, match="Invalid SQL"):
read_dataframe(naturalearth_lowres, where="invalid")
def test_read_where_invalid(naturalearth_lowres_all_ext):
    """Check how an invalid ``where`` expression is handled per file format."""
    path = naturalearth_lowres_all_ext

    if path.suffix not in [".gpkg"]:
        # These formats are expected to reject the expression outright.
        with pytest.raises(ValueError, match="Invalid SQL"):
            read_dataframe(path, where="invalid")
        return

    # Geopackage doesn't raise, but returns empty df?
    result = read_dataframe(path, where="invalid")
    assert len(result) == 0


@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
def test_read_bbox_invalid(naturalearth_lowres, bbox):
def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox):
with pytest.raises(ValueError, match="Invalid bbox"):
read_dataframe(naturalearth_lowres, bbox=bbox)
read_dataframe(naturalearth_lowres_all_ext, bbox=bbox)


def test_read_bbox(naturalearth_lowres):
def test_read_bbox(naturalearth_lowres_all_ext):
# should return no features
with pytest.warns(UserWarning, match="does not have any features to read"):
df = read_dataframe(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001))
df = read_dataframe(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001))
assert len(df) == 0

df = read_dataframe(naturalearth_lowres, bbox=(-140, 20, -100, 40))
df = read_dataframe(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 40))
assert len(df) == 2
assert np.array_equal(df.iso_a3, ["USA", "MEX"])


def test_read_fids(naturalearth_lowres):
def test_read_fids(naturalearth_lowres_all_ext):
# ensure keyword is properly passed through
fids = np.array([0, 10, 5], dtype=np.int64)
df = read_dataframe(naturalearth_lowres, fids=fids, fid_as_index=True)
fids = np.array([1, 10, 5], dtype=np.int64)
df = read_dataframe(naturalearth_lowres_all_ext, fids=fids, fid_as_index=True)
assert len(df) == 3
assert np.array_equal(fids, df.index.values)

Expand Down Expand Up @@ -238,27 +259,27 @@ def test_write_empty_dataframe(tmpdir, driver, ext):
assert_geodataframe_equal(df, expected)


def test_write_dataframe_gdalparams(tmpdir, naturalearth_lowres):
def test_write_dataframe_gdalparams(tmp_path, naturalearth_lowres):
original_df = read_dataframe(naturalearth_lowres)
test_noindex_filename = os.path.join(str(tmpdir), f"test_gdalparams_noindex.shp")

test_noindex_filename = tmp_path / "test_gdalparams_noindex.shp"
write_dataframe(original_df, test_noindex_filename, SPATIAL_INDEX="NO")
assert os.path.exists(test_noindex_filename) is True
test_noindex_index_filename = os.path.join(str(tmpdir), f"test_gdalparams_noindex.qix")
assert os.path.exists(test_noindex_index_filename) is False
test_withindex_filename = os.path.join(str(tmpdir), f"test_gdalparams_withindex.shp")
assert test_noindex_filename.exists() is True
test_noindex_index_filename = tmp_path / "test_gdalparams_noindex.qix"
assert test_noindex_index_filename.exists() is False

test_withindex_filename = tmp_path / "test_gdalparams_withindex.shp"
write_dataframe(original_df, test_withindex_filename, SPATIAL_INDEX="YES")
assert os.path.exists(test_withindex_filename) is True
test_withindex_index_filename = os.path.join(str(tmpdir), f"test_gdalparams_withindex.qix")
assert os.path.exists(test_withindex_index_filename) is True
assert test_withindex_filename.exists() is True
test_withindex_index_filename = tmp_path / "test_gdalparams_withindex.qix"
assert test_withindex_index_filename.exists() is True


@pytest.mark.filterwarnings(
"ignore: You will likely lose important projection information"
)
def test_custom_crs_io(tmpdir, naturalearth_lowres):
df = read_dataframe(naturalearth_lowres)
def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext):
df = read_dataframe(naturalearth_lowres_all_ext)
# project Belgium to a custom Albers Equal Area projection
expected = df.loc[df.name == "Belgium"].to_crs(
"+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3"
Expand All @@ -275,4 +296,3 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres):
assert crs["lat_2"] == 51.5
assert crs["lon_0"] == 4.3
assert df.crs.equals(expected.crs)