diff --git a/CHANGES.md b/CHANGES.md index 5065728c..c5742723 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,8 @@ ### Bug fixes - Fix int32 overflow when reading int64 columns (#260) +- Fix `fid_as_index=True` not setting fid as index when using `read_dataframe` + with `use_arrow=True` (#265) ## 0.6.0 (2023-04-27) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 9065904c..8371261c 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1164,6 +1164,12 @@ def ogr_open_arrow( geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer)) + fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer)) + # OGR_L_GetFIDColumn returns the column name if it is a custom column, + # or "" if not. For arrow, the default column name is "OGC_FID". + if fid_column == "": + fid_column = "OGC_FID" + # Apply the attribute filter if where is not None and where != "": apply_where_filter(ogr_layer, where) @@ -1212,6 +1218,7 @@ def ogr_open_arrow( 'fields': fields[:,2], # return only names 'geometry_type': geometry_type, 'geometry_name': geometry_name, + 'fid_column': fid_column, } yield meta, reader diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 1cadc2ed..88f12d9d 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -278,6 +278,7 @@ cdef extern from "ogr_api.h": OGRErr OGR_L_CreateFeature(OGRLayerH layer, OGRFeatureH feature) OGRErr OGR_L_CreateField(OGRLayerH layer, OGRFieldDefnH fielddefn, int flexible) const char* OGR_L_GetName(OGRLayerH layer) + const char* OGR_L_GetFIDColumn(OGRLayerH layer) const char* OGR_L_GetGeometryColumn(OGRLayerH layer) OGRSpatialReferenceH OGR_L_GetSpatialRef(OGRLayerH layer) int OGR_L_TestCapability(OGRLayerH layer, const char *name) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index a69ddb14..ef7de377 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -167,6 +167,9 @@ def read_dataframe( if use_arrow: meta, table = result df = table.to_pandas() + if fid_as_index: + df = df.set_index(meta["fid_column"]) + df.index.names = 
["fid"] geometry_name = meta["geometry_name"] or "wkb_geometry" if geometry_name in df.columns: df["geometry"] = from_wkb(df.pop(geometry_name), crs=meta["crs"]) diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index c4e66649..cebcaf1b 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -7,7 +7,7 @@ try: import pandas as pd - from pandas.testing import assert_frame_equal + from pandas.testing import assert_frame_equal, assert_index_equal from geopandas.testing import assert_geodataframe_equal except ImportError: pass @@ -33,16 +33,13 @@ def test_read_arrow(naturalearth_lowres_all_ext): def test_read_arrow_fid(naturalearth_lowres_all_ext): - result = read_dataframe( - naturalearth_lowres_all_ext, use_arrow=True, fid_as_index=True - ) + kwargs = {"use_arrow": True, "where": "fid >= 2 AND fid <= 3"} - if naturalearth_lowres_all_ext.suffix == ".gpkg": - fid_col = "fid" - else: - fid_col = "OGC_FID" + df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs) + assert_index_equal(df.index, pd.RangeIndex(0, 2)) - assert fid_col in result.columns + df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **kwargs) + assert_index_equal(df.index, pd.Index([2, 3], name="fid")) def test_read_arrow_columns(naturalearth_lowres):