Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(geospatial): accept geopandas GDFs in memtable #10485

Merged
merged 1 commit into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,19 @@ def test_memtable_column_naming_mismatch(con, monkeypatch, df, columns):
ibis.memtable(df, columns=columns)


@pytest.mark.notyet(
    ["mssql", "mysql", "exasol", "impala"], reason="various syntax errors reported"
)
def test_memtable_from_geopandas_dataframe(con, data_dir):
    """Check that a geopandas GeoDataFrame can be wrapped in a memtable.

    The test is skipped when geopandas is not installed. It only checks that
    building the memtable and executing a small query against it does not
    raise; no result values are compared.
    """
    geopandas = pytest.importorskip("geopandas")

    # Keep only the first five rows of the fixture file so the test stays small.
    zones_path = data_dir / "geojson" / "zones.geojson"
    zones = geopandas.read_file(zones_path)[:5]

    # Wrap the GeoDataFrame in an in-memory table expression.
    table = ibis.memtable(zones)

    # Executing a couple of rows forces the backend to actually ingest the data.
    geometry_sample = table.limit(2).select("geometry")
    con.to_pandas(geometry_sample)


@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["druid"], raises=AssertionError)
@pytest.mark.notyet(
Expand Down
18 changes: 18 additions & 0 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from collections.abc import Iterable, Sequence
from pathlib import Path

import geopandas as gpd
import pandas as pd
import polars as pl
import pyarrow as pa
Expand Down Expand Up @@ -552,6 +553,23 @@ def _memtable_from_polars_dataframe(
).to_expr()


@_memtable.register("geopandas.geodataframe.GeoDataFrame")
def _memtable_from_geopandas_geodataframe(
    data: gpd.GeoDataFrame,
    *,
    name: str | None = None,
    schema: SchemaLike | None = None,
    columns: Iterable[str] | None = None,
):
    """Build a memtable from a geopandas ``GeoDataFrame``.

    The geometry columns are converted to WKB first, and the converted frame
    is then passed back through ``_memtable`` with the same keyword arguments.

    Parameters
    ----------
    data
        The GeoDataFrame to wrap.
    name
        Optional table name, forwarded unchanged to ``_memtable``.
    schema
        Optional schema, forwarded unchanged to ``_memtable``.
    columns
        Optional column names, forwarded unchanged to ``_memtable``.

    Returns
    -------
    Whatever ``_memtable`` returns for the WKB-encoded frame.
    """
    # The Pandas data proxy and the `to_arrow` method on it can't handle
    # geopandas geometry columns. But if we first make the geometry columns WKB,
    # then the geo column gets treated (correctly) as just a binary blob, and
    # DuckDB can cast it to a proper geometry column after import.
    wkb_df = data.to_wkb()

    # Re-dispatch on the converted frame (presumably handled by the pandas
    # registration of `_memtable` -- confirm against the other handlers).
    return _memtable(wkb_df, name=name, schema=schema, columns=columns)


def _deferred_method_call(expr, method_name, **kwargs):
method = operator.methodcaller(method_name, **kwargs)
if isinstance(expr, str):
Expand Down