Skip to content

fix: sqlframe false positives in compliance checks #2011

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 23 additions & 25 deletions narwhals/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,32 +386,8 @@ def _from_native_impl( # noqa: PLR0915
msg = "Invalid parameter combination: `eager_only=True` and `eager_or_interchange_only=True`"
raise ValueError(msg)

# SQLFrame
# This one needs checking before extensions as `hasattr` always returns `True`.
if is_sqlframe_dataframe(native_object): # pragma: no cover
from narwhals._spark_like.dataframe import SparkLikeLazyFrame

if series_only:
msg = "Cannot only use `series_only` with SQLFrame DataFrame"
raise TypeError(msg)
if eager_only or eager_or_interchange_only:
msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with SQLFrame DataFrame"
raise TypeError(msg)
import sqlframe._version

backend_version = parse_version(sqlframe._version)
return LazyFrame(
SparkLikeLazyFrame(
native_object,
backend_version=backend_version,
version=version,
implementation=Implementation.SQLFRAME,
),
level="lazy",
)

# Extensions
elif is_compliant_dataframe(native_object):
if is_compliant_dataframe(native_object):
if series_only:
if not pass_through:
msg = "Cannot only use `series_only` with dataframe"
Expand Down Expand Up @@ -765,6 +741,28 @@ def _from_native_impl( # noqa: PLR0915
level="lazy",
)

elif is_sqlframe_dataframe(native_object): # pragma: no cover
from narwhals._spark_like.dataframe import SparkLikeLazyFrame

if series_only:
msg = "Cannot only use `series_only` with SQLFrame DataFrame"
raise TypeError(msg)
if eager_only or eager_or_interchange_only:
msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with SQLFrame DataFrame"
raise TypeError(msg)
import sqlframe._version

backend_version = parse_version(sqlframe._version)
return LazyFrame(
SparkLikeLazyFrame(
native_object,
backend_version=backend_version,
version=version,
implementation=Implementation.SQLFRAME,
),
level="lazy",
)

# Interchange protocol
elif _supports_dataframe_interchange(native_object):
from narwhals._interchange.dataframe import InterchangeFrame
Expand Down
15 changes: 12 additions & 3 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import timezone
from enum import Enum
from enum import auto
from inspect import getattr_static
from secrets import token_hex
from typing import TYPE_CHECKING
from typing import Any
Expand Down Expand Up @@ -1275,16 +1276,24 @@ def dtype_matches_time_unit_and_time_zone(
)


def _hasattr_static(obj: Any, attr: str) -> bool:
try:
getattr_static(obj, attr)
except AttributeError:
return False
return True


def is_compliant_dataframe(obj: Any) -> TypeIs[CompliantDataFrame]:
return hasattr(obj, "__narwhals_dataframe__")
return _hasattr_static(obj, "__narwhals_dataframe__")


def is_compliant_lazyframe(obj: Any) -> TypeIs[CompliantLazyFrame]:
return hasattr(obj, "__narwhals_lazyframe__")
return _hasattr_static(obj, "__narwhals_lazyframe__")


def is_compliant_series(obj: Any) -> TypeIs[CompliantSeries]:
return hasattr(obj, "__narwhals_series__")
return _hasattr_static(obj, "__narwhals_series__")


def is_compliant_expr(
Expand Down
14 changes: 0 additions & 14 deletions tests/frame/schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,20 +205,6 @@ def test_schema_object(method: str, expected: Any) -> None:
assert getattr(schema, method)() == expected


@pytest.mark.skipif(
PANDAS_VERSION < (2,),
reason="Before 2.0, pandas would raise on `drop_duplicates`",
)
def test_from_non_hashable_column_name() -> None:
# This is technically super-illegal
# BUT, it shows up in a scikit-learn test, so...
df = pd.DataFrame([[1, 2], [3, 4]], columns=["pizza", ["a", "b"]])

df = nw.from_native(df, eager_only=True)
assert df.columns == ["pizza", ["a", "b"]]
assert df["pizza"].dtype == nw.Int64


def test_validate_not_duplicated_columns_pandas_like() -> None:
df = pd.DataFrame([[1, 2], [4, 5]], columns=["a", "a"])
with pytest.raises(
Expand Down
34 changes: 34 additions & 0 deletions tests/translate/from_native_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,40 @@ def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None:
assert nw.from_native(dframe, eager_only=eager_only, strict=False) is dframe


def test_series_only_sqlframe() -> None: # pragma: no cover
pytest.importorskip("sqlframe")
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = ( # type: ignore[no-any-return]
session.createDataFrame([*zip(*data.values())], schema=[*data.keys()])
)

with pytest.raises(TypeError, match="Cannot only use `series_only`"):
nw.from_native(df, series_only=True)


@pytest.mark.parametrize(
("eager_only", "context"),
[
(False, does_not_raise()),
(True, pytest.raises(TypeError, match="Cannot only use `eager_only`")),
],
)
def test_eager_only_sqlframe(eager_only: Any, context: Any) -> None: # pragma: no cover
pytest.importorskip("sqlframe")
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = ( # type: ignore[no-any-return]
session.createDataFrame([*zip(*data.values())], schema=[*data.keys()])
)

with context:
res = nw.from_native(df, eager_only=eager_only)
assert isinstance(res, nw.LazyFrame)


def test_from_native_strict_false_typing() -> None:
df = pl.DataFrame()
nw.from_native(df, strict=False)
Expand Down
Loading