diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index cb582df21e0..a35802f2ab0 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,6 +25,7 @@ jobs: - docs-build - wheel-build-cudf - wheel-tests-cudf + - test-cudf-polars - wheel-build-dask-cudf - wheel-tests-dask-cudf - devcontainer @@ -132,6 +133,17 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cudf.sh + test-cudf-polars: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + with: + # For each CUDA major version, this selects the amd64 entry with the latest supported Python and CUDA versions. + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + # This always runs, but only fails if this PR touches code in + # pylibcudf or cudf_polars + script: "ci/test_cudf_polars.sh" wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit diff --git a/ci/test_cudf_polars.sh b/ci/test_cudf_polars.sh new file mode 100755 index 00000000000..669e049ab26 --- /dev/null +++ b/ci/test_cudf_polars.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -eou pipefail + +# We will only fail these tests if the PR touches code in pylibcudf +# or cudf_polars itself. +# Note, the three dots mean we are doing diff between the merge-base +# of upstream and HEAD. So this is asking, "does _this branch_ touch +# files in cudf_polars/pylibcudf", rather than "are there changes +# between upstream and this branch which touch cudf_polars/pylibcudf" +# TODO: is the target branch exposed anywhere in an environment variable? 
+if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; +then + HAS_CHANGES=1 +else + HAS_CHANGES=0 +fi + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} +RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ +mkdir -p "${RAPIDS_TESTS_DIR}" + +rapids-logger "Install cudf wheel" +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/cudf*.whl)[test] + +rapids-logger "Install polars (allow pre-release versions)" +python -m pip install 'polars>=1.0.0a0' + +rapids-logger "Install cudf_polars" +python -m pip install --no-deps python/cudf_polars + +rapids-logger "Run cudf_polars tests" + +function set_exitcode() +{ + EXITCODE=$? +} +EXITCODE=0 +trap set_exitcode ERR +set +e + +python -m pytest \ + --cache-clear \ + --cov cudf_polars \ + --cov-fail-under=100 \ + --cov-config=python/cudf_polars/pyproject.toml \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \ + python/cudf_polars/tests + +trap ERR +set -e + +if [ ${EXITCODE} != 0 ]; then + rapids-logger "Testing FAILED: exitcode ${EXITCODE}" +else + rapids-logger "Testing PASSED" +fi + +if [ ${HAS_CHANGES} == 1 ]; then + exit ${EXITCODE} +else + exit 0 +fi diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 4ad6e75fb2e..3f5f3c74050 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -123,7 +123,7 @@ def broadcast( ] -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class IR: """Abstract plan node, representing an unevaluated dataframe.""" @@ -157,7 +157,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ) # pragma: no cover -@dataclasses.dataclass(slots=True) 
+@dataclasses.dataclass class PythonScan(IR): """Representation of input from a python function.""" @@ -171,7 +171,7 @@ def __post_init__(self): raise NotImplementedError("PythonScan not implemented") -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Scan(IR): """Input from files.""" @@ -248,7 +248,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Cache(IR): """ Return a cached plan node. @@ -269,7 +269,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return cache.setdefault(self.key, self.value.evaluate(cache=cache)) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class DataFrameScan(IR): """ Input from an existing polars DataFrame. @@ -315,7 +315,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Select(IR): """Produce a new dataframe selecting given expressions from an input.""" @@ -336,7 +336,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Reduce(IR): """ Produce a new dataframe selecting given expressions from an input. 
@@ -389,7 +389,7 @@ def placeholder_column(n: int) -> plc.Column: ) -@dataclasses.dataclass(slots=False) +@dataclasses.dataclass class GroupBy(IR): """Perform a groupby.""" @@ -490,7 +490,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame([*result_keys, *results]).slice(self.options.slice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Join(IR): """A join of two dataframes.""" @@ -518,8 +518,16 @@ class Join(IR): - coalesce: should key columns be coalesced (only makes sense for outer joins) """ - @cache + def __post_init__(self) -> None: + """Validate preconditions.""" + if any( + isinstance(e.value, expr.Literal) + for e in itertools.chain(self.left_on, self.right_on) + ): + raise NotImplementedError("Join with literal as join key.") + @staticmethod + @cache def _joiners( how: Literal["inner", "left", "full", "leftsemi", "leftanti"], ) -> tuple[ @@ -582,17 +590,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for new, old in zip(columns[left.num_columns :], right.columns) ] return DataFrame([*left_cols, *right_cols]) - left_on = DataFrame( - broadcast( - *(e.evaluate(left) for e in self.left_on), target_length=left.num_rows - ) - ) - right_on = DataFrame( - broadcast( - *(e.evaluate(right) for e in self.right_on), - target_length=right.num_rows, - ) - ) + # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 + left_on = DataFrame(broadcast(*(e.evaluate(left) for e in self.left_on))) + right_on = DataFrame(broadcast(*(e.evaluate(right) for e in self.right_on))) null_equality = ( plc.types.NullEquality.EQUAL if join_nulls @@ -602,13 +602,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: if right_policy is None: # Semi join lg = join_fn(left_on.table, right_on.table, null_equality) - left = left.replace_columns(*left_on.columns) table = plc.copying.gather(left.table, lg, left_policy) result = 
DataFrame.from_table(table, left.column_names) else: lg, rg = join_fn(left_on.table, right_on.table, null_equality) - left = left.replace_columns(*left_on.columns) - right = right.replace_columns(*right_on.columns) if coalesce and how == "inner": right = right.discard_columns(right_on.column_names_set) left = DataFrame.from_table( @@ -642,7 +639,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class HStack(IR): """Add new columns to a dataframe.""" @@ -671,7 +668,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.with_columns(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Distinct(IR): """Produce a new dataframe with distinct rows.""" @@ -741,7 +738,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return result.slice(self.zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Sort(IR): """Sort a dataframe.""" @@ -810,7 +807,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns).slice(self.zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Slice(IR): """Slice a dataframe.""" @@ -827,7 +824,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.slice((self.offset, self.length)) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Filter(IR): """Filter a dataframe with a boolean mask.""" @@ -843,7 +840,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return df.filter(mask) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Projection(IR): """Select a subset of columns from a dataframe.""" @@ -860,7 +857,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: return DataFrame(columns) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class 
MapFunction(IR): """Apply some function to a dataframe.""" @@ -894,6 +891,13 @@ def __post_init__(self) -> None: # polars requires that all to-explode columns have the # same sub-shapes raise NotImplementedError("Explode with more than one column") + elif self.name == "rename": + old, new, _ = self.options + # TODO: perhaps polars should validate renaming in the IR? + if len(new) != len(set(new)) or ( + set(new) & (set(self.df.schema.keys() - set(old))) + ): + raise NotImplementedError("Duplicate new names in rename.") def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: """Evaluate and return a dataframe.""" @@ -919,7 +923,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: raise AssertionError("Should never be reached") # pragma: no cover -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class Union(IR): """Concatenate dataframes vertically.""" @@ -943,7 +947,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ).slice(self.zlice) -@dataclasses.dataclass(slots=True) +@dataclasses.dataclass class HConcat(IR): """Concatenate dataframes horizontally.""" diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index 6d597a91724..c04eac41bb7 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -6,7 +6,7 @@ from __future__ import annotations from collections.abc import Mapping -from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias +from typing import TYPE_CHECKING, Literal, Protocol, Union from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir @@ -15,43 +15,45 @@ if TYPE_CHECKING: from typing import Callable + from typing_extensions import TypeAlias + import polars as pl -IR: TypeAlias = ( - pl_ir.PythonScan - | pl_ir.Scan - | pl_ir.Cache - | pl_ir.DataFrameScan - | pl_ir.Select - | pl_ir.GroupBy - | pl_ir.Join - | pl_ir.HStack - | 
pl_ir.Distinct - | pl_ir.Sort - | pl_ir.Slice - | pl_ir.Filter - | pl_ir.SimpleProjection - | pl_ir.MapFunction - | pl_ir.Union - | pl_ir.HConcat - | pl_ir.ExtContext -) - -Expr: TypeAlias = ( - pl_expr.Function - | pl_expr.Window - | pl_expr.Literal - | pl_expr.Sort - | pl_expr.SortBy - | pl_expr.Gather - | pl_expr.Filter - | pl_expr.Cast - | pl_expr.Column - | pl_expr.Agg - | pl_expr.BinaryExpr - | pl_expr.Len - | pl_expr.PyExprIR -) +IR: TypeAlias = Union[ + pl_ir.PythonScan, + pl_ir.Scan, + pl_ir.Cache, + pl_ir.DataFrameScan, + pl_ir.Select, + pl_ir.GroupBy, + pl_ir.Join, + pl_ir.HStack, + pl_ir.Distinct, + pl_ir.Sort, + pl_ir.Slice, + pl_ir.Filter, + pl_ir.SimpleProjection, + pl_ir.MapFunction, + pl_ir.Union, + pl_ir.HConcat, + pl_ir.ExtContext, +] + +Expr: TypeAlias = Union[ + pl_expr.Function, + pl_expr.Window, + pl_expr.Literal, + pl_expr.Sort, + pl_expr.SortBy, + pl_expr.Gather, + pl_expr.Filter, + pl_expr.Cast, + pl_expr.Column, + pl_expr.Agg, + pl_expr.BinaryExpr, + pl_expr.Len, + pl_expr.PyExprIR, +] Schema: TypeAlias = Mapping[str, plc.DataType] diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py index 2ffa1c4af6d..267d0a99692 100644 --- a/python/cudf_polars/tests/expressions/test_agg.py +++ b/python/cudf_polars/tests/expressions/test_agg.py @@ -52,7 +52,7 @@ def test_agg(df, agg): # https://github.com/rapidsai/cudf/issues/15852 check_dtypes = agg not in {"n_unique", "median"} - if not check_dtypes and q.schema["a"] != pl.Float64: + if not check_dtypes and q.collect_schema()["a"] != pl.Float64: with pytest.raises(AssertionError): assert_gpu_result_equal(q) assert_gpu_result_equal(q, check_dtypes=check_dtypes, check_exact=False) @@ -65,7 +65,7 @@ def test_agg(df, agg): ) @pytest.mark.parametrize("op", ["min", "max"]) def test_agg_float_with_nans(propagate_nans, op): - df = pl.LazyFrame({"a": [1, 2, float("nan")]}) + df = pl.LazyFrame({"a": pl.Series([1, 2, float("nan")], dtype=pl.Float64())}) 
op = getattr(pl.Expr, f"nan_{op}" if propagate_nans else op) q = df.select(op(pl.col("a"))) diff --git a/python/cudf_polars/tests/expressions/test_booleanfunction.py b/python/cudf_polars/tests/expressions/test_booleanfunction.py index 951b749e670..a52fba26528 100644 --- a/python/cudf_polars/tests/expressions/test_booleanfunction.py +++ b/python/cudf_polars/tests/expressions/test_booleanfunction.py @@ -26,7 +26,7 @@ def has_nulls(request): def test_booleanfunction_reduction(ignore_nulls): ldf = pl.LazyFrame( { - "a": [1, 2, 3.0, 2, 5], + "a": pl.Series([1, 2, 3.0, 2, 5], dtype=pl.Float64()), "b": [0, 3, 1, -1, None], "c": [1, 6, 5, 3, 2], } @@ -82,7 +82,9 @@ def test_boolean_function_unary(request, expr, has_nans, has_nulls): ], ) def test_unsupported_boolean_function(expr): - df = pl.LazyFrame({"a": [1, float("nan"), 2, 4], "b": [1, 2, 3, 4]}) + df = pl.LazyFrame( + {"a": pl.Series([1, float("nan"), 2, 4], dtype=pl.Float64()), "b": [1, 2, 3, 4]} + ) q = df.select(expr) @@ -95,7 +97,11 @@ def test_unsupported_boolean_function(expr): ) def test_boolean_isbetween(closed, bounds): df = pl.LazyFrame( - {"a": [1, float("nan"), 2, 4], "lo": [1, 2, 2, 3], "hi": [10, 4, 2, 4]} + { + "a": pl.Series([1, float("nan"), 2, 4], dtype=pl.Float32()), + "lo": [1, 2, 2, 3], + "hi": [10, 4, 2, 4], + } ) q = df.select(pl.col("a").is_between(*bounds, closed=closed)) diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py index d4920d35f14..992efe0ba79 100644 --- a/python/cudf_polars/tests/expressions/test_rolling.py +++ b/python/cudf_polars/tests/expressions/test_rolling.py @@ -3,11 +3,9 @@ from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir +from cudf_polars.testing.asserts import assert_ir_translation_raises def test_rolling(): @@ -29,13 +27,13 @@ def test_rolling(): min_a=pl.min("a").rolling(index_column="dt", period="2d"), 
max_a=pl.max("a").rolling(index_column="dt", period="2d"), ) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + + assert_ir_translation_raises(q, NotImplementedError) def test_grouped_rolling(): df = pl.LazyFrame({"a": [1, 2, 3, 4, 5, 6], "b": [1, 2, 1, 3, 1, 2]}) q = df.select(pl.col("a").min().over("b")) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 3c498fe7286..9729e765948 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -8,8 +8,11 @@ import polars as pl -from cudf_polars import execute_with_cudf, translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars import execute_with_cudf +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture @@ -47,22 +50,19 @@ def test_supported_stringfunction_expression(ldf): def test_unsupported_stringfunction(ldf): q = ldf.select(pl.col("a").str.count_matches("e", literal=True)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_contains_re_non_strict_raises(ldf): q = ldf.select(pl.col("a").str.contains(".", strict=False)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_contains_re_non_literal_raises(ldf): q = ldf.select(pl.col("a").str.contains(pl.col("b"), literal=False)) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize( diff --git a/python/cudf_polars/tests/test_groupby.py 
b/python/cudf_polars/tests/test_groupby.py index e70f923b097..aefad59eb91 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -6,8 +6,10 @@ import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.fixture @@ -72,7 +74,7 @@ def test_groupby(df: pl.LazyFrame, maintain_order, keys, exprs): q = df.group_by(*keys, maintain_order=maintain_order).agg(*exprs) if not maintain_order: - sort_keys = list(q.schema.keys())[: len(keys)] + sort_keys = list(q.collect_schema().keys())[: len(keys)] q = q.sort(*sort_keys) assert_gpu_result_equal(q, check_exact=False) @@ -97,5 +99,4 @@ def test_groupby_len(df, keys): def test_groupby_unsupported(df, expr): q = df.group_by("key1").agg(expr) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 81166b0b2f6..89f6fd3455b 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -6,7 +6,10 @@ import polars as pl -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) @pytest.mark.parametrize( @@ -71,3 +74,14 @@ def test_cross_join(): q = left.join(right, how="cross") assert_gpu_result_equal(q) + + +@pytest.mark.parametrize( + "left_on,right_on", [(pl.col("a"), pl.lit(2)), (pl.lit(2), pl.col("a"))] +) +def test_join_literal_key_unsupported(left_on, right_on): + left = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + right = pl.LazyFrame({"a": [1, 2, 3], "b": [5, 6, 7]}) + q = left.join(right, left_on=left_on, right_on=right_on, how="inner") + + assert_ir_translation_raises(q, 
NotImplementedError) diff --git a/python/cudf_polars/tests/test_mapfunction.py b/python/cudf_polars/tests/test_mapfunction.py index ec6b3f3fc0a..77032108e6f 100644 --- a/python/cudf_polars/tests/test_mapfunction.py +++ b/python/cudf_polars/tests/test_mapfunction.py @@ -6,8 +6,10 @@ import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) def test_merge_sorted_raises(): @@ -17,16 +19,14 @@ def test_merge_sorted_raises(): q = df1.merge_sorted(df2, key="a").merge_sorted(df3, key="a") - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) def test_explode_multiple_raises(): df = pl.LazyFrame({"a": [[1, 2], [3, 4]], "b": [[5, 6], [7, 8]]}) q = df.explode("a", "b") - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) @pytest.mark.parametrize("column", ["a", "b"]) @@ -41,3 +41,23 @@ def test_explode_single(column): q = df.explode(column) assert_gpu_result_equal(q) + + +@pytest.mark.parametrize("mapping", [{"b": "a"}, {"a": "c", "b": "c"}]) +def test_rename_duplicate_raises(mapping): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + + q = df.rename(mapping) + + assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize( + "mapping", [{}, {"b": "c"}, {"b": "a", "a": "b"}, {"a": "c", "b": "d"}] +) +def test_rename_columns(mapping): + df = pl.LazyFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + + q = df.rename(mapping) + + assert_gpu_result_equal(q) diff --git a/python/cudf_polars/tests/test_python_scan.py b/python/cudf_polars/tests/test_python_scan.py index c03474e3dc8..fd8453b77c4 100644 --- a/python/cudf_polars/tests/test_python_scan.py +++ b/python/cudf_polars/tests/test_python_scan.py @@ -2,11 +2,9 @@ # 
SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir +from cudf_polars.testing.asserts import assert_ir_translation_raises def test_python_scan(): @@ -14,7 +12,6 @@ def source(with_columns, predicate, nrows): return pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Int8())}) q = pl.LazyFrame._scan_python_function({"a": pl.Int8}, source, pyarrow=False) - with pytest.raises(NotImplementedError): - _ = translate_ir(q._ldf.visit()) + assert_ir_translation_raises(q, NotImplementedError) assert q.collect().equals(source(None, None, None)) diff --git a/python/cudf_polars/tests/test_union.py b/python/cudf_polars/tests/test_union.py index 6c9122bc260..b021d832910 100644 --- a/python/cudf_polars/tests/test_union.py +++ b/python/cudf_polars/tests/test_union.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import pytest - import polars as pl -from cudf_polars import translate_ir -from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.asserts import ( + assert_gpu_result_equal, + assert_ir_translation_raises, +) def test_union(): @@ -31,8 +31,8 @@ def test_union_schema_mismatch_raises(): ).lazy() ldf2 = ldf.select(pl.col("a").cast(pl.Float32)) query = pl.concat([ldf, ldf2], how="diagonal") - with pytest.raises(NotImplementedError): - _ = translate_ir(query._ldf.visit()) + + assert_ir_translation_raises(query, NotImplementedError) def test_concat_vertical():