diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py
index 60eba7eb37b..ca72c28cd5f 100644
--- a/python/cudf/cudf/core/udf/groupby_utils.py
+++ b/python/cudf/cudf/core/udf/groupby_utils.py
@@ -209,6 +209,10 @@ def _can_be_jitted(frame, func, args):
     by attempting to compile just the function to PTX using the
     target set of types
     """
+    if not hasattr(func, "__code__"):
+        # No `__code__` means no bytecode, which Numba requires to proceed.
+        # See https://github.com/numba/numba/issues/4587
+        return False
     np_field_types = np.dtype(
         list(
             _supported_dtypes_from_frame(
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index e2af85b9e73..8fa192b4f7d 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -8,6 +8,7 @@
 import string
 import textwrap
 from decimal import Decimal
+from functools import partial
 
 import numpy as np
 import pandas as pd
@@ -583,6 +584,25 @@ def f(group):
     assert precompiled.currsize == 3
 
 
+def test_groupby_apply_no_bytecode_fallback():
+    # Tests that a callable without a `__code__` attribute (here a
+    # functools.partial), which can still run through the iterative
+    # groupby apply approach, works with engine="auto".
+
+    gdf = cudf.DataFrame({"a": [0, 1, 1], "b": [1, 2, 3]})
+    pdf = gdf.to_pandas()
+
+    def f(group):
+        return group.sum()
+
+    part = partial(f)
+
+    expect = pdf.groupby("a").apply(part)
+    got = gdf.groupby("a").apply(part, engine="auto")
+
+    assert_groupby_results_equal(expect, got)
+
+
 @pytest.mark.parametrize("func", [lambda group: group.x + group.y])
 def test_groupby_apply_return_col_from_df(func):
     # tests a UDF that consists of purely colwise