You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
UserWarning: dask_expr does not support the DataFrameIOFunction protocol for column projection. To enable column projection, please ensure that the signature of func includes a columns= keyword argument instead.
Example w/ traceback
@pytest.mark.skipif(MACOS, reason="dask/distributed#8075")
@pytest.mark.parametrize(
"Worker", [Worker, pytest.param(Nanny, marks=[pytest.mark.slow])]
)
@gen_test()
async def test_file_descriptors_dont_leak(Worker):
pytest.importorskip("pandas")
> df = dask.datasets.timeseries(freq="10s", dtypes={"x": int, "y": float})
distributed/tests/test_client.py:6455:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../mambaforge/envs/distributed/lib/python3.10/site-packages/dask/datasets.py:63: in timeseries
return make_timeseries(
../../mambaforge/envs/distributed/lib/python3.10/site-packages/dask/dataframe/io/demo.py:434: in make_timese
return from_map(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
func = <dask.dataframe.io.demo.MakeDataframePart object at 0x7f74a13d18d0>, args = None
meta = x y
timestamp
2000-01-01 1035 0.431179
divisions = [Timestamp('2000-01-01 00:00:00'), Timestamp('2000-01-02 00:00:00'), Timestamp('2000-01-03 00:00
label = 'make-timeseries', enforce_metadata = False
iterables = [[([Timestamp('2000-01-01 00:00:00'), Timestamp('2000-01-02 00:00:00')], 261786203), ([Timestamp
kwargs = {}, DataFrameIOFunction = <class 'dask.dataframe.io.utils.DataFrameIOFunction'>
FromMap = <class 'dask_expr.io.io.FromMap'>
FromMapProjectable = <class 'dask_expr.io.io.FromMapProjectable'>, lengths = {30}, i = 0
def from_map(
func,
*iterables,
args=None,
meta=no_default,
divisions=None,
label=None,
enforce_metadata=False,
**kwargs,
):
"""Create a dask-expr collection from a custom function map
NOTE: The underlying ``Expr`` object produced by this API
will support column projection (via ``simplify``) if
the ``func`` argument has "columns" in its signature.
"""
from dask.dataframe.io.utils import DataFrameIOFunction
from dask_expr.io import FromMap, FromMapProjectable
if "token" in kwargs:
# This option doesn't really make sense in dask-expr
raise NotImplementedError("dask_expr does not support a token argument.")
lengths = set()
iterables = list(iterables)
for i, iterable in enumerate(iterables):
if not isinstance(iterable, Iterable):
raise ValueError(
f"All elements of `iterables` must be Iterable, got {type(iterable)}"
)
try:
lengths.add(len(iterable))
except (AttributeError, TypeError):
iterables[i] = list(iterable)
lengths.add(len(iterables[i]))
if len(lengths) == 0:
raise ValueError("`from_map` requires at least one Iterable input")
elif len(lengths) > 1:
raise ValueError("All `iterables` must have the same length")
if lengths == {0}:
raise ValueError("All `iterables` must have a non-zero length")
# Check if `func` supports column projection
allow_projection = False
columns_arg_required = False
if param := inspect.signature(func).parameters.get("columns", None):
allow_projection = True
columns_arg_required = param.default is param.empty
if meta is no_default and columns_arg_required:
raise TypeError(
"Argument `func` of `from_map` has a required `columns` "
" parameter and not `meta` provided."
"Either provide `meta` yourself or make `columns` an optional argument."
)
elif isinstance(func, DataFrameIOFunction):
> warnings.warn(
"dask_expr does not support the DataFrameIOFunction "
"protocol for column projection. To enable column "
"projection, please ensure that the signature of `func` "
"includes a `columns=` keyword argument instead."
)
E UserWarning: dask_expr does not support the DataFrameIOFunction protocol for column projection. To enable column projection, please ensure that the signature of `func` includes a `columns=` keyword argument instead.
The text was updated successfully, but these errors were encountered:
milesgranger
added a commit
to milesgranger/distributed
that referenced
this issue
Mar 7, 2024
crusaderky
changed the title
UserWarning: dask-expr does not support the DataFrameIOFunction protocol ... → dask.datasets.timeseries: dask-expr does not support the DataFrameIOFunction protocol
Mar 8, 2024
Currently working on getting distributed CI green again (https://github.com/dask/distributed/actions/runs/8187119566/job/22386948701?pr=8549#step:19:7242); there are a number of these UserWarnings:
Example w/ traceback
The text was updated successfully, but these errors were encountered: