Merge branch 'branch-24.12' into strings-filter-nvbench
davidwendt committed Nov 8, 2024
2 parents 470cc32 + 990734f commit 8f65d9b
Showing 7 changed files with 51 additions and 74 deletions.
3 changes: 2 additions & 1 deletion docs/cudf/source/developer_guide/cudf_pandas.md
@@ -11,7 +11,8 @@ In the rest of this document, to maintain a concrete pair of libraries in mind,
For example, future support could include pairs such as CuPy (as the "fast" library) and NumPy (as the "slow" library).

```{note}
We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type.
1. We currently do not wrap the entire NumPy library because it exposes a C API. But we do wrap NumPy's `numpy.ndarray` and CuPy's `cupy.ndarray` in a proxy type.
2. A `custom_iter` method is defined to always use the slow object's `iter` method, so that we don't move the object to the GPU (triggering an error) and then back to the CPU just to execute the iteration.
```
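
For illustration, a minimal sketch of the delegation described in item 2, using hypothetical stand-in names rather than the actual cudf.pandas proxy machinery:

```python
# Hypothetical illustration only; SimpleProxy is a stand-in, not a
# cudf.pandas internal class.
class SimpleProxy:
    def __init__(self, fast, slow):
        self._fast = fast  # e.g. a cudf (GPU) object
        self._slow = slow  # e.g. the equivalent pandas (CPU) object

    def __iter__(self):
        # Always iterate the slow object: element-wise iteration over the
        # fast object would fail or force repeated device<->host copies.
        return iter(self._slow)


proxy = SimpleProxy(fast=None, slow=[1, 2, 3])
assert list(proxy) == [1, 2, 3]
```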

### Types:
4 changes: 2 additions & 2 deletions java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@
* that is backing it.
*/
public class DeviceMemoryBufferView extends BaseDeviceMemoryBuffer {
DeviceMemoryBufferView(long address, long lengthInBytes) {
public DeviceMemoryBufferView(long address, long lengthInBytes) {
// Set the cleaner to null so we don't end up releasing anything
super(address, lengthInBytes, (MemoryBufferCleaner) null);
}
11 changes: 10 additions & 1 deletion python/cudf/cudf/pandas/_wrappers/common.py
@@ -52,4 +52,13 @@ def array_interface(self: _FastSlowProxy):


def custom_iter(self: _FastSlowProxy):
return iter(self._fsproxy_slow)
"""
Custom iter method to handle the case where only the slow
object's iter method is used.
"""
# NOTE: Do not remove this method. It is required so that `iter` always
# uses the slow object directly instead of attempting the fast (GPU) path
# and then falling back.
return _maybe_wrap_result(
iter(self._fsproxy_slow),
None, # type: ignore
)
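
A rough usage sketch of the behaviour above; it assumes cudf and cudf.pandas are installed and mirrors the tests added later in this commit rather than prescribing new behaviour:

```python
# Rough usage sketch; assumes cudf and cudf.pandas are installed.
import cudf.pandas

cudf.pandas.install()
import pandas as pd  # transparently proxied by cudf.pandas after install()

df = pd.DataFrame({"a": [1, 2, 3]})
# Iteration goes through custom_iter above: the slow (pandas) iterator is
# used directly, so no GPU iteration attempt (and fallback) is triggered.
for dtype in df.dtypes:
    print(dtype)
```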
4 changes: 3 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -1099,7 +1099,9 @@ def _maybe_wrap_result(result: Any, func: Callable, /, *args, **kwargs) -> Any:
"""
Wraps "result" in a fast-slow proxy if is a "proxiable" object.
"""
if _is_final_type(result):
if isinstance(result, (int, str, float, bool, type(None))):
return result
elif _is_final_type(result):
typ = get_final_type_map()[type(result)]
return typ._fsproxy_wrap(result, func)
elif _is_intermediate_type(result):
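
A simplified, self-contained sketch of the short-circuit added above; the helper names here are stand-ins, and the real function also handles intermediate types, callables, and other cases:

```python
from typing import Any

# Hypothetical stand-in for the real get_final_type_map() registry in
# fast_slow_proxy.py; it maps concrete result types to proxy types.
_FINAL_TYPE_MAP: dict[type, type] = {}


def _maybe_wrap_result_sketch(result: Any) -> Any:
    # Primitive scalars (and None) never need a proxy, so return them
    # immediately instead of consulting the type registries.
    if isinstance(result, (int, str, float, bool, type(None))):
        return result
    proxy_type = _FINAL_TYPE_MAP.get(type(result))
    if proxy_type is not None:
        return proxy_type(result)  # wrap "proxiable" results in a proxy type
    return result


assert _maybe_wrap_result_sketch(42) == 42      # primitive: returned as-is
assert _maybe_wrap_result_sketch(None) is None  # None: returned as-is
```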
18 changes: 18 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -1777,3 +1777,21 @@ def test_cudf_pandas_util_version(attrs):
assert not hasattr(pd.util, attrs)
else:
assert hasattr(pd.util, attrs)


def test_iteration_over_dataframe_dtypes_produces_proxy_objects(dataframe):
_, xdf = dataframe
xdf["b"] = xpd.IntervalIndex.from_arrays(xdf["a"], xdf["b"])
xdf["a"] = xpd.Series([1, 1, 1, 2, 3], dtype="category")
dtype_series = xdf.dtypes
assert all(is_proxy_object(x) for x in dtype_series)
assert isinstance(dtype_series.iloc[0], xpd.CategoricalDtype)
assert isinstance(dtype_series.iloc[1], xpd.IntervalDtype)


def test_iter_doesnot_raise(monkeypatch):
s = xpd.Series([1, 2, 3])
with monkeypatch.context() as monkeycontext:
monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
for _ in s:
pass
26 changes: 16 additions & 10 deletions python/dask_cudf/dask_cudf/io/parquet.py
@@ -5,6 +5,8 @@
from dask_expr.io.io import FusedParquetIO
from dask_expr.io.parquet import FragmentWrapper, ReadParquetPyarrowFS

from dask._task_spec import Task

import cudf

from dask_cudf import _deprecated_api
@@ -19,7 +21,7 @@ def _load_multiple_files(
frag_filters,
columns,
schema,
*to_pandas_args,
**to_pandas_kwargs,
):
import pyarrow as pa

@@ -46,7 +48,7 @@
)
return CudfReadParquetPyarrowFS._table_to_pandas(
get(dsk, name),
*to_pandas_args,
**to_pandas_kwargs,
)


@@ -89,7 +91,7 @@ def _table_to_pandas(table, index_name):
df = df.set_index(index_name)
return df

def _filtered_task(self, index: int):
def _filtered_task(self, name, index: int):
columns = self.columns.copy()
index_name = self.index.name
if self.index is not None:
@@ -99,16 +101,20 @@ def _filtered_task(self, index: int):
if columns is None:
columns = list(schema.names)
columns.append(index_name)
return (
return Task(
name,
self._table_to_pandas,
(
Task(
None,
self._fragment_to_table,
FragmentWrapper(self.fragments[index], filesystem=self.fs),
self.filters,
columns,
schema,
fragment_wrapper=FragmentWrapper(
self.fragments[index], filesystem=self.fs
),
filters=self.filters,
columns=columns,
schema=schema,
),
index_name,
index_name=index_name,
)

def _tune_up(self, parent):
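
For context, a minimal sketch of the nested `Task` construction pattern used in `_filtered_task` above. The `Task(key, callable, *args, **kwargs)` signature is inferred from this diff, the callables are placeholders, and execution is left to the dask scheduler:

```python
# Minimal illustration of the nested Task pattern used in _filtered_task
# above; dask._task_spec is a private module imported at the top of this
# diff, so its API may change between dask versions.
from dask._task_spec import Task


def load_fragment(path, columns=None):
    return f"table({path}, columns={columns})"  # placeholder for real I/O


def to_pandas(table, index_name=None):
    return f"frame({table}, index={index_name})"  # placeholder conversion


# The inner task carries no key of its own (None), mirroring the diff; the
# outer task holds the layer key and takes the inner task as an argument.
outer = Task(
    "read-parquet-0",
    to_pandas,
    Task(None, load_fragment, "part.0.parquet", columns=["a", "b"]),
    index_name="a",
)
```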
59 changes: 0 additions & 59 deletions python/libcudf/cmake/Modules/WheelHelpers.cmake

This file was deleted.
