Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dask] dask cudf inplace prediction. #5512

Merged
merged 22 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions python-package/xgboost/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,6 @@ def mapped_predict(data, is_df):
if is_df:
if lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame'):
import cudf # pylint: disable=import-error
# There's an error with cudf saying `concat_cudf` got an
# unexpected argument `ignore_index`. So this is not yet working.
prediction = cudf.DataFrame({'prediction': prediction},
dtype=numpy.float32)
else:
Expand Down
6 changes: 4 additions & 2 deletions tests/ci_build/Dockerfile.cudf
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with cuDF and dask
RUN \
conda create -n cudf_test -c rapidsai -c nvidia -c numba -c conda-forge -c anaconda \
cudf=0.9 python=3.7 anaconda::cudatoolkit=$CUDA_VERSION dask dask-cuda cupy
conda create -n cudf_test python=3.7 && \
source activate cudf_test && \
conda install -c rapidsai -c nvidia -c numba -c conda-forge -c anaconda \
anaconda::cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy cudf

# Install other Python packages
RUN \
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \
pip install numpy pytest scipy scikit-learn pandas matplotlib wheel kubernetes urllib3 graphviz && \
pip install "dask[complete]" && \
conda install -c rapidsai -c nvidia -c numba -c conda-forge -c anaconda dask-cuda
conda install -c rapidsai -c conda-forge -c anaconda dask-cuda
hcho3 marked this conversation as resolved.
Show resolved Hide resolved

ENV GOSU_VERSION 1.10

Expand Down
3 changes: 2 additions & 1 deletion tests/ci_build/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ case "$suite" in
cudf)
source activate cudf_test
install_xgboost
pytest -v -s --fulltrace -m "not mgpu" tests/python-gpu/test_from_cudf.py tests/python-gpu/test_from_cupy.py
pytest -v -s --fulltrace -m "not mgpu" tests/python-gpu/test_from_cudf.py tests/python-gpu/test_from_cupy.py \
tests/python-gpu/test_gpu_with_dask.py::TestDistributedGPU::test_dask_dataframe
;;

cpu)
Expand Down
10 changes: 4 additions & 6 deletions tests/python-gpu/test_gpu_with_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,14 @@ def test_dask_dataframe(self):
predictions = dxgb.predict(client, out, dtrain).compute()
assert isinstance(predictions, np.ndarray)

# There's an error with cudf saying `concat_cudf` got an
# unexpected argument `ignore_index`. So the test here is just
# a placeholder.

# series_predictions = dxgb.inplace_predict(client, out, X)
# assert isinstance(series_predictions, dd.Series)
series_predictions = dxgb.inplace_predict(client, out, X)
assert isinstance(series_predictions, dd.Series)
series_predictions = series_predictions.compute()

single_node = out['booster'].predict(
xgboost.DMatrix(X.compute()))
cupy.testing.assert_allclose(single_node, predictions)
cupy.testing.assert_allclose(single_node, series_predictions)

@pytest.mark.skipif(**tm.no_cupy())
def test_dask_array(self):
Expand Down