Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dask] dask cudf inplace prediction. #5512

Merged
merged 22 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions python-package/xgboost/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,6 @@ def mapped_predict(data, is_df):
if is_df:
if lazy_isinstance(data, 'cudf.core.dataframe', 'DataFrame'):
import cudf # pylint: disable=import-error
# There's an error with cudf saying `concat_cudf` got an
# unexpected argument `ignore_index`. So this is not yet working.
prediction = cudf.DataFrame({'prediction': prediction},
dtype=numpy.float32)
else:
Expand Down
6 changes: 3 additions & 3 deletions tests/ci_build/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ RUN \
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Python
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python

ENV PATH=/opt/python/bin:$PATH

Expand All @@ -29,7 +29,7 @@ RUN conda create -n py35 python=3.5 && \
RUN \
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
recommonmark guzzle_sphinx_theme mock breathe graphviz \
pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \
pytest scikit-learn wheel python-kubernetes urllib3 jsonschema boto3 && \
hcho3 marked this conversation as resolved.
Show resolved Hide resolved
pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
pip install "dask[complete]"

Expand Down
16 changes: 6 additions & 10 deletions tests/ci_build/Dockerfile.cudf
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,16 @@ RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential && \
# Python
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python

ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with cuDF and dask
# Create new Conda environment with cuDF, Dask, and CuPy
RUN \
conda create -n cudf_test -c rapidsai -c nvidia -c numba -c conda-forge -c anaconda \
cudf=0.9 python=3.7 anaconda::cudatoolkit=$CUDA_VERSION dask dask-cuda cupy

# Install other Python packages
RUN \
source activate cudf_test && \
pip install numpy pytest scipy scikit-learn pandas matplotlib wheel kubernetes urllib3 graphviz
conda create -n cudf_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudf cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz

ENV GOSU_VERSION 1.10

Expand Down
10 changes: 5 additions & 5 deletions tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential && \
# Python
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python

ENV PATH=/opt/python/bin:$PATH

# Install Python packages
RUN \
pip install numpy pytest scipy scikit-learn pandas matplotlib wheel kubernetes urllib3 graphviz && \
pip install "dask[complete]" && \
conda install -c rapidsai -c nvidia -c numba -c conda-forge -c anaconda dask-cuda
conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudatoolkit=$CUDA_VERSION dask dask-cuda numpy pytest scipy \
scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz

ENV GOSU_VERSION 1.10

Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.gpu_build
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ RUN \
$DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
# Python
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr
Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.jvm
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ RUN \
yum -y update && \
yum install -y devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ && \
# Python
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.jvm_cross
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ RUN \
apt-get update && \
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
# Python
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
/opt/python/bin/pip install awscli && \
# Maven
wget https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
Expand Down
31 changes: 0 additions & 31 deletions tests/ci_build/Dockerfile.release

This file was deleted.

5 changes: 4 additions & 1 deletion tests/ci_build/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ function install_xgboost {
# Run specified test suite
case "$suite" in
gpu)
source activate gpu_test
install_xgboost
pytest -v -s --fulltrace -m "not mgpu" tests/python-gpu
;;

mgpu)
source activate gpu_test
install_xgboost
pytest -v -s --fulltrace -m "mgpu" tests/python-gpu
cd tests/distributed
Expand All @@ -44,7 +46,8 @@ case "$suite" in
cudf)
source activate cudf_test
install_xgboost
pytest -v -s --fulltrace -m "not mgpu" tests/python-gpu/test_from_cudf.py tests/python-gpu/test_from_cupy.py
pytest -v -s --fulltrace -m "not mgpu" tests/python-gpu/test_from_cudf.py tests/python-gpu/test_from_cupy.py \
tests/python-gpu/test_gpu_with_dask.py::TestDistributedGPU::test_dask_dataframe
;;

cpu)
Expand Down
12 changes: 6 additions & 6 deletions tests/python-gpu/test_gpu_with_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class TestDistributedGPU(unittest.TestCase):
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_dask_cudf())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.mgpu
def test_dask_dataframe(self):
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
Expand All @@ -51,18 +52,17 @@ def test_dask_dataframe(self):
predictions = dxgb.predict(client, out, dtrain).compute()
assert isinstance(predictions, np.ndarray)

# There's an error with cudf saying `concat_cudf` got an
# unexpected argument `ignore_index`. So the test here is just
# a placeholder.

# series_predictions = dxgb.inplace_predict(client, out, X)
# assert isinstance(series_predictions, dd.Series)
series_predictions = dxgb.inplace_predict(client, out, X)
assert isinstance(series_predictions, dd.Series)
series_predictions = series_predictions.compute()

single_node = out['booster'].predict(
xgboost.DMatrix(X.compute()))
cupy.testing.assert_allclose(single_node, predictions)
cupy.testing.assert_allclose(single_node, series_predictions)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_array(self):
with LocalCUDACluster() as cluster:
with Client(cluster) as client:
Expand Down
1 change: 0 additions & 1 deletion tests/python-gpu/test_monotonic_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def assert_constraint(constraint, tree_method):
assert non_increasing(pred)


@pytest.mark.gpu
class TestMonotonicConstraints(unittest.TestCase):
def test_exact(self):
assert_constraint(1, 'exact')
Expand Down