From 15f40e51e9071aba1bb50d7211caa5467f8bc56a Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 1 Apr 2020 23:34:32 +1300 Subject: [PATCH] Add support for dlpack, expose python docs for DeviceQuantileDMatrix (#5465) --- doc/python/python_api.rst | 3 +++ python-package/xgboost/core.py | 30 ++++++++++++++++++++++-------- tests/python-gpu/test_from_cupy.py | 16 +++++++++++++++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index ad6ec3659b71..234b63db66bc 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -14,6 +14,9 @@ Core Data Structure :members: :show-inheritance: +.. autoclass:: xgboost.DeviceQuantileDMatrix + :show-inheritance: + .. autoclass:: xgboost.Booster :members: :show-inheritance: diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 733b0cfba8d4..893ef361259b 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -381,6 +381,17 @@ def _maybe_dt_data(data, feature_names, feature_types, return data, feature_names, feature_types +def _is_dlpack(x): + return 'PyCapsule' in str(type(x)) and "dltensor" in str(x) + +# Just convert dlpack into cupy (zero copy) +def _maybe_dlpack_data(data, feature_names, feature_types): + if not _is_dlpack(data): + return data, feature_names, feature_types + from cupy import fromDlpack # pylint: disable=E0401 + data = fromDlpack(data) + return data, feature_names, feature_types + def _convert_dataframes(data, feature_names, feature_types, meta=None, meta_type=None): @@ -399,6 +410,9 @@ def _convert_dataframes(data, feature_names, feature_types, data, feature_names, feature_types = _maybe_cudf_dataframe( data, feature_names, feature_types) + data, feature_names, feature_types = _maybe_dlpack_data( + data, feature_names, feature_types) + return data, feature_names, feature_types @@ -439,7 +453,7 @@ def __init__(self, data, label=None, weight=None, base_margin=None, """Parameters ---------- data : os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/ - dt.Frame/cudf.DataFrame/cupy.array + dt.Frame/cudf.DataFrame/cupy.array/dlpack Data source of DMatrix. When data is string or os.PathLike type, it represents the path libsvm format txt file, csv file (by specifying uri parameter @@ -1028,12 +1042,12 @@ def feature_types(self, feature_types): class DeviceQuantileDMatrix(DMatrix): """Device memory Data Matrix used in XGBoost for training with tree_method='gpu_hist'. Do not use this for test/validation tasks as some information may be lost in quantisation. This - DMatrix is primarily designed to save memory in training and avoids intermediate steps, - directly creating a compressed representation for training without allocating additional - memory. Implementation does not currently consider weights in quantisation process(unlike - DMatrix). + DMatrix is primarily designed to save memory in training from device memory inputs by + avoiding intermediate storage. Implementation does not currently consider weights in + quantisation process(unlike DMatrix). Set max_bin to control the number of bins during + quantisation. - You can construct DeviceDMatrix from cupy/cudf + You can construct DeviceQuantileDMatrix from cupy/cudf/dlpack. """ def __init__(self, data, label=None, weight=None, base_margin=None, @@ -1044,8 +1058,8 @@ def __init__(self, data, label=None, weight=None, base_margin=None, nthread=None, max_bin=256): self.max_bin = max_bin if not (hasattr(data, "__cuda_array_interface__") or ( - CUDF_INSTALLED and isinstance(data, CUDF_DataFrame))): - raise ValueError('Only cupy/cudf currently supported for DeviceDMatrix') + CUDF_INSTALLED and isinstance(data, CUDF_DataFrame)) or _is_dlpack(data)): + raise ValueError('Only cupy/cudf/dlpack currently supported for DeviceQuantileDMatrix') super().__init__(data, label=label, weight=weight, base_margin=base_margin, missing=missing, diff --git a/tests/python-gpu/test_from_cupy.py b/tests/python-gpu/test_from_cupy.py index ebd9235e0b3c..371e68fa2599 100644 --- a/tests/python-gpu/test_from_cupy.py +++ b/tests/python-gpu/test_from_cupy.py @@ -95,7 +95,7 @@ def _test_cupy_metainfo(DMatrixT): assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cupy.get_uint_info('group_ptr')) -class TestFromArrayInterface: +class TestFromCupy: '''Tests for constructing DMatrix from data structure conforming Apache Arrow specification.''' @@ -122,3 +122,17 @@ def test_cupy_metainfo_simple_dmat(self): @pytest.mark.skipif(**tm.no_cupy()) def test_cupy_metainfo_device_dmat(self): _test_cupy_metainfo(xgb.DeviceQuantileDMatrix) + + @pytest.mark.skipif(**tm.no_cupy()) + def test_dlpack_simple_dmat(self): + import cupy as cp + n = 100 + X = cp.random.random((n, 2)) + xgb.DMatrix(X.toDlpack()) + + @pytest.mark.skipif(**tm.no_cupy()) + def test_dlpack_device_dmat(self): + import cupy as cp + n = 100 + X = cp.random.random((n, 2)) + xgb.DeviceQuantileDMatrix(X.toDlpack())