Document for device ordinal. (#9398)
- Rewrite GPU demos. The notebook is converted to a script to avoid committing additional PNG plots.
- Add GPU demos to the Sphinx gallery.
- Add RMM demos to the Sphinx gallery.
- Test for firing threads with different device ordinals.
1 parent 22b0a55 · commit 275da17
Showing 32 changed files with 343 additions and 390 deletions.
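The last bullet above refers to a regression test added by this commit; the test itself is not shown on this page. A minimal sketch of the idea it describes, firing one training thread per device ordinal, might look like the following (hypothetical code, not the actual test, assuming two visible GPUs):

```python
import threading

import numpy as np
import xgboost as xgb


def train_on(ordinal: int) -> None:
    # Each thread pins its booster to a different GPU via the device ordinal
    # ("cuda:0", "cuda:1", ...).
    rng = np.random.default_rng(ordinal)
    X, y = rng.random((256, 16)), rng.random(256)
    booster = xgb.train(
        {"tree_method": "hist", "device": f"cuda:{ordinal}"},
        xgb.DMatrix(X, label=y),
        num_boost_round=4,
    )
    assert booster.num_boosted_rounds() == 4


threads = [threading.Thread(target=train_on, args=(i,)) for i in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```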
This file was deleted.
@@ -0,0 +1,8 @@
:orphan:

GPU Acceleration Demo
=====================

This is a collection of demonstration scripts to showcase the basic usage of GPU. Please
see :doc:`/gpu/index` for more info. There are other demonstrations for distributed GPU
training using dask or spark.
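The distributed demos referenced in the last sentence are not included in this commit page. As a rough illustration only, GPU training through the dask interface looks approximately like the sketch below (assumes `dask_cuda` is installed and at least one GPU is visible; not taken from this commit):

```python
from dask import array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from xgboost import dask as dxgb

# Spin up one worker per local GPU, then train with the dask interface.
with LocalCUDACluster() as cluster, Client(cluster) as client:
    X = da.random.random((100_000, 20), chunks=(10_000, 20))
    y = da.random.random(100_000, chunks=(10_000,))
    dtrain = dxgb.DaskDMatrix(client, X, y)
    output = dxgb.train(
        client,
        {"tree_method": "hist", "device": "cuda"},
        dtrain,
        num_boost_round=10,
    )
    booster = output["booster"]  # output["history"] holds eval logs when evals are passed
```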
@@ -1,41 +1,49 @@
 """
 Using xgboost on GPU devices
 ============================
 Shows how to train a model on the `forest cover type
 <https://archive.ics.uci.edu/ml/datasets/covertype>`_ dataset using GPU
 acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it
 time consuming to process. We compare the run-time and accuracy of the GPU and CPU
 histogram algorithms.
 In addition, the demo showcases using GPU with other GPU-related libraries including
 cupy and cuml. These libraries are not strictly required.
 """
 import time

+import cupy as cp
+from cuml.model_selection import train_test_split
 from sklearn.datasets import fetch_covtype
-from sklearn.model_selection import train_test_split

 import xgboost as xgb

 # Fetch dataset using sklearn
-cov = fetch_covtype()
-X = cov.data
-y = cov.target
+X, y = fetch_covtype(return_X_y=True)
+X = cp.array(X)
+y = cp.array(y)
+y -= y.min()

 # Create 0.75/0.25 train/test split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
-                                                    random_state=42)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.25, train_size=0.75, random_state=42
+)

 # Specify sufficient boosting iterations to reach a minimum
 num_round = 3000

-# Leave most parameters as default
-param = {'objective': 'multi:softmax',  # Specify multiclass classification
-         'num_class': 8,  # Number of possible output classes
-         'tree_method': 'gpu_hist'  # Use GPU accelerated algorithm
-         }
-
-# Convert input data from numpy to XGBoost format
-dtrain = xgb.DMatrix(X_train, label=y_train)
-dtest = xgb.DMatrix(X_test, label=y_test)
-
-gpu_res = {}  # Store accuracy result
-tmp = time.time()
+clf = xgb.XGBClassifier(device="cuda", n_estimators=num_round)
 # Train model
-xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
-print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
+start = time.time()
+clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+gpu_res = clf.evals_result()
+print("GPU Training Time: %s seconds" % (str(time.time() - start)))

 # Repeat for CPU algorithm
-tmp = time.time()
-param['tree_method'] = 'hist'
-cpu_res = {}
-xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
-print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
+clf = xgb.XGBClassifier(device="cpu", n_estimators=num_round)
+start = time.time()
+clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
+cpu_res = clf.evals_result()
+print("CPU Training Time: %s seconds" % (str(time.time() - start)))
This file was deleted.
@@ -0,0 +1,55 @@ | ||
""" | ||
Use GPU to speedup SHAP value computation | ||
========================================= | ||
Demonstrates using GPU acceleration to compute SHAP values for feature importance. | ||
""" | ||
import shap | ||
from sklearn.datasets import fetch_california_housing | ||
|
||
import xgboost as xgb | ||
|
||
# Fetch dataset using sklearn | ||
data = fetch_california_housing() | ||
print(data.DESCR) | ||
X = data.data | ||
y = data.target | ||
|
||
num_round = 500 | ||
|
||
param = { | ||
"eta": 0.05, | ||
"max_depth": 10, | ||
"tree_method": "hist", | ||
"device": "cuda", | ||
} | ||
|
||
# GPU accelerated training | ||
dtrain = xgb.DMatrix(X, label=y, feature_names=data.feature_names) | ||
model = xgb.train(param, dtrain, num_round) | ||
|
||
# Compute shap values using GPU with xgboost | ||
model.set_param({"device": "cuda"}) | ||
shap_values = model.predict(dtrain, pred_contribs=True) | ||
|
||
# Compute shap interaction values using GPU | ||
shap_interaction_values = model.predict(dtrain, pred_interactions=True) | ||
|
||
|
||
# shap will call the GPU accelerated version as long as the device parameter is set to | ||
# "cuda" | ||
explainer = shap.TreeExplainer(model) | ||
shap_values = explainer.shap_values(X) | ||
|
||
# visualize the first prediction's explanation | ||
shap.force_plot( | ||
explainer.expected_value, | ||
shap_values[0, :], | ||
X[0, :], | ||
feature_names=data.feature_names, | ||
matplotlib=True, | ||
) | ||
|
||
# Show a summary of feature importance | ||
shap.summary_plot(shap_values, X, plot_type="bar", feature_names=data.feature_names) |
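One detail worth keeping in mind when reading the demo above: ``predict(..., pred_contribs=True)`` returns one column per feature plus a final bias column, and each row sums to the model's raw margin prediction. A small sanity check, sketched against the objects defined in the demo:

```python
import numpy as np

margin = model.predict(dtrain, output_margin=True)
# shape: (n_samples, n_features + 1); the last column is the bias term.
contribs = model.predict(dtrain, pred_contribs=True)
np.testing.assert_allclose(contribs.sum(axis=1), margin, rtol=1e-3)
```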
This file was deleted.
@@ -0,0 +1,51 @@
Using XGBoost with RAPIDS Memory Manager (RMM) plugin (EXPERIMENTAL)
====================================================================

The `RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ library provides a
collection of efficient memory allocators for NVIDIA GPUs. It is now possible to use
XGBoost with memory allocators provided by RMM by enabling the RMM integration plugin.

The demos in this directory highlight one RMM allocator in particular: **the pool
sub-allocator**. This allocator addresses the slow speed of ``cudaMalloc()`` by
allocating a large chunk of memory upfront. Subsequent allocations draw from this pool
of already-allocated memory and thus avoid the overhead of calling ``cudaMalloc()``
directly. See `this GTC talk
<https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf>`_ for
more details.

Before running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To
do this, run CMake with the option ``-DPLUGIN_RMM=ON`` (``-DUSE_CUDA=ON`` is also required):

.. code-block:: sh

  cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON
  make -j$(nproc)

CMake will attempt to locate the RMM library in your build environment. You may choose to
build RMM from source or install it using the Conda package manager. If CMake cannot find
RMM, specify its location with ``CMAKE_PREFIX_PATH``:

.. code-block:: sh

  # If using Conda:
  cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
  # If RMM is installed at a custom location:
  cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm

********************************
Informing XGBoost about RMM pool
********************************

When XGBoost is compiled with RMM, most of the large allocations go through the RMM
allocators, but some small allocations in performance-critical areas use a different
caching allocator so that we can have better control over memory allocation behavior.
Users can override this behavior and force the use of RMM for all allocations by setting
the global configuration ``use_rmm``:

.. code-block:: python

  with xgb.config_context(use_rmm=True):
      clf = xgb.XGBClassifier(tree_method="hist", device="cuda")

Depending on the choice of memory pool size and the type of allocator, this can have a
negative performance impact.
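As a side note to the README above (not part of the committed file): the RMM demos typically create the memory pool before any training happens. A minimal sketch, assuming recent versions of ``rmm`` and ``cupy`` (the allocator import path differs between rmm releases):

```python
import cupy as cp
import rmm
import xgboost as xgb
from rmm.allocators.cupy import rmm_cupy_allocator

# Carve out a 1 GiB pool up front so later allocations skip cudaMalloc().
rmm.reinitialize(pool_allocator=True, initial_pool_size=2**30)
cp.cuda.set_allocator(rmm_cupy_allocator)

with xgb.config_context(use_rmm=True):
    X = cp.random.rand(10_000, 32)
    y = (cp.random.rand(10_000) > 0.5).astype(cp.int32)
    clf = xgb.XGBClassifier(tree_method="hist", device="cuda")
    clf.fit(X, y)
```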
@@ -6,3 +6,5 @@ doxygen
 parser.py
 *.pyc
 web-data
+# generated by doxygen
+tmp