diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index a2b8b1e6d214..612834bd0301 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -30,7 +30,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: jvm_tests
environment-file: tests/ci_build/conda_env/jvm_tests.yml
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 621c8b465f9b..c866182706f0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -81,7 +81,7 @@ jobs:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: linux_sycl_test
environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
@@ -123,7 +123,7 @@ jobs:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: cpp_test
environment-file: tests/ci_build/conda_env/cpp_test.yml
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
index e3b60f12cc1e..4d6e38d24e30 100644
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@@ -26,7 +26,7 @@ jobs:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: python_lint
environment-file: tests/ci_build/conda_env/python_lint.yml
@@ -58,7 +58,7 @@ jobs:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: sdist_test
environment-file: tests/ci_build/conda_env/sdist_test.yml
@@ -130,7 +130,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: macos_cpu_test
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
@@ -227,7 +227,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: linux_cpu_test
environment-file: tests/ci_build/conda_env/linux_cpu_test.yml
@@ -280,7 +280,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
activate-environment: linux_sycl_test
environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
index cd611f46371f..4fdd3e9373cc 100644
--- a/.github/workflows/python_wheels.yml
+++ b/.github/workflows/python_wheels.yml
@@ -34,7 +34,7 @@ jobs:
run: brew install libomp
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- miniforge-variant: Mambaforge
+ miniforge-variant: Miniforge3
miniforge-version: latest
python-version: 3.9
use-mamba: true
diff --git a/.gitignore b/.gitignore
index 4480c81029a5..3725dd1603d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,11 +139,13 @@ credentials.csv
.bloop
# python tests
+*.bin
demo/**/*.txt
*.dmatrix
.hypothesis
__MACOSX/
model*.json
+/tests/python/models/models/
# R tests
*.htm
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0df914467e6b..5fb464b673b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ if(PLUGIN_SYCL)
string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
-project(xgboost LANGUAGES CXX C VERSION 2.1.3)
+project(xgboost LANGUAGES CXX C VERSION 2.1.4)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
@@ -327,7 +327,6 @@ if(PLUGIN_RMM)
list(REMOVE_ITEM rmm_link_libs CUDA::cudart)
list(APPEND rmm_link_libs CUDA::cudart_static)
set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES "${rmm_link_libs}")
- get_target_property(rmm_link_libs rmm::rmm INTERFACE_LINK_LIBRARIES)
endif()
if(PLUGIN_SYCL)
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 1c58e6166b3b..f77ca73ec343 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
-Version: 2.1.3.1
-Date: 2024-11-26
+Version: 2.1.4.1
+Date: 2025-02-06
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),
diff --git a/R-package/configure b/R-package/configure
index e353456c9390..f1f005752270 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.1.3.
+# Generated by GNU Autoconf 2.71 for xgboost 2.1.4.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.1.3'
-PACKAGE_STRING='xgboost 2.1.3'
+PACKAGE_VERSION='2.1.4'
+PACKAGE_STRING='xgboost 2.1.4'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures xgboost 2.1.3 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.1.4 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1321,7 +1321,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of xgboost 2.1.3:";;
+ short | recursive ) echo "Configuration of xgboost 2.1.4:";;
esac
cat <<\_ACEOF
@@ -1404,7 +1404,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-xgboost configure 2.1.3
+xgboost configure 2.1.4
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by xgboost $as_me 2.1.3, which was
+It was created by xgboost $as_me 2.1.4, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by xgboost $as_me 2.1.3, which was
+This file was extended by xgboost $as_me 2.1.4, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
-xgboost config.status 2.1.3
+xgboost config.status 2.1.4
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
diff --git a/R-package/configure.ac b/R-package/configure.ac
index 06e1b5386974..bc3354795643 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
-AC_INIT([xgboost],[2.1.3],[],[xgboost],[])
+AC_INIT([xgboost],[2.1.4],[],[xgboost],[])
: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index b12302a166c0..4e66c15186eb 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -55,7 +55,9 @@ function(compute_cmake_cuda_archs archs)
# Set up defaults based on CUDA varsion
if(NOT CMAKE_CUDA_ARCHITECTURES)
- if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
+ if(CUDA_VERSION VERSION_GREATER_EQUAL "12.8")
+ set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90 100 120)
+ elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py
index f53835ffbee9..7c48074e561e 100644
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -50,8 +50,8 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
.. versionadded:: 1.2.0
"""
- X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
- y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))
+ X = dd.from_dask_array(X).to_backend("cudf")
+ y = dd.from_dask_array(y).to_backend("cudf")
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not
# be used for anything else other than training unless a reference is specified. See
diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h
index 4d3ed975fa4a..3cccc74f3e59 100644
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
#define XGBOOST_VER_MINOR 1 /* NOLINT */
-#define XGBOOST_VER_PATCH 3 /* NOLINT */
+#define XGBOOST_VER_PATCH 4 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 31af7cdaf608..14092a82176f 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
pom
XGBoost JVM Package
JVM Package for XGBoost
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index 31ec46a04d60..876de03a8e45 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j-example
xgboost4j-example_2.12
- 2.1.3
+ 2.1.4
jar
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index 7bbcd5407bff..6569dc831530 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,12 +6,12 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j-flink
xgboost4j-flink_2.12
- 2.1.3
+ 2.1.4
2.2.0
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index 6a0ff0e7905f..493b6c5d00a0 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j-gpu_2.12
xgboost4j-gpu
- 2.1.3
+ 2.1.4
jar
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index 92fe0b6994aa..8826259d7bf5 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j-spark-gpu
xgboost4j-spark-gpu_2.12
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index b5a401ee477b..0ca8c2f5ccb0 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j-spark
xgboost4j-spark_2.12
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index 228f225ad5cd..fd3de238a128 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,11 +6,11 @@
ml.dmlc
xgboost-jvm_2.12
- 2.1.3
+ 2.1.4
xgboost4j
xgboost4j_2.12
- 2.1.3
+ 2.1.4
jar
diff --git a/plugin/sycl/common/hist_util.cc b/plugin/sycl/common/hist_util.cc
index 2f2417f3a29a..89b354425f34 100644
--- a/plugin/sycl/common/hist_util.cc
+++ b/plugin/sycl/common/hist_util.cc
@@ -9,7 +9,7 @@
#include "../data/gradient_index.h"
#include "hist_util.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/common/hist_util.h b/plugin/sycl/common/hist_util.h
index aa9b4f5817bb..59f05e3e5c81 100644
--- a/plugin/sycl/common/hist_util.h
+++ b/plugin/sycl/common/hist_util.h
@@ -15,7 +15,7 @@
#include "../../src/common/hist_util.h"
#include "../data/gradient_index.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/common/partition_builder.h b/plugin/sycl/common/partition_builder.h
index c520ff31fb8e..813ca5324238 100644
--- a/plugin/sycl/common/partition_builder.h
+++ b/plugin/sycl/common/partition_builder.h
@@ -25,7 +25,7 @@
#include "../data/gradient_index.h"
#include "../tree/expand_entry.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/common/row_set.h b/plugin/sycl/common/row_set.h
index 574adbf8d9b9..67734f321f7d 100644
--- a/plugin/sycl/common/row_set.h
+++ b/plugin/sycl/common/row_set.h
@@ -15,7 +15,7 @@
#include "../data.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/data.h b/plugin/sycl/data.h
index 8f4bb2516f05..5df15a9b7223 100644
--- a/plugin/sycl/data.h
+++ b/plugin/sycl/data.h
@@ -22,7 +22,7 @@
#include "../../src/common/threading_utils.h"
-#include "CL/sycl.hpp"
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/data/gradient_index.cc b/plugin/sycl/data/gradient_index.cc
index e193b66894c9..0e6871154207 100644
--- a/plugin/sycl/data/gradient_index.cc
+++ b/plugin/sycl/data/gradient_index.cc
@@ -8,7 +8,7 @@
#include "gradient_index.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/data/gradient_index.h b/plugin/sycl/data/gradient_index.h
index 13577025caa0..61a5ea2492e9 100644
--- a/plugin/sycl/data/gradient_index.h
+++ b/plugin/sycl/data/gradient_index.h
@@ -10,7 +10,7 @@
#include "../data.h"
#include "../../src/common/hist_util.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/device_manager.h b/plugin/sycl/device_manager.h
index 84d4b24c0aa8..6e411d8c0a11 100644
--- a/plugin/sycl/device_manager.h
+++ b/plugin/sycl/device_manager.h
@@ -10,7 +10,7 @@
#include
#include
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
diff --git a/plugin/sycl/objective/multiclass_obj.cc b/plugin/sycl/objective/multiclass_obj.cc
index 5dcc8c3de599..669aeabd546b 100644
--- a/plugin/sycl/objective/multiclass_obj.cc
+++ b/plugin/sycl/objective/multiclass_obj.cc
@@ -23,7 +23,7 @@
#include "../../../src/objective/multiclass_param.h"
#include "../device_manager.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/objective/regression_obj.cc b/plugin/sycl/objective/regression_obj.cc
index 82467a7c4848..eb9ba1a615d9 100644
--- a/plugin/sycl/objective/regression_obj.cc
+++ b/plugin/sycl/objective/regression_obj.cc
@@ -24,12 +24,13 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtautological-constant-compare"
#include "../../../src/objective/regression_loss.h"
+#include "../../../src/objective/adaptive.h"
#pragma GCC diagnostic pop
#include "../../../src/objective/regression_param.h"
#include "../device_manager.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/predictor/predictor.cc b/plugin/sycl/predictor/predictor.cc
index c941bca102e7..5b8f7b1ca9c8 100755
--- a/plugin/sycl/predictor/predictor.cc
+++ b/plugin/sycl/predictor/predictor.cc
@@ -10,7 +10,7 @@
#include
#include
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
#include "../data.h"
diff --git a/plugin/sycl/tree/param.h b/plugin/sycl/tree/param.h
index a83a7ad138ab..3c30c4e5c822 100644
--- a/plugin/sycl/tree/param.h
+++ b/plugin/sycl/tree/param.h
@@ -19,7 +19,7 @@
#include "../src/tree/param.h"
#pragma GCC diagnostic pop
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/plugin/sycl/tree/split_evaluator.h b/plugin/sycl/tree/split_evaluator.h
index 2f1e8c7c4e66..7565298748bd 100644
--- a/plugin/sycl/tree/split_evaluator.h
+++ b/plugin/sycl/tree/split_evaluator.h
@@ -21,7 +21,7 @@
#include "../../src/common/math.h"
#include "../../src/tree/param.h"
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
namespace xgboost {
namespace sycl {
diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml
index e6cfde49d7c9..5a3b897d3f7d 100644
--- a/python-package/pyproject.toml
+++ b/python-package/pyproject.toml
@@ -14,7 +14,7 @@ authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
]
-version = "2.1.3"
+version = "2.1.4"
requires-python = ">=3.8"
license = { text = "Apache-2.0" }
classifiers = [
@@ -62,6 +62,8 @@ extension-pkg-whitelist = ["numpy"]
disable = [
"attribute-defined-outside-init",
"import-outside-toplevel",
+ "too-few-public-methods",
+ "too-many-ancestors",
"too-many-nested-blocks",
"unexpected-special-method-signature",
"unsubscriptable-object",
diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION
index ac2cdeba0137..7d2ed7c70205 100644
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-2.1.3
+2.1.4
diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py
index 729750f1f354..36063d137d1c 100644
--- a/python-package/xgboost/compat.py
+++ b/python-package/xgboost/compat.py
@@ -45,32 +45,43 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool:
# sklearn
try:
+ from sklearn import __version__ as _sklearn_version
from sklearn.base import BaseEstimator as XGBModelBase
from sklearn.base import ClassifierMixin as XGBClassifierBase
from sklearn.base import RegressorMixin as XGBRegressorBase
- from sklearn.preprocessing import LabelEncoder
try:
- from sklearn.model_selection import KFold as XGBKFold
from sklearn.model_selection import StratifiedKFold as XGBStratifiedKFold
except ImportError:
- from sklearn.cross_validation import KFold as XGBKFold
from sklearn.cross_validation import StratifiedKFold as XGBStratifiedKFold
+ # sklearn.utils Tags types can be imported unconditionally once
+ # xgboost's minimum scikit-learn version is 1.6 or higher
+ try:
+ from sklearn.utils import Tags as _sklearn_Tags
+ except ImportError:
+ _sklearn_Tags = object
+
SKLEARN_INSTALLED = True
except ImportError:
SKLEARN_INSTALLED = False
# used for compatibility without sklearn
- XGBModelBase = object
- XGBClassifierBase = object
- XGBRegressorBase = object
- LabelEncoder = object
+ class XGBModelBase: # type: ignore[no-redef]
+ """Dummy class for sklearn.base.BaseEstimator."""
+
+ class XGBClassifierBase: # type: ignore[no-redef]
+ """Dummy class for sklearn.base.ClassifierMixin."""
+
+ class XGBRegressorBase: # type: ignore[no-redef]
+ """Dummy class for sklearn.base.RegressorMixin."""
- XGBKFold = None
XGBStratifiedKFold = None
+ _sklearn_Tags = object
+ _sklearn_version = object
+
_logger = logging.getLogger(__name__)
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 118b0013d3d8..8115e7dcdaee 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -410,7 +410,7 @@ def c_array(
def from_array_interface(interface: dict) -> NumpyOrCupy:
"""Convert array interface to numpy or cupy array"""
- class Array: # pylint: disable=too-few-public-methods
+ class Array:
"""Wrapper type for communicating with numpy and cupy."""
_interface: Optional[dict] = None
diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py
index 44eae0c51837..0e30f6d7f679 100644
--- a/python-package/xgboost/dask/__init__.py
+++ b/python-package/xgboost/dask/__init__.py
@@ -1,8 +1,6 @@
# pylint: disable=too-many-arguments, too-many-locals
# pylint: disable=missing-class-docstring, invalid-name
# pylint: disable=too-many-lines
-# pylint: disable=too-few-public-methods
-# pylint: disable=import-error
"""
Dask extensions for distributed training
----------------------------------------
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 560a3a8ed285..f5b6152eaf51 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -29,7 +29,14 @@
# Do not use class names on scikit-learn directly. Re-define the classes on
# .compat to guarantee the behavior without scikit-learn
-from .compat import SKLEARN_INSTALLED, XGBClassifierBase, XGBModelBase, XGBRegressorBase
+from .compat import (
+ SKLEARN_INSTALLED,
+ XGBClassifierBase,
+ XGBModelBase,
+ XGBRegressorBase,
+ _sklearn_Tags,
+ _sklearn_version,
+)
from .config import config_context
from .core import (
Booster,
@@ -45,7 +52,7 @@
from .training import train
-class XGBRankerMixIn: # pylint: disable=too-few-public-methods
+class XGBRankerMixIn:
"""MixIn for ranking, defines the _estimator_type usually defined in scikit-learn
base classes.
@@ -69,7 +76,7 @@ def _can_use_qdm(tree_method: Optional[str]) -> bool:
return tree_method in ("hist", "gpu_hist", None, "auto")
-class _SklObjWProto(Protocol): # pylint: disable=too-few-public-methods
+class _SklObjWProto(Protocol):
def __call__(
self,
y_true: ArrayLike,
@@ -782,11 +789,52 @@ def __init__(
def _more_tags(self) -> Dict[str, bool]:
"""Tags used for scikit-learn data validation."""
- tags = {"allow_nan": True, "no_validation": True}
+ tags = {"allow_nan": True, "no_validation": True, "sparse": True}
if hasattr(self, "kwargs") and self.kwargs.get("updater") == "shotgun":
tags["non_deterministic"] = True
+
+ tags["categorical"] = self.enable_categorical
+ return tags
+
+ @staticmethod
+ def _update_sklearn_tags_from_dict(
+ *,
+ tags: _sklearn_Tags,
+ tags_dict: Dict[str, bool],
+ ) -> _sklearn_Tags:
+ """Update ``sklearn.utils.Tags`` inherited from ``scikit-learn`` base classes.
+
+ ``scikit-learn`` 1.6 introduced a dataclass-based interface for estimator tags.
+ ref: https://github.com/scikit-learn/scikit-learn/pull/29677
+
+ This method handles updating that instance based on the values in
+ ``self._more_tags()``.
+
+ """
+ tags.non_deterministic = tags_dict.get("non_deterministic", False)
+ tags.no_validation = tags_dict["no_validation"]
+ tags.input_tags.allow_nan = tags_dict["allow_nan"]
+ tags.input_tags.sparse = tags_dict["sparse"]
+ tags.input_tags.categorical = tags_dict["categorical"]
return tags
+ def __sklearn_tags__(self) -> _sklearn_Tags:
+ # XGBModelBase.__sklearn_tags__() cannot be called unconditionally,
+ # because that method isn't defined for scikit-learn<1.6
+ if not hasattr(XGBModelBase, "__sklearn_tags__"):
+ err_msg = (
+ "__sklearn_tags__() should not be called when using scikit-learn<1.6. "
+ f"Detected version: {_sklearn_version}"
+ )
+ raise AttributeError(err_msg)
+
+ # take whatever tags are provided by BaseEstimator, then modify
+ # them with XGBoost-specific values
+ return self._update_sklearn_tags_from_dict(
+ tags=super().__sklearn_tags__(), # pylint: disable=no-member
+ tags_dict=self._more_tags(),
+ )
+
def __sklearn_is_fitted__(self) -> bool:
return hasattr(self, "_Booster")
@@ -841,13 +889,27 @@ def get_params(self, deep: bool = True) -> Dict[str, Any]:
"""Get parameters."""
# Based on: https://stackoverflow.com/questions/59248211
# The basic flow in `get_params` is:
- # 0. Return parameters in subclass first, by using inspect.
- # 1. Return parameters in `XGBModel` (the base class).
+ # 0. Return parameters in subclass (self.__class__) first, by using inspect.
+ # 1. Return parameters in all parent classes (especially `XGBModel`).
# 2. Return whatever in `**kwargs`.
# 3. Merge them.
+ #
+ # This needs to accommodate being called recursively in the following
+ # inheritance graphs (and similar for classification and ranking):
+ #
+ # XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator
+ # XGBRegressor -> XGBModel -> BaseEstimator
+ # XGBModel -> BaseEstimator
+ #
params = super().get_params(deep)
cp = copy.copy(self)
- cp.__class__ = cp.__class__.__bases__[0]
+ # If the immediate parent defines get_params(), use that.
+ if callable(getattr(cp.__class__.__bases__[0], "get_params", None)):
+ cp.__class__ = cp.__class__.__bases__[0]
+ # Otherwise, skip it and assume the next class will have it.
+ # This is here primarily for cases where the first class in MRO is a scikit-learn mixin.
+ else:
+ cp.__class__ = cp.__class__.__bases__[1]
params.update(cp.__class__.get_params(cp, deep))
# if kwargs is a dict, update params accordingly
if hasattr(self, "kwargs") and isinstance(self.kwargs, dict):
@@ -1431,7 +1493,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) ->
Number of boosting rounds.
""",
)
-class XGBClassifier(XGBModel, XGBClassifierBase):
+class XGBClassifier(XGBClassifierBase, XGBModel):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
@_deprecate_positional_args
def __init__(
@@ -1447,6 +1509,12 @@ def _more_tags(self) -> Dict[str, bool]:
tags["multilabel"] = True
return tags
+ def __sklearn_tags__(self) -> _sklearn_Tags:
+ tags = super().__sklearn_tags__()
+ tags_dict = self._more_tags()
+ tags.classifier_tags.multi_label = tags_dict["multilabel"]
+ return tags
+
@_deprecate_positional_args
def fit(
self,
@@ -1717,7 +1785,7 @@ def fit(
"Implementation of the scikit-learn API for XGBoost regression.",
["estimators", "model", "objective"],
)
-class XGBRegressor(XGBModel, XGBRegressorBase):
+class XGBRegressor(XGBRegressorBase, XGBModel):
# pylint: disable=missing-docstring
@_deprecate_positional_args
def __init__(
@@ -1731,6 +1799,13 @@ def _more_tags(self) -> Dict[str, bool]:
tags["multioutput_only"] = False
return tags
+ def __sklearn_tags__(self) -> _sklearn_Tags:
+ tags = super().__sklearn_tags__()
+ tags_dict = self._more_tags()
+ tags.target_tags.multi_output = tags_dict["multioutput"]
+ tags.target_tags.single_output = not tags_dict["multioutput_only"]
+ return tags
+
@xgboost_model_doc(
"scikit-learn API for XGBoost random forest regression.",
@@ -1858,7 +1933,7 @@ def _get_qid(
`qid` can be a special column of input `X` instead of a separated parameter, see
:py:meth:`fit` for more info.""",
)
-class XGBRanker(XGBModel, XGBRankerMixIn):
+class XGBRanker(XGBRankerMixIn, XGBModel):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
@_deprecate_positional_args
def __init__(self, *, objective: str = "rank:ndcg", **kwargs: Any):
diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py
index 591144cd1c27..5caecc7fbd71 100644
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -2,8 +2,8 @@
import base64
-# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
-# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches
+# pylint: disable=fixme, protected-access, no-member, invalid-name
+# pylint: disable=too-many-lines, too-many-branches
import json
import logging
import os
diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py
index 51e2e946f8a5..522b74b7b83b 100644
--- a/python-package/xgboost/spark/estimator.py
+++ b/python-package/xgboost/spark/estimator.py
@@ -1,7 +1,6 @@
"""Xgboost pyspark integration submodule for estimator API."""
-# pylint: disable=too-many-ancestors
-# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
+# pylint: disable=fixme, protected-access, no-member, invalid-name
# pylint: disable=unused-argument, too-many-locals
import warnings
diff --git a/python-package/xgboost/spark/params.py b/python-package/xgboost/spark/params.py
index a177c73fe413..f173d3301286 100644
--- a/python-package/xgboost/spark/params.py
+++ b/python-package/xgboost/spark/params.py
@@ -2,7 +2,6 @@
from typing import Dict
-# pylint: disable=too-few-public-methods
from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import Param, Params
diff --git a/python-package/xgboost/spark/utils.py b/python-package/xgboost/spark/utils.py
index 0a421031ecd4..177df99c74ce 100644
--- a/python-package/xgboost/spark/utils.py
+++ b/python-package/xgboost/spark/utils.py
@@ -43,7 +43,7 @@ def _get_default_params_from_func(
return filtered_params_dict
-class CommunicatorContext(CCtx): # pylint: disable=too-few-public-methods
+class CommunicatorContext(CCtx):
"""Context with PySpark specific task ID."""
def __init__(self, context: BarrierTaskContext, **args: Any) -> None:
diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py
index 4071219c44ef..91b9ca6d330a 100644
--- a/python-package/xgboost/testing/data.py
+++ b/python-package/xgboost/testing/data.py
@@ -564,7 +564,7 @@ def is_binary(self) -> bool:
return self.max_rel == 1
-class PBM: # pylint: disable=too-few-public-methods
+class PBM:
"""Simulate click data with position bias model. There are other models available in
`ULTRA `_ like the cascading model.
diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh
index a0462d738754..fcbde49f2e19 100755
--- a/tests/buildkite/conftest.sh
+++ b/tests/buildkite/conftest.sh
@@ -22,9 +22,9 @@ function set_buildkite_env_vars_in_container {
set -x
-CUDA_VERSION=11.8.0
-NCCL_VERSION=2.16.5-1
-RAPIDS_VERSION=24.06
+CUDA_VERSION=12.8.0
+NCCL_VERSION=2.25.1-1
+RAPIDS_VERSION=24.12
DEV_RAPIDS_VERSION=24.06
SPARK_VERSION=3.5.1
JDK_VERSION=8
diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml
index ee9637b8bd25..7721a3d5a30a 100644
--- a/tests/buildkite/pipeline.yml
+++ b/tests/buildkite/pipeline.yml
@@ -21,11 +21,6 @@ steps:
queue: linux-amd64-cpu
- wait
#### -------- BUILD --------
- - label: ":console: Run clang-tidy"
- command: "tests/buildkite/run-clang-tidy.sh"
- key: run-clang-tidy
- agents:
- queue: linux-amd64-cpu
- label: ":console: Build CPU"
command: "tests/buildkite/build-cpu.sh"
key: build-cpu
@@ -41,11 +36,6 @@ steps:
key: build-cuda
agents:
queue: linux-amd64-cpu
- - label: ":console: Build CUDA with RMM"
- command: "tests/buildkite/build-cuda-with-rmm.sh"
- key: build-cuda-with-rmm
- agents:
- queue: linux-amd64-cpu
- label: ":console: Build R package with CUDA"
command: "tests/buildkite/build-gpu-rpkg.sh"
key: build-gpu-rpkg
diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh
index d7197db2efce..e5e42db105cf 100755
--- a/tests/buildkite/test-cpp-gpu.sh
+++ b/tests/buildkite/test-cpp-gpu.sh
@@ -4,6 +4,9 @@ set -euo pipefail
source tests/buildkite/conftest.sh
+# Work around https://github.com/dmlc/xgboost/issues/11154
+export CI_DOCKER_EXTRA_PARAMS_INIT='-e NCCL_RAS_ENABLE=0'
+
echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
@@ -12,13 +15,3 @@ tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
build/testxgboost
-
-echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
-rm -rfv build/
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
-chmod +x build/testxgboost
-tests/ci_build/ci_build.sh gpu --use-gpus \
- --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
- --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
- --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
- build/testxgboost --use-rmm-pool
diff --git a/tests/buildkite/test-cpp-mgpu.sh b/tests/buildkite/test-cpp-mgpu.sh
index 65614b191d04..2f9e037a8394 100755
--- a/tests/buildkite/test-cpp-mgpu.sh
+++ b/tests/buildkite/test-cpp-mgpu.sh
@@ -4,8 +4,9 @@ set -euo pipefail
source tests/buildkite/conftest.sh
+# Work around https://github.com/dmlc/xgboost/issues/11154
# Allocate extra space in /dev/shm to enable NCCL
-export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
+export CI_DOCKER_EXTRA_PARAMS_INIT='-e NCCL_RAS_ENABLE=0 --shm-size=4g'
echo "--- Run Google Tests with CUDA, using multiple GPUs"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index eab55672ec32..8cfac11bb20a 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -6,7 +6,7 @@ ARG NCCL_VERSION_ARG
# Environment
ENV DEBIAN_FRONTEND noninteractive
-SHELL ["/bin/bash", "-c"] # Use Bash as shell
+SHELL ["/bin/bash", "-c"]
# Install all basic requirements
RUN \
@@ -23,11 +23,12 @@ ENV PATH=/opt/miniforge/bin:$PATH
RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
- python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
+ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_VERSION_ARG \
"nccl>=${NCCL_SHORT_VER}" \
dask \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
- numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \
+ numpy pytest pytest-timeout scipy scikit-learn \
+ pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/tests/ci_build/conda_env/linux_sycl_test.yml
index e22240a2685c..69df013675a1 100644
--- a/tests/ci_build/conda_env/linux_sycl_test.yml
+++ b/tests/ci_build/conda_env/linux_sycl_test.yml
@@ -19,3 +19,4 @@ dependencies:
- pytest-cov
- dpcpp_linux-64
- onedpl-devel
+- intel-openmp
diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml
index f131c6585026..f40e65a633c7 100644
--- a/tests/ci_build/conda_env/win64_test.yml
+++ b/tests/ci_build/conda_env/win64_test.yml
@@ -2,7 +2,7 @@ name: win64_env
channels:
- conda-forge
dependencies:
-- python=3.8
+- python=3.10
- numpy
- scipy
- matplotlib
diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh
index dd43f43a9ce6..620ba6047c74 100755
--- a/tests/ci_build/test_python.sh
+++ b/tests/ci_build/test_python.sh
@@ -67,6 +67,7 @@ case "$suite" in
set -x
install_xgboost
setup_pyspark_envs
+ export NCCL_RAS_ENABLE=0
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index 2748e13098b6..e36ffae9dfe9 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -70,6 +70,8 @@ target_include_directories(testxgboost
${xgboost_SOURCE_DIR}/rabit/include)
target_link_libraries(testxgboost
PRIVATE
+ $
+ $
GTest::gtest GTest::gmock)
set_output_directory(testxgboost ${xgboost_BINARY_DIR})
diff --git a/tests/cpp/collective/test_worker.h b/tests/cpp/collective/test_worker.h
index 4fd982b06f35..3dee34268c37 100644
--- a/tests/cpp/collective/test_worker.h
+++ b/tests/cpp/collective/test_worker.h
@@ -150,13 +150,13 @@ template <typename WorkerFn>
void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need_finalize = true,
std::chrono::seconds test_timeout = std::chrono::seconds{30}) {
system::SocketStartup();
- std::chrono::seconds timeout{1};
+ std::chrono::seconds poll_timeout{5};
std::string host;
auto rc = GetHostAddress(&host);
SafeColl(rc);
- RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)};
+ RabitTracker tracker{MakeTrackerConfig(host, n_workers, poll_timeout)};
auto fut = tracker.Run();
std::vector<std::thread> workers;
@@ -165,7 +165,7 @@ void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need
for (std::int32_t i = 0; i < n_workers; ++i) {
workers.emplace_back([=] {
auto fut = std::async(std::launch::async, [=] {
- auto config = MakeDistributedTestConfig(host, port, timeout, i);
+ auto config = MakeDistributedTestConfig(host, port, poll_timeout, i);
Init(config);
worker_fn();
if (need_finalize) {
diff --git a/tests/cpp/common/test_quantile.cc b/tests/cpp/common/test_quantile.cc
index fef7db9dc445..1ef6572599fc 100644
--- a/tests/cpp/common/test_quantile.cc
+++ b/tests/cpp/common/test_quantile.cc
@@ -283,22 +283,22 @@ void TestColSplitQuantile(size_t rows, size_t cols) {
}
} // anonymous namespace
-TEST(Quantile, ColSplitBasic) {
+TEST(Quantile, ColumnSplitBasic) {
constexpr size_t kRows = 10, kCols = 10;
TestColSplitQuantile(kRows, kCols);
}
-TEST(Quantile, ColSplit) {
+TEST(Quantile, ColumnSplit) {
constexpr size_t kRows = 4000, kCols = 200;
TestColSplitQuantile(kRows, kCols);
}
-TEST(Quantile, ColSplitSortedBasic) {
+TEST(Quantile, ColumnSplitSortedBasic) {
constexpr size_t kRows = 10, kCols = 10;
TestColSplitQuantile(kRows, kCols);
}
-TEST(Quantile, ColSplitSorted) {
+TEST(Quantile, ColumnSplitSorted) {
constexpr size_t kRows = 4000, kCols = 200;
TestColSplitQuantile(kRows, kCols);
}
diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc
index 0e52c9e23b2b..4db2aa5da8d1 100644
--- a/tests/cpp/tree/test_approx.cc
+++ b/tests/cpp/tree/test_approx.cc
@@ -123,7 +123,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared
}
} // anonymous namespace
-TEST(Approx, PartitionerColSplit) {
+TEST(Approx, PartitionerColumnSplit) {
size_t n_samples = 1024, n_features = 16, base_rowid = 0;
auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
auto hess = GenerateHess(n_samples);
@@ -153,7 +153,7 @@ TEST(Approx, PartitionerColSplit) {
}
namespace {
-class TestApproxColSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {
+class TestApproxColumnSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {
public:
void Run() {
auto [categorical, sparsity] = GetParam();
@@ -162,9 +162,9 @@ class TestApproxColSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {
-TEST_P(TestApproxColSplit, Basic) { this->Run(); }
+TEST_P(TestApproxColumnSplit, Basic) { this->Run(); }
-INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() {
+INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColumnSplit, ::testing::ValuesIn([]() {
std::vector<std::tuple<bool, float>> params;
for (auto categorical : {true, false}) {
for (auto sparsity : {0.0f, 0.6f}) {
diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc
index 8e524dc7e979..2d372b368cab 100644
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -198,12 +198,12 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) {
}
} // anonymous namespace
-TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(1); }
+TEST(QuantileHist, PartitionerColumnSplit) { TestColumnSplitPartitioner(1); }
-TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner(3); }
+TEST(QuantileHist, MultiPartitionerColumnSplit) { TestColumnSplitPartitioner(3); }
namespace {
-class TestHistColSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {
+class TestHistColumnSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {
public:
void Run() {
auto [n_targets, categorical, sparsity] = GetParam();
@@ -212,9 +212,9 @@ class TestHistColSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {
-TEST_P(TestHistColSplit, Basic) { this->Run(); }
+TEST_P(TestHistColumnSplit, Basic) { this->Run(); }
-INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() {
+INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColumnSplit, ::testing::ValuesIn([]() {
std::vector<std::tuple<bst_target_t, bool, float>> params;
for (auto categorical : {true, false}) {
for (auto sparsity : {0.0f, 0.6f}) {
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index a1bc13cb8ec8..1a2128b8612a 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -453,7 +453,7 @@ def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None:
)
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_categorical_split(self, df):
- from sklearn.metrics import mean_squared_error
+ from sklearn.metrics import root_mean_squared_error
df = df.astype("category")
x0, x1 = df["x0"].to_numpy(), df["x1"].to_numpy()
@@ -480,7 +480,7 @@ def test_predict_categorical_split(self, df):
)
bst.set_param({"device": "cuda:0"})
pred = bst.predict(dtrain)
- rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
+ rmse = root_mean_squared_error(y_true=y, y_pred=pred)
np.testing.assert_almost_equal(
rmse, eval_history["train"]["rmse"][-1], decimal=5
)
@@ -576,10 +576,9 @@ def test_dtypes(self):
# boolean
orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(rows, cols)
predt_orig = booster.inplace_predict(orig)
- for dtype in [cp.bool8, cp.bool_]:
- X = cp.array(orig, dtype=dtype)
- predt = booster.inplace_predict(X)
- cp.testing.assert_allclose(predt, predt_orig)
+ X = cp.array(orig, dtype=cp.bool_)
+ predt = booster.inplace_predict(X)
+ cp.testing.assert_allclose(predt, predt_orig)
# unsupported types
for dtype in [
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 61f33832ab48..6cb4a58a0de7 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -2,6 +2,7 @@
import os
import pickle
import random
+import re
import tempfile
import warnings
from typing import Callable, Optional
@@ -795,6 +796,32 @@ def get_tm(clf: xgb.XGBClassifier) -> str:
assert clf.get_params()["tree_method"] is None
+def test_get_params_works_as_expected():
+ # XGBModel -> BaseEstimator
+ params = xgb.XGBModel(max_depth=2).get_params()
+ assert params["max_depth"] == 2
+ # 'objective' defaults to None in the signature of XGBModel
+ assert params["objective"] is None
+
+ # XGBRegressor -> XGBModel -> BaseEstimator
+ params = xgb.XGBRegressor(max_depth=3).get_params()
+ assert params["max_depth"] == 3
+ # 'objective' defaults to 'reg:squarederror' in the signature of XGBRegressor
+ assert params["objective"] == "reg:squarederror"
+ # 'colsample_bynode' defaults to 'None' for XGBModel (which XGBRegressor inherits from), so it
+ # should be in get_params() output
+ assert params["colsample_bynode"] is None
+
+ # XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator
+ params = xgb.XGBRFRegressor(max_depth=4, objective="reg:tweedie").get_params()
+ assert params["max_depth"] == 4
+ # 'objective' is a keyword argument for XGBRegressor, so it should be in get_params() output
+ # ... but values passed through kwargs should override the default from the signature of XGBRegressor
+ assert params["objective"] == "reg:tweedie"
+ # 'colsample_bynode' defaults to 0.8 for XGBRFRegressor...that should be preferred to the None from XGBRegressor
+ assert params["colsample_bynode"] == 0.8
+
+
def test_kwargs_error():
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
with pytest.raises(TypeError):
@@ -1284,7 +1311,7 @@ def test_data_initialization() -> None:
validate_data_initialization(xgb.QuantileDMatrix, xgb.XGBClassifier, X, y)
-@parametrize_with_checks([xgb.XGBRegressor()])
+@parametrize_with_checks([xgb.XGBRegressor(enable_categorical=True)])
def test_estimator_reg(estimator, check):
if os.environ["PYTEST_CURRENT_TEST"].find("check_supervised_y_no_nan") != -1:
# The test uses float64 and requires the error message to contain:
@@ -1477,10 +1504,62 @@ def test_tags() -> None:
assert tags["multioutput"] is True
assert tags["multioutput_only"] is False
- for clf in [xgb.XGBClassifier()]:
+ for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]:
tags = clf._more_tags()
assert "multioutput" not in tags
assert tags["multilabel"] is True
tags = xgb.XGBRanker()._more_tags()
assert "multioutput" not in tags
+
+
+# the try-excepts in this test should be removed once xgboost's
+# minimum supported scikit-learn version is at least 1.6
+def test_sklearn_tags():
+
+ def _assert_has_xgbmodel_tags(tags):
+ # values set by XGBModel.__sklearn_tags__()
+ assert tags.non_deterministic is False
+ assert tags.no_validation is True
+ assert tags.input_tags.allow_nan is True
+
+ for reg in [xgb.XGBRegressor(), xgb.XGBRFRegressor()]:
+ try:
+ # if no AttributeError was thrown, we must be using scikit-learn>=1.6,
+ # and so the actual effects of __sklearn_tags__() should be tested
+ tags = reg.__sklearn_tags__()
+ _assert_has_xgbmodel_tags(tags)
+ # regressor-specific values
+ assert tags.estimator_type == "regressor"
+ assert tags.regressor_tags is not None
+ assert tags.classifier_tags is None
+ assert tags.target_tags.multi_output is True
+ assert tags.target_tags.single_output is True
+ except AttributeError as err:
+ # only the exact error we expected to be raised should be raised
+ assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err)))
+
+ for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]:
+ try:
+ # if no AttributeError was thrown, we must be using scikit-learn>=1.6,
+ # and so the actual effects of __sklearn_tags__() should be tested
+ tags = clf.__sklearn_tags__()
+ _assert_has_xgbmodel_tags(tags)
+ # classifier-specific values
+ assert tags.estimator_type == "classifier"
+ assert tags.regressor_tags is None
+ assert tags.classifier_tags is not None
+ assert tags.classifier_tags.multi_label is True
+ except AttributeError as err:
+ # only the exact error we expected to be raised should be raised
+ assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err)))
+
+ for rnk in [xgb.XGBRanker(),]:
+ try:
+ # if no AttributeError was thrown, we must be using scikit-learn>=1.6,
+ # and so the actual effects of __sklearn_tags__() should be tested
+ tags = rnk.__sklearn_tags__()
+ _assert_has_xgbmodel_tags(tags)
+ except AttributeError as err:
+ # only the exact error we expected to be raised should be raised
+ assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err)))
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index 5b6400424591..53629b00d5b8 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -10,6 +10,7 @@
import pytest
from hypothesis import given, note, settings, strategies
from hypothesis._settings import duration
+from packaging.version import parse as parse_version
import xgboost as xgb
from xgboost import testing as tm
@@ -19,7 +20,7 @@
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
pytest.mark.skipif(**tm.no_dask_cuda()),
- tm.timeout(60),
+ tm.timeout(120),
]
from ..test_with_dask.test_with_dask import generate_array
@@ -41,6 +42,7 @@
try:
import cudf
import dask.dataframe as dd
+ from dask import __version__ as dask_version
from dask import array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
@@ -48,7 +50,12 @@
from xgboost import dask as dxgb
from xgboost.testing.dask import check_init_estimation, check_uneven_nan
except ImportError:
- pass
+ dask_version = None
+
+
+dask_version_ge110 = dask_version and parse_version(dask_version) >= parse_version(
+ "2024.11.0"
+)
def run_with_dask_dataframe(DMatrixT: Type, client: Client) -> None:
@@ -99,6 +106,8 @@ def is_df(part: T) -> T:
cp.testing.assert_allclose(predt.values.compute(), single_node)
+ # Work around https://github.com/dmlc/xgboost/issues/10752
+ X.columns = X.columns.astype("object")
# Make sure the output can be integrated back to original dataframe
X["predict"] = predictions
X["inplace_predict"] = series_predictions
@@ -376,6 +385,9 @@ def test_early_stopping(self, local_cuda_client: Client) -> None:
dump = booster.get_dump(dump_format="json")
assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
+ @pytest.mark.xfail(
+ dask_version_ge110, reason="Test cannot pass with Dask 2024.11.0+"
+ )
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.parametrize("model", ["boosting"])
def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None: