diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index a2b8b1e6d214..612834bd0301 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ -30,7 +30,7 @@ jobs: - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: jvm_tests environment-file: tests/ci_build/conda_env/jvm_tests.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 621c8b465f9b..c866182706f0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -81,7 +81,7 @@ jobs: submodules: 'true' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: linux_sycl_test environment-file: tests/ci_build/conda_env/linux_sycl_test.yml @@ -123,7 +123,7 @@ jobs: submodules: 'true' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: cpp_test environment-file: tests/ci_build/conda_env/cpp_test.yml diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index e3b60f12cc1e..4d6e38d24e30 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -26,7 +26,7 @@ jobs: submodules: 'true' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: python_lint environment-file: tests/ci_build/conda_env/python_lint.yml @@ -58,7 +58,7 @@ jobs: submodules: 'true' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: sdist_test environment-file: tests/ci_build/conda_env/sdist_test.yml @@ -130,7 +130,7 @@ jobs: - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: macos_cpu_test environment-file: tests/ci_build/conda_env/macos_cpu_test.yml @@ -227,7 +227,7 @@ jobs: - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: linux_cpu_test environment-file: tests/ci_build/conda_env/linux_cpu_test.yml @@ -280,7 +280,7 @@ jobs: - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest activate-environment: linux_sycl_test environment-file: tests/ci_build/conda_env/linux_sycl_test.yml diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml index cd611f46371f..4fdd3e9373cc 100644 --- a/.github/workflows/python_wheels.yml +++ b/.github/workflows/python_wheels.yml @@ -34,7 +34,7 @@ jobs: run: brew install libomp - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: - miniforge-variant: Mambaforge + miniforge-variant: Miniforge3 miniforge-version: latest python-version: 3.9 use-mamba: true diff --git a/.gitignore b/.gitignore index 4480c81029a5..3725dd1603d9 100644 --- a/.gitignore +++ b/.gitignore @@ -139,11 +139,13 @@ credentials.csv .bloop # python tests +*.bin demo/**/*.txt *.dmatrix .hypothesis __MACOSX/ model*.json +/tests/python/models/models/ # R tests *.htm diff --git a/CMakeLists.txt b/CMakeLists.txt index 0df914467e6b..5fb464b673b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ if(PLUGIN_SYCL) string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -project(xgboost LANGUAGES CXX C VERSION 2.1.3) +project(xgboost LANGUAGES CXX C VERSION 2.1.4) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") @@ -327,7 +327,6 @@ if(PLUGIN_RMM) list(REMOVE_ITEM rmm_link_libs CUDA::cudart) list(APPEND rmm_link_libs CUDA::cudart_static) set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES "${rmm_link_libs}") - get_target_property(rmm_link_libs rmm::rmm INTERFACE_LINK_LIBRARIES) endif() if(PLUGIN_SYCL) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 1c58e6166b3b..f77ca73ec343 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 2.1.3.1 -Date: 2024-11-26 +Version: 2.1.4.1 +Date: 2025-02-06 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index e353456c9390..f1f005752270 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 2.1.3. +# Generated by GNU Autoconf 2.71 for xgboost 2.1.4. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='2.1.3' -PACKAGE_STRING='xgboost 2.1.3' +PACKAGE_VERSION='2.1.4' +PACKAGE_STRING='xgboost 2.1.4' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1259,7 +1259,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 2.1.3 to adapt to many kinds of systems. +\`configure' configures xgboost 2.1.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1321,7 +1321,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 2.1.3:";; + short | recursive ) echo "Configuration of xgboost 2.1.4:";; esac cat <<\_ACEOF @@ -1404,7 +1404,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 2.1.3 +xgboost configure 2.1.4 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1603,7 +1603,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 2.1.3, which was +It was created by xgboost $as_me 2.1.4, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3709,7 +3709,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 2.1.3, which was +This file was extended by xgboost $as_me 2.1.4, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3773,7 +3773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -xgboost config.status 2.1.3 +xgboost config.status 2.1.4 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index 06e1b5386974..bc3354795643 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[2.1.3],[],[xgboost],[]) +AC_INIT([xgboost],[2.1.4],[],[xgboost],[]) : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index b12302a166c0..4e66c15186eb 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -55,7 +55,9 @@ function(compute_cmake_cuda_archs archs) # Set up defaults based on CUDA varsion if(NOT CMAKE_CUDA_ARCHITECTURES) - if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") + if(CUDA_VERSION VERSION_GREATER_EQUAL "12.8") + set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90 100 120) + elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90) elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80) diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py index f53835ffbee9..7c48074e561e 100644 --- a/demo/dask/gpu_training.py +++ b/demo/dask/gpu_training.py @@ -50,8 +50,8 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d .. versionadded:: 1.2.0 """ - X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X)) - y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y)) + X = dd.from_dask_array(X).to_backend("cudf") + y = dd.from_dask_array(y).to_backend("cudf") # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not # be used for anything else other than training unless a reference is specified. See diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index 4d3ed975fa4a..3cccc74f3e59 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 2 /* NOLINT */ #define XGBOOST_VER_MINOR 1 /* NOLINT */ -#define XGBOOST_VER_PATCH 3 /* NOLINT */ +#define XGBOOST_VER_PATCH 4 /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 31af7cdaf608..14092a82176f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 31ec46a04d60..876de03a8e45 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,11 +6,11 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j-example xgboost4j-example_2.12 - 2.1.3 + 2.1.4 jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 7bbcd5407bff..6569dc831530 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,12 +6,12 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j-flink xgboost4j-flink_2.12 - 2.1.3 + 2.1.4 2.2.0 diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 6a0ff0e7905f..493b6c5d00a0 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,11 +6,11 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j-gpu_2.12 xgboost4j-gpu - 2.1.3 + 2.1.4 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 92fe0b6994aa..8826259d7bf5 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j-spark-gpu xgboost4j-spark-gpu_2.12 diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index b5a401ee477b..0ca8c2f5ccb0 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j-spark xgboost4j-spark_2.12 diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 228f225ad5cd..fd3de238a128 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,11 +6,11 @@ ml.dmlc xgboost-jvm_2.12 - 2.1.3 + 2.1.4 xgboost4j xgboost4j_2.12 - 2.1.3 + 2.1.4 jar diff --git a/plugin/sycl/common/hist_util.cc b/plugin/sycl/common/hist_util.cc index 2f2417f3a29a..89b354425f34 100644 --- a/plugin/sycl/common/hist_util.cc +++ b/plugin/sycl/common/hist_util.cc @@ -9,7 +9,7 @@ #include "../data/gradient_index.h" #include "hist_util.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/common/hist_util.h b/plugin/sycl/common/hist_util.h index aa9b4f5817bb..59f05e3e5c81 100644 --- a/plugin/sycl/common/hist_util.h +++ b/plugin/sycl/common/hist_util.h @@ -15,7 +15,7 @@ #include "../../src/common/hist_util.h" #include "../data/gradient_index.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/common/partition_builder.h b/plugin/sycl/common/partition_builder.h index c520ff31fb8e..813ca5324238 100644 --- a/plugin/sycl/common/partition_builder.h +++ b/plugin/sycl/common/partition_builder.h @@ -25,7 +25,7 @@ #include "../data/gradient_index.h" #include "../tree/expand_entry.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/common/row_set.h b/plugin/sycl/common/row_set.h index 574adbf8d9b9..67734f321f7d 100644 --- a/plugin/sycl/common/row_set.h +++ b/plugin/sycl/common/row_set.h @@ -15,7 +15,7 @@ #include "../data.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/data.h b/plugin/sycl/data.h index 8f4bb2516f05..5df15a9b7223 100644 --- a/plugin/sycl/data.h +++ b/plugin/sycl/data.h @@ -22,7 +22,7 @@ #include "../../src/common/threading_utils.h" -#include "CL/sycl.hpp" +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/data/gradient_index.cc b/plugin/sycl/data/gradient_index.cc index e193b66894c9..0e6871154207 100644 --- a/plugin/sycl/data/gradient_index.cc +++ b/plugin/sycl/data/gradient_index.cc @@ -8,7 +8,7 @@ #include "gradient_index.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/data/gradient_index.h b/plugin/sycl/data/gradient_index.h index 13577025caa0..61a5ea2492e9 100644 --- a/plugin/sycl/data/gradient_index.h +++ b/plugin/sycl/data/gradient_index.h @@ -10,7 +10,7 @@ #include "../data.h" #include "../../src/common/hist_util.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/device_manager.h b/plugin/sycl/device_manager.h index 84d4b24c0aa8..6e411d8c0a11 100644 --- a/plugin/sycl/device_manager.h +++ b/plugin/sycl/device_manager.h @@ -10,7 +10,7 @@ #include #include -#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtautological-constant-compare" diff --git a/plugin/sycl/objective/multiclass_obj.cc b/plugin/sycl/objective/multiclass_obj.cc index 5dcc8c3de599..669aeabd546b 100644 --- a/plugin/sycl/objective/multiclass_obj.cc +++ b/plugin/sycl/objective/multiclass_obj.cc @@ -23,7 +23,7 @@ #include "../../../src/objective/multiclass_param.h" #include "../device_manager.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/objective/regression_obj.cc b/plugin/sycl/objective/regression_obj.cc index 82467a7c4848..eb9ba1a615d9 100644 --- a/plugin/sycl/objective/regression_obj.cc +++ b/plugin/sycl/objective/regression_obj.cc @@ -24,12 +24,13 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtautological-constant-compare" #include "../../../src/objective/regression_loss.h" +#include "../../../src/objective/adaptive.h" #pragma GCC diagnostic pop #include "../../../src/objective/regression_param.h" #include "../device_manager.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/predictor/predictor.cc b/plugin/sycl/predictor/predictor.cc index c941bca102e7..5b8f7b1ca9c8 100755 --- a/plugin/sycl/predictor/predictor.cc +++ b/plugin/sycl/predictor/predictor.cc @@ -10,7 +10,7 @@ #include #include -#include +#include #include "../data.h" diff --git a/plugin/sycl/tree/param.h b/plugin/sycl/tree/param.h index a83a7ad138ab..3c30c4e5c822 100644 --- a/plugin/sycl/tree/param.h +++ b/plugin/sycl/tree/param.h @@ -19,7 +19,7 @@ #include "../src/tree/param.h" #pragma GCC diagnostic pop -#include +#include namespace xgboost { namespace sycl { diff --git a/plugin/sycl/tree/split_evaluator.h b/plugin/sycl/tree/split_evaluator.h index 2f1e8c7c4e66..7565298748bd 100644 --- a/plugin/sycl/tree/split_evaluator.h +++ b/plugin/sycl/tree/split_evaluator.h @@ -21,7 +21,7 @@ #include "../../src/common/math.h" #include "../../src/tree/param.h" -#include +#include namespace xgboost { namespace sycl { diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index e6cfde49d7c9..5a3b897d3f7d 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -14,7 +14,7 @@ authors = [ { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] -version = "2.1.3" +version = "2.1.4" requires-python = ">=3.8" license = { text = "Apache-2.0" } classifiers = [ @@ -62,6 +62,8 @@ extension-pkg-whitelist = ["numpy"] disable = [ "attribute-defined-outside-init", "import-outside-toplevel", + "too-few-public-methods", + "too-many-ancestors", "too-many-nested-blocks", "unexpected-special-method-signature", "unsubscriptable-object", diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index ac2cdeba0137..7d2ed7c70205 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -2.1.3 +2.1.4 diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 729750f1f354..36063d137d1c 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -45,32 +45,43 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool: # sklearn try: + from sklearn import __version__ as _sklearn_version from sklearn.base import BaseEstimator as XGBModelBase from sklearn.base import ClassifierMixin as XGBClassifierBase from sklearn.base import RegressorMixin as XGBRegressorBase - from sklearn.preprocessing import LabelEncoder try: - from sklearn.model_selection import KFold as XGBKFold from sklearn.model_selection import StratifiedKFold as XGBStratifiedKFold except ImportError: - from sklearn.cross_validation import KFold as XGBKFold from sklearn.cross_validation import StratifiedKFold as XGBStratifiedKFold + # sklearn.utils Tags types can be imported unconditionally once + # xgboost's minimum scikit-learn version is 1.6 or higher + try: + from sklearn.utils import Tags as _sklearn_Tags + except ImportError: + _sklearn_Tags = object + SKLEARN_INSTALLED = True except ImportError: SKLEARN_INSTALLED = False # used for compatibility without sklearn - XGBModelBase = object - XGBClassifierBase = object - XGBRegressorBase = object - LabelEncoder = object + class XGBModelBase: # type: ignore[no-redef] + """Dummy class for sklearn.base.BaseEstimator.""" + + class XGBClassifierBase: # type: ignore[no-redef] + """Dummy class for sklearn.base.ClassifierMixin.""" + + class XGBRegressorBase: # type: ignore[no-redef] + """Dummy class for sklearn.base.RegressorMixin.""" - XGBKFold = None XGBStratifiedKFold = None + _sklearn_Tags = object + _sklearn_version = object + _logger = logging.getLogger(__name__) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 118b0013d3d8..8115e7dcdaee 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -410,7 +410,7 @@ def c_array( def from_array_interface(interface: dict) -> NumpyOrCupy: """Convert array interface to numpy or cupy array""" - class Array: # pylint: disable=too-few-public-methods + class Array: """Wrapper type for communicating with numpy and cupy.""" _interface: Optional[dict] = None diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py index 44eae0c51837..0e30f6d7f679 100644 --- a/python-package/xgboost/dask/__init__.py +++ b/python-package/xgboost/dask/__init__.py @@ -1,8 +1,6 @@ # pylint: disable=too-many-arguments, too-many-locals # pylint: disable=missing-class-docstring, invalid-name # pylint: disable=too-many-lines -# pylint: disable=too-few-public-methods -# pylint: disable=import-error """ Dask extensions for distributed training ---------------------------------------- diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 560a3a8ed285..f5b6152eaf51 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -29,7 +29,14 @@ # Do not use class names on scikit-learn directly. Re-define the classes on # .compat to guarantee the behavior without scikit-learn -from .compat import SKLEARN_INSTALLED, XGBClassifierBase, XGBModelBase, XGBRegressorBase +from .compat import ( + SKLEARN_INSTALLED, + XGBClassifierBase, + XGBModelBase, + XGBRegressorBase, + _sklearn_Tags, + _sklearn_version, +) from .config import config_context from .core import ( Booster, @@ -45,7 +52,7 @@ from .training import train -class XGBRankerMixIn: # pylint: disable=too-few-public-methods +class XGBRankerMixIn: """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn base classes. @@ -69,7 +76,7 @@ def _can_use_qdm(tree_method: Optional[str]) -> bool: return tree_method in ("hist", "gpu_hist", None, "auto") -class _SklObjWProto(Protocol): # pylint: disable=too-few-public-methods +class _SklObjWProto(Protocol): def __call__( self, y_true: ArrayLike, @@ -782,11 +789,52 @@ def __init__( def _more_tags(self) -> Dict[str, bool]: """Tags used for scikit-learn data validation.""" - tags = {"allow_nan": True, "no_validation": True} + tags = {"allow_nan": True, "no_validation": True, "sparse": True} if hasattr(self, "kwargs") and self.kwargs.get("updater") == "shotgun": tags["non_deterministic"] = True + + tags["categorical"] = self.enable_categorical + return tags + + @staticmethod + def _update_sklearn_tags_from_dict( + *, + tags: _sklearn_Tags, + tags_dict: Dict[str, bool], + ) -> _sklearn_Tags: + """Update ``sklearn.utils.Tags`` inherited from ``scikit-learn`` base classes. + + ``scikit-learn`` 1.6 introduced a dataclass-based interface for estimator tags. + ref: https://github.com/scikit-learn/scikit-learn/pull/29677 + + This method handles updating that instance based on the values in + ``self._more_tags()``. + + """ + tags.non_deterministic = tags_dict.get("non_deterministic", False) + tags.no_validation = tags_dict["no_validation"] + tags.input_tags.allow_nan = tags_dict["allow_nan"] + tags.input_tags.sparse = tags_dict["sparse"] + tags.input_tags.categorical = tags_dict["categorical"] return tags + def __sklearn_tags__(self) -> _sklearn_Tags: + # XGBModelBase.__sklearn_tags__() cannot be called unconditionally, + # because that method isn't defined for scikit-learn<1.6 + if not hasattr(XGBModelBase, "__sklearn_tags__"): + err_msg = ( + "__sklearn_tags__() should not be called when using scikit-learn<1.6. " + f"Detected version: {_sklearn_version}" + ) + raise AttributeError(err_msg) + + # take whatever tags are provided by BaseEstimator, then modify + # them with XGBoost-specific values + return self._update_sklearn_tags_from_dict( + tags=super().__sklearn_tags__(), # pylint: disable=no-member + tags_dict=self._more_tags(), + ) + def __sklearn_is_fitted__(self) -> bool: return hasattr(self, "_Booster") @@ -841,13 +889,27 @@ def get_params(self, deep: bool = True) -> Dict[str, Any]: """Get parameters.""" # Based on: https://stackoverflow.com/questions/59248211 # The basic flow in `get_params` is: - # 0. Return parameters in subclass first, by using inspect. - # 1. Return parameters in `XGBModel` (the base class). + # 0. Return parameters in subclass (self.__class__) first, by using inspect. + # 1. Return parameters in all parent classes (especially `XGBModel`). # 2. Return whatever in `**kwargs`. # 3. Merge them. + # + # This needs to accommodate being called recursively in the following + # inheritance graphs (and similar for classification and ranking): + # + # XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator + # XGBRegressor -> XGBModel -> BaseEstimator + # XGBModel -> BaseEstimator + # params = super().get_params(deep) cp = copy.copy(self) - cp.__class__ = cp.__class__.__bases__[0] + # If the immediate parent defines get_params(), use that. + if callable(getattr(cp.__class__.__bases__[0], "get_params", None)): + cp.__class__ = cp.__class__.__bases__[0] + # Otherwise, skip it and assume the next class will have it. + # This is here primarily for cases where the first class in MRO is a scikit-learn mixin. + else: + cp.__class__ = cp.__class__.__bases__[1] params.update(cp.__class__.get_params(cp, deep)) # if kwargs is a dict, update params accordingly if hasattr(self, "kwargs") and isinstance(self.kwargs, dict): @@ -1431,7 +1493,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> Number of boosting rounds. """, ) -class XGBClassifier(XGBModel, XGBClassifierBase): +class XGBClassifier(XGBClassifierBase, XGBModel): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args def __init__( @@ -1447,6 +1509,12 @@ def _more_tags(self) -> Dict[str, bool]: tags["multilabel"] = True return tags + def __sklearn_tags__(self) -> _sklearn_Tags: + tags = super().__sklearn_tags__() + tags_dict = self._more_tags() + tags.classifier_tags.multi_label = tags_dict["multilabel"] + return tags + @_deprecate_positional_args def fit( self, @@ -1717,7 +1785,7 @@ def fit( "Implementation of the scikit-learn API for XGBoost regression.", ["estimators", "model", "objective"], ) -class XGBRegressor(XGBModel, XGBRegressorBase): +class XGBRegressor(XGBRegressorBase, XGBModel): # pylint: disable=missing-docstring @_deprecate_positional_args def __init__( @@ -1731,6 +1799,13 @@ def _more_tags(self) -> Dict[str, bool]: tags["multioutput_only"] = False return tags + def __sklearn_tags__(self) -> _sklearn_Tags: + tags = super().__sklearn_tags__() + tags_dict = self._more_tags() + tags.target_tags.multi_output = tags_dict["multioutput"] + tags.target_tags.single_output = not tags_dict["multioutput_only"] + return tags + @xgboost_model_doc( "scikit-learn API for XGBoost random forest regression.", @@ -1858,7 +1933,7 @@ def _get_qid( `qid` can be a special column of input `X` instead of a separated parameter, see :py:meth:`fit` for more info.""", ) -class XGBRanker(XGBModel, XGBRankerMixIn): +class XGBRanker(XGBRankerMixIn, XGBModel): # pylint: disable=missing-docstring,too-many-arguments,invalid-name @_deprecate_positional_args def __init__(self, *, objective: str = "rank:ndcg", **kwargs: Any): diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index 591144cd1c27..5caecc7fbd71 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -2,8 +2,8 @@ import base64 -# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name -# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches +# pylint: disable=fixme, protected-access, no-member, invalid-name +# pylint: disable=too-many-lines, too-many-branches import json import logging import os diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py index 51e2e946f8a5..522b74b7b83b 100644 --- a/python-package/xgboost/spark/estimator.py +++ b/python-package/xgboost/spark/estimator.py @@ -1,7 +1,6 @@ """Xgboost pyspark integration submodule for estimator API.""" -# pylint: disable=too-many-ancestors -# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name +# pylint: disable=fixme, protected-access, no-member, invalid-name # pylint: disable=unused-argument, too-many-locals import warnings diff --git a/python-package/xgboost/spark/params.py b/python-package/xgboost/spark/params.py index a177c73fe413..f173d3301286 100644 --- a/python-package/xgboost/spark/params.py +++ b/python-package/xgboost/spark/params.py @@ -2,7 +2,6 @@ from typing import Dict -# pylint: disable=too-few-public-methods from pyspark.ml.param import TypeConverters from pyspark.ml.param.shared import Param, Params diff --git a/python-package/xgboost/spark/utils.py b/python-package/xgboost/spark/utils.py index 0a421031ecd4..177df99c74ce 100644 --- a/python-package/xgboost/spark/utils.py +++ b/python-package/xgboost/spark/utils.py @@ -43,7 +43,7 @@ def _get_default_params_from_func( return filtered_params_dict -class CommunicatorContext(CCtx): # pylint: disable=too-few-public-methods +class CommunicatorContext(CCtx): """Context with PySpark specific task ID.""" def __init__(self, context: BarrierTaskContext, **args: Any) -> None: diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py index 4071219c44ef..91b9ca6d330a 100644 --- a/python-package/xgboost/testing/data.py +++ b/python-package/xgboost/testing/data.py @@ -564,7 +564,7 @@ def is_binary(self) -> bool: return self.max_rel == 1 -class PBM: # pylint: disable=too-few-public-methods +class PBM: """Simulate click data with position bias model. There are other models available in `ULTRA `_ like the cascading model. diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index a0462d738754..fcbde49f2e19 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -22,9 +22,9 @@ function set_buildkite_env_vars_in_container { set -x -CUDA_VERSION=11.8.0 -NCCL_VERSION=2.16.5-1 -RAPIDS_VERSION=24.06 +CUDA_VERSION=12.8.0 +NCCL_VERSION=2.25.1-1 +RAPIDS_VERSION=24.12 DEV_RAPIDS_VERSION=24.06 SPARK_VERSION=3.5.1 JDK_VERSION=8 diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index ee9637b8bd25..7721a3d5a30a 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -21,11 +21,6 @@ steps: queue: linux-amd64-cpu - wait #### -------- BUILD -------- - - label: ":console: Run clang-tidy" - command: "tests/buildkite/run-clang-tidy.sh" - key: run-clang-tidy - agents: - queue: linux-amd64-cpu - label: ":console: Build CPU" command: "tests/buildkite/build-cpu.sh" key: build-cpu @@ -41,11 +36,6 @@ steps: key: build-cuda agents: queue: linux-amd64-cpu - - label: ":console: Build CUDA with RMM" - command: "tests/buildkite/build-cuda-with-rmm.sh" - key: build-cuda-with-rmm - agents: - queue: linux-amd64-cpu - label: ":console: Build R package with CUDA" command: "tests/buildkite/build-gpu-rpkg.sh" key: build-gpu-rpkg diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index d7197db2efce..e5e42db105cf 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -4,6 +4,9 @@ set -euo pipefail source tests/buildkite/conftest.sh +# Work around https://github.com/dmlc/xgboost/issues/11154 +export CI_DOCKER_EXTRA_PARAMS_INIT='-e NCCL_RAS_ENABLE=0' + echo "--- Run Google Tests with CUDA, using a GPU" buildkite-agent artifact download "build/testxgboost" . --step build-cuda chmod +x build/testxgboost @@ -12,13 +15,3 @@ tests/ci_build/ci_build.sh gpu --use-gpus \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ build/testxgboost - -echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled" -rm -rfv build/ -buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm -chmod +x build/testxgboost -tests/ci_build/ci_build.sh gpu --use-gpus \ - --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ - --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ - --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ - build/testxgboost --use-rmm-pool diff --git a/tests/buildkite/test-cpp-mgpu.sh b/tests/buildkite/test-cpp-mgpu.sh index 65614b191d04..2f9e037a8394 100755 --- a/tests/buildkite/test-cpp-mgpu.sh +++ b/tests/buildkite/test-cpp-mgpu.sh @@ -4,8 +4,9 @@ set -euo pipefail source tests/buildkite/conftest.sh +# Work around https://github.com/dmlc/xgboost/issues/11154 # Allocate extra space in /dev/shm to enable NCCL -export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' +export CI_DOCKER_EXTRA_PARAMS_INIT='-e NCCL_RAS_ENABLE=0 --shm-size=4g' echo "--- Run Google Tests with CUDA, using multiple GPUs" buildkite-agent artifact download "build/testxgboost" . --step build-cuda diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index eab55672ec32..8cfac11bb20a 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -6,7 +6,7 @@ ARG NCCL_VERSION_ARG # Environment ENV DEBIAN_FRONTEND noninteractive -SHELL ["/bin/bash", "-c"] # Use Bash as shell +SHELL ["/bin/bash", "-c"] # Install all basic requirements RUN \ @@ -23,11 +23,12 @@ ENV PATH=/opt/miniforge/bin:$PATH RUN \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \ - python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_VERSION_ARG \ "nccl>=${NCCL_SHORT_VER}" \ dask \ dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ - numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \ + numpy pytest pytest-timeout scipy scikit-learn \ + pandas matplotlib wheel python-kubernetes urllib3 graphviz "hypothesis<=6.112" \ "pyspark>=3.4.0" cloudpickle cuda-python && \ mamba clean --all && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector diff --git a/tests/ci_build/conda_env/linux_sycl_test.yml b/tests/ci_build/conda_env/linux_sycl_test.yml index e22240a2685c..69df013675a1 100644 --- a/tests/ci_build/conda_env/linux_sycl_test.yml +++ b/tests/ci_build/conda_env/linux_sycl_test.yml @@ -19,3 +19,4 @@ dependencies: - pytest-cov - dpcpp_linux-64 - onedpl-devel +- intel-openmp diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml index f131c6585026..f40e65a633c7 100644 --- a/tests/ci_build/conda_env/win64_test.yml +++ b/tests/ci_build/conda_env/win64_test.yml @@ -2,7 +2,7 @@ name: win64_env channels: - conda-forge dependencies: -- python=3.8 +- python=3.10 - numpy - scipy - matplotlib diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index dd43f43a9ce6..620ba6047c74 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -67,6 +67,7 @@ case "$suite" in set -x install_xgboost setup_pyspark_envs + export NCCL_RAS_ENABLE=0 pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 2748e13098b6..e36ffae9dfe9 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -70,6 +70,8 @@ target_include_directories(testxgboost ${xgboost_SOURCE_DIR}/rabit/include) target_link_libraries(testxgboost PRIVATE + $ + $ GTest::gtest GTest::gmock) set_output_directory(testxgboost ${xgboost_BINARY_DIR}) diff --git a/tests/cpp/collective/test_worker.h b/tests/cpp/collective/test_worker.h index 4fd982b06f35..3dee34268c37 100644 --- a/tests/cpp/collective/test_worker.h +++ b/tests/cpp/collective/test_worker.h @@ -150,13 +150,13 @@ template void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need_finalize = true, std::chrono::seconds test_timeout = std::chrono::seconds{30}) { system::SocketStartup(); - std::chrono::seconds timeout{1}; + std::chrono::seconds poll_timeout{5}; std::string host; auto rc = GetHostAddress(&host); SafeColl(rc); - RabitTracker tracker{MakeTrackerConfig(host, n_workers, timeout)}; + RabitTracker tracker{MakeTrackerConfig(host, n_workers, poll_timeout)}; auto fut = tracker.Run(); std::vector workers; @@ -165,7 +165,7 @@ void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need for (std::int32_t i = 0; i < n_workers; ++i) { workers.emplace_back([=] { auto fut = std::async(std::launch::async, [=] { - auto config = MakeDistributedTestConfig(host, port, timeout, i); + auto config = MakeDistributedTestConfig(host, port, poll_timeout, i); Init(config); worker_fn(); if (need_finalize) { diff --git a/tests/cpp/common/test_quantile.cc b/tests/cpp/common/test_quantile.cc index fef7db9dc445..1ef6572599fc 100644 --- a/tests/cpp/common/test_quantile.cc +++ b/tests/cpp/common/test_quantile.cc @@ -283,22 +283,22 @@ void TestColSplitQuantile(size_t rows, size_t cols) { } } // anonymous namespace -TEST(Quantile, ColSplitBasic) { +TEST(Quantile, ColumnSplitBasic) { constexpr size_t kRows = 10, kCols = 10; TestColSplitQuantile(kRows, kCols); } -TEST(Quantile, ColSplit) { +TEST(Quantile, ColumnSplit) { constexpr size_t kRows = 4000, kCols = 200; TestColSplitQuantile(kRows, kCols); } -TEST(Quantile, ColSplitSortedBasic) { +TEST(Quantile, ColumnSplitSortedBasic) { constexpr size_t kRows = 10, kCols = 10; TestColSplitQuantile(kRows, kCols); } -TEST(Quantile, ColSplitSorted) { +TEST(Quantile, ColumnSplitSorted) { constexpr size_t kRows = 4000, kCols = 200; TestColSplitQuantile(kRows, kCols); } diff --git a/tests/cpp/tree/test_approx.cc b/tests/cpp/tree/test_approx.cc index 0e52c9e23b2b..4db2aa5da8d1 100644 --- a/tests/cpp/tree/test_approx.cc +++ b/tests/cpp/tree/test_approx.cc @@ -123,7 +123,7 @@ void TestColumnSplitPartitioner(size_t n_samples, size_t base_rowid, std::shared } } // anonymous namespace -TEST(Approx, PartitionerColSplit) { +TEST(Approx, PartitionerColumnSplit) { size_t n_samples = 1024, n_features = 16, base_rowid = 0; auto const Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true); auto hess = GenerateHess(n_samples); @@ -153,7 +153,7 @@ TEST(Approx, PartitionerColSplit) { } namespace { -class TestApproxColSplit : public ::testing::TestWithParam> { +class TestApproxColumnSplit : public ::testing::TestWithParam> { public: void Run() { auto [categorical, sparsity] = GetParam(); @@ -162,9 +162,9 @@ class TestApproxColSplit : public ::testing::TestWithParamRun(); } +TEST_P(TestApproxColumnSplit, Basic) { this->Run(); } -INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() { +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColumnSplit, ::testing::ValuesIn([]() { std::vector> params; for (auto categorical : {true, false}) { for (auto sparsity : {0.0f, 0.6f}) { diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 8e524dc7e979..2d372b368cab 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -198,12 +198,12 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) { } } // anonymous namespace -TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner(1); } +TEST(QuantileHist, PartitionerColumnSplit) { TestColumnSplitPartitioner(1); } -TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner(3); } +TEST(QuantileHist, MultiPartitionerColumnSplit) { TestColumnSplitPartitioner(3); } namespace { -class TestHistColSplit : public ::testing::TestWithParam> { +class TestHistColumnSplit : public ::testing::TestWithParam> { public: void Run() { auto [n_targets, categorical, sparsity] = GetParam(); @@ -212,9 +212,9 @@ class TestHistColSplit : public ::testing::TestWithParamRun(); } +TEST_P(TestHistColumnSplit, Basic) { this->Run(); } -INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() { +INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColumnSplit, ::testing::ValuesIn([]() { std::vector> params; for (auto categorical : {true, false}) { for (auto sparsity : {0.0f, 0.6f}) { diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index a1bc13cb8ec8..1a2128b8612a 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -453,7 +453,7 @@ def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None: ) @settings(deadline=None, max_examples=20, print_blob=True) def test_predict_categorical_split(self, df): - from sklearn.metrics import mean_squared_error + from sklearn.metrics import root_mean_squared_error df = df.astype("category") x0, x1 = df["x0"].to_numpy(), df["x1"].to_numpy() @@ -480,7 +480,7 @@ def test_predict_categorical_split(self, df): ) bst.set_param({"device": "cuda:0"}) pred = bst.predict(dtrain) - rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False) + rmse = root_mean_squared_error(y_true=y, y_pred=pred) np.testing.assert_almost_equal( rmse, eval_history["train"]["rmse"][-1], decimal=5 ) @@ -576,10 +576,9 @@ def test_dtypes(self): # boolean orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(rows, cols) predt_orig = booster.inplace_predict(orig) - for dtype in [cp.bool8, cp.bool_]: - X = cp.array(orig, dtype=dtype) - predt = booster.inplace_predict(X) - cp.testing.assert_allclose(predt, predt_orig) + X = cp.array(orig, dtype=cp.bool_) + predt = booster.inplace_predict(X) + cp.testing.assert_allclose(predt, predt_orig) # unsupported types for dtype in [ diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 61f33832ab48..6cb4a58a0de7 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -2,6 +2,7 @@ import os import pickle import random +import re import tempfile import warnings from typing import Callable, Optional @@ -795,6 +796,32 @@ def get_tm(clf: xgb.XGBClassifier) -> str: assert clf.get_params()["tree_method"] is None +def test_get_params_works_as_expected(): + # XGBModel -> BaseEstimator + params = xgb.XGBModel(max_depth=2).get_params() + assert params["max_depth"] == 2 + # 'objective' defaults to None in the signature of XGBModel + assert params["objective"] is None + + # XGBRegressor -> XGBModel -> BaseEstimator + params = xgb.XGBRegressor(max_depth=3).get_params() + assert params["max_depth"] == 3 + # 'objective' defaults to 'reg:squarederror' in the signature of XGBRegressor + assert params["objective"] == "reg:squarederror" + # 'colsample_bynode' defaults to 'None' for XGBModel (which XGBRegressor inherits from), so it + # should be in get_params() output + assert params["colsample_bynode"] is None + + # XGBRFRegressor -> XGBRegressor -> XGBModel -> BaseEstimator + params = xgb.XGBRFRegressor(max_depth=4, objective="reg:tweedie").get_params() + assert params["max_depth"] == 4 + # 'objective' is a keyword argument for XGBRegressor, so it should be in get_params() output + # ... but values passed through kwargs should override the default from the signature of XGBRegressor + assert params["objective"] == "reg:tweedie" + # 'colsample_bynode' defaults to 0.8 for XGBRFRegressor...that should be preferred to the None from XGBRegressor + assert params["colsample_bynode"] == 0.8 + + def test_kwargs_error(): params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1} with pytest.raises(TypeError): @@ -1284,7 +1311,7 @@ def test_data_initialization() -> None: validate_data_initialization(xgb.QuantileDMatrix, xgb.XGBClassifier, X, y) -@parametrize_with_checks([xgb.XGBRegressor()]) +@parametrize_with_checks([xgb.XGBRegressor(enable_categorical=True)]) def test_estimator_reg(estimator, check): if os.environ["PYTEST_CURRENT_TEST"].find("check_supervised_y_no_nan") != -1: # The test uses float64 and requires the error message to contain: @@ -1477,10 +1504,62 @@ def test_tags() -> None: assert tags["multioutput"] is True assert tags["multioutput_only"] is False - for clf in [xgb.XGBClassifier()]: + for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]: tags = clf._more_tags() assert "multioutput" not in tags assert tags["multilabel"] is True tags = xgb.XGBRanker()._more_tags() assert "multioutput" not in tags + + +# the try-excepts in this test should be removed once xgboost's +# minimum supported scikit-learn version is at least 1.6 +def test_sklearn_tags(): + + def _assert_has_xgbmodel_tags(tags): + # values set by XGBModel.__sklearn_tags__() + assert tags.non_deterministic is False + assert tags.no_validation is True + assert tags.input_tags.allow_nan is True + + for reg in [xgb.XGBRegressor(), xgb.XGBRFRegressor()]: + try: + # if no AttributeError was thrown, we must be using scikit-learn>=1.6, + # and so the actual effects of __sklearn_tags__() should be tested + tags = reg.__sklearn_tags__() + _assert_has_xgbmodel_tags(tags) + # regressor-specific values + assert tags.estimator_type == "regressor" + assert tags.regressor_tags is not None + assert tags.classifier_tags is None + assert tags.target_tags.multi_output is True + assert tags.target_tags.single_output is True + except AttributeError as err: + # only the exact error we expected to be raised should be raised + assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err))) + + for clf in [xgb.XGBClassifier(), xgb.XGBRFClassifier()]: + try: + # if no AttributeError was thrown, we must be using scikit-learn>=1.6, + # and so the actual effects of __sklearn_tags__() should be tested + tags = clf.__sklearn_tags__() + _assert_has_xgbmodel_tags(tags) + # classifier-specific values + assert tags.estimator_type == "classifier" + assert tags.regressor_tags is None + assert tags.classifier_tags is not None + assert tags.classifier_tags.multi_label is True + except AttributeError as err: + # only the exact error we expected to be raised should be raised + assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err))) + + for rnk in [xgb.XGBRanker(),]: + try: + # if no AttributeError was thrown, we must be using scikit-learn>=1.6, + # and so the actual effects of __sklearn_tags__() should be tested + tags = rnk.__sklearn_tags__() + _assert_has_xgbmodel_tags(tags) + except AttributeError as err: + # only the exact error we expected to be raised should be raised + assert bool(re.search(r"__sklearn_tags__.* should not be called", str(err))) diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py index 5b6400424591..53629b00d5b8 100644 --- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py +++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py @@ -10,6 +10,7 @@ import pytest from hypothesis import given, note, settings, strategies from hypothesis._settings import duration +from packaging.version import parse as parse_version import xgboost as xgb from xgboost import testing as tm @@ -19,7 +20,7 @@ pytestmark = [ pytest.mark.skipif(**tm.no_dask()), pytest.mark.skipif(**tm.no_dask_cuda()), - tm.timeout(60), + tm.timeout(120), ] from ..test_with_dask.test_with_dask import generate_array @@ -41,6 +42,7 @@ try: import cudf import dask.dataframe as dd + from dask import __version__ as dask_version from dask import array as da from dask.distributed import Client from dask_cuda import LocalCUDACluster @@ -48,7 +50,12 @@ from xgboost import dask as dxgb from xgboost.testing.dask import check_init_estimation, check_uneven_nan except ImportError: - pass + dask_version = None + + +dask_version_ge110 = dask_version and parse_version(dask_version) >= parse_version( + "2024.11.0" +) def run_with_dask_dataframe(DMatrixT: Type, client: Client) -> None: @@ -99,6 +106,8 @@ def is_df(part: T) -> T: cp.testing.assert_allclose(predt.values.compute(), single_node) + # Work around https://github.com/dmlc/xgboost/issues/10752 + X.columns = X.columns.astype("object") # Make sure the output can be integrated back to original dataframe X["predict"] = predictions X["inplace_predict"] = series_predictions @@ -376,6 +385,9 @@ def test_early_stopping(self, local_cuda_client: Client) -> None: dump = booster.get_dump(dump_format="json") assert len(dump) - booster.best_iteration == early_stopping_rounds + 1 + @pytest.mark.xfail( + dask_version_ge110, reason="Test cannot pass with Dask 2024.11.0+" + ) @pytest.mark.skipif(**tm.no_cudf()) @pytest.mark.parametrize("model", ["boosting"]) def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None: