Fix discrepancies with XGBRegressor and xgboost > 2 #670

Merged: 9 commits, merged on Dec 16, 2023
Changes from 4 commits
20 changes: 15 additions & 5 deletions .azure-pipelines/linux-conda-CI.yml
@@ -15,13 +15,23 @@ jobs:
   strategy:
     matrix:

+      Python311-1150-RT1163-xgb2-lgbm40:
+        python.version: '3.11'
+        ONNX_PATH: 'onnx==1.15.0'
+        ONNXRT_PATH: 'onnxruntime==1.16.3'
+        COREML_PATH: NONE
+        lightgbm.version: '>=4.0'
+        xgboost.version: '>=2'
+        numpy.version: ''
+        scipy.version: ''
+
       Python311-1150-RT1160-xgb175-lgbm40:
         python.version: '3.11'
         ONNX_PATH: 'onnx==1.15.0'
         ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         lightgbm.version: '>=4.0'
-        xgboost.version: '>=1.7.5'
+        xgboost.version: '==1.7.5'
         numpy.version: ''
         scipy.version: ''

@@ -31,7 +41,7 @@ jobs:
         ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         lightgbm.version: '>=4.0'
-        xgboost.version: '>=1.7.5'
+        xgboost.version: '==1.7.5'
         numpy.version: ''
         scipy.version: ''

@@ -41,7 +51,7 @@ jobs:
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         lightgbm.version: '<4.0'
-        xgboost.version: '>=1.7.5'
+        xgboost.version: '==1.7.5'
         numpy.version: ''
         scipy.version: ''

@@ -51,7 +61,7 @@ jobs:
         ONNXRT_PATH: 'onnxruntime==1.14.0'
         COREML_PATH: NONE
         lightgbm.version: '<4.0'
-        xgboost.version: '>=1.7.5'
+        xgboost.version: '==1.7.5'
         numpy.version: ''
         scipy.version: ''

@@ -61,7 +71,7 @@ jobs:
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         lightgbm.version: '>=4.0'
-        xgboost.version: '>=1.7.5'
+        xgboost.version: '==1.7.5'
         numpy.version: ''
         scipy.version: '==1.8.0'

7 changes: 7 additions & 0 deletions .azure-pipelines/win32-conda-CI.yml
@@ -21,34 +21,39 @@ jobs:
         ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         numpy.version: ''
+        xgboost.version: '2.0.2'

       Python311-1141-RT1162:
         python.version: '3.11'
         ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: 'onnxruntime==1.16.2'
         COREML_PATH: NONE
         numpy.version: ''
+        xgboost.version: '1.7.5'

       Python310-1141-RT1151:
         python.version: '3.10'
         ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: 'onnxruntime==1.15.1'
         COREML_PATH: NONE
         numpy.version: ''
+        xgboost.version: '1.7.5'

       Python310-1141-RT1140:
         python.version: '3.10'
         ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: onnxruntime==1.14.0
         COREML_PATH: NONE
         numpy.version: ''
+        xgboost.version: '1.7.5'

       Python39-1141-RT1140:
         python.version: '3.9'
         ONNX_PATH: 'onnx==1.14.1'
         ONNXRT_PATH: onnxruntime==1.14.0
         COREML_PATH: NONE
         numpy.version: ''
+        xgboost.version: '1.7.5'

     maxParallel: 3

@@ -74,6 +79,8 @@ jobs:
   - script: |
       call activate py$(python.version)
       python -m pip install --upgrade scikit-learn
+      python -m pip install --upgrade lightgbm
+      python -m pip install "xgboost==$(xgboost.version)"
     displayName: 'Install scikit-learn'

   - script: |
2 changes: 2 additions & 0 deletions CHANGELOGS.md
@@ -2,6 +2,8 @@

 ## 1.12.0

+* Fix discrepancies with XGBRegressor and xgboost > 2
+  [#670](https://github.com/onnx/onnxmltools/pull/670)
 * Support count:poisson for XGBRegressor
   [#666](https://github.com/onnx/onnxmltools/pull/666)
 * Supports XGBRFClassifier and XGBRFRegressor
19 changes: 15 additions & 4 deletions onnxmltools/convert/xgboost/_parse.py
@@ -69,9 +69,20 @@ def _get_attributes(booster):
     except AttributeError:
         ntrees = trees // num_class if num_class > 0 else trees
     else:
-        trees = len(res)
-        ntrees = booster.best_ntree_limit
-        num_class = trees // ntrees
+        config = json.loads(booster.save_config())["learner"]["learner_model_param"]
+        if "num_class" in config:
+            num_class = int(config["num_class"])
+            ntrees = len(res)
+            num_class = 1
+        else:
+            trees = len(res)
+            if hasattr(booster, "best_ntree_limit"):
+                ntrees = booster.best_ntree_limit
+            elif hasattr(booster, "best_iteration"):
+                ntrees = booster.best_iteration
+            else:
+                raise RuntimeError("Unable to guess the number of classes.")
+            num_class = trees // ntrees
     if num_class == 0:
         raise RuntimeError(
             "Unable to retrieve the number of classes, trees=%d, ntrees=%d."
@@ -137,7 +148,7 @@ def __init__(self, booster):
         self.operator_name = "XGBRegressor"

     def get_xgb_params(self):
-        return self.kwargs
+        return {k: v for k, v in self.kwargs.items() if v is not None}

     def get_booster(self):
         return self.booster_
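Note on the fallback chain above: xgboost 2.x removed `Booster.best_ntree_limit`, so the code now probes `best_ntree_limit`, then `best_iteration`, then gives up. A standalone sketch of the same probing, outside the diff (the toy model and the final `num_class` fallback are illustrative, not part of this PR):

import json
import numpy as np
import xgboost

# Toy regressor; the probing below mirrors the fallback chain in _parse.py.
X, y = np.random.rand(50, 4), np.random.rand(50)
booster = xgboost.train(
    {"objective": "reg:squarederror"}, xgboost.DMatrix(X, label=y), num_boost_round=5
)
trees = len(booster.get_dump(dump_format="json"))  # total trees dumped

if hasattr(booster, "best_ntree_limit"):  # xgboost < 2
    ntrees = booster.best_ntree_limit
elif hasattr(booster, "best_iteration"):  # set by early stopping in xgboost >= 2
    ntrees = booster.best_iteration
else:  # assumption of this sketch: derive a count from the config JSON
    cfg = json.loads(booster.save_config())["learner"]["learner_model_param"]
    ntrees = trees // max(int(cfg["num_class"]), 1)
print(trees, ntrees)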
27 changes: 25 additions & 2 deletions onnxmltools/convert/xgboost/common.py
@@ -3,6 +3,7 @@
 """
 Common function to converters and shape calculators.
 """
+import json


 def get_xgb_params(xgb_node):
@@ -15,8 +16,30 @@ def get_xgb_params(xgb_node):
     else:
         # XGBoost < 0.7
         params = xgb_node.__dict__
+
+    if hasattr(xgb_node, "save_config"):
+        config = json.loads(xgb_node.save_config())
+    else:
+        config = json.loads(xgb_node.get_booster().save_config())
+    num_class = int(config["learner"]["learner_model_param"]["num_class"])
+    params = {k: v for k, v in params.items() if v is not None}
+    params["num_class"] = num_class
     if "n_estimators" not in params and hasattr(xgb_node, "n_estimators"):
         # xgboost >= 1.0.2
-        params["n_estimators"] = xgb_node.n_estimators
+        if xgb_node.n_estimators is not None:
+            params["n_estimators"] = xgb_node.n_estimators
+    if params.get("base_score", None) is None:
+        # xgboost >= 2.0
+        params["base_score"] = float(
+            config["learner"]["learner_model_param"]["base_score"]
+        )
     return params
+
+
+def get_n_estimators_classifier(xgb_node, params, js_trees):
+    if "n_estimators" not in params:
+        config = json.loads(xgb_node.get_booster().save_config())
+        num_class = int(config["learner"]["learner_model_param"]["num_class"])
+        if num_class == 0:
+            return len(js_trees)
+        return len(js_trees) // num_class
+    return params["n_estimators"]
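Both helpers rely on `Booster.save_config()`, which returns the learner configuration as JSON with every value encoded as a string, hence the `int()`/`float()` conversions above. A minimal standalone sketch of that lookup (the toy classifier is illustrative):

import json
import numpy as np
import xgboost

# Inspect the config JSON that the helpers above parse; values arrive
# as strings, so they must be converted before use.
X = np.random.rand(60, 4)
y = np.random.randint(0, 3, 60)
clf = xgboost.XGBClassifier(n_estimators=2, max_depth=2).fit(X, y)

param = json.loads(clf.get_booster().save_config())["learner"]["learner_model_param"]
print(param["num_class"])   # "3" for this model, "0" for regressors
print(param["base_score"])  # e.g. "5E-1"; read back when the sklearn attribute is None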
22 changes: 16 additions & 6 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
@@ -10,7 +10,7 @@
 except ImportError:
     XGBRFClassifier = None
 from ...common._registration import register_converter
-from ..common import get_xgb_params
+from ..common import get_xgb_params, get_n_estimators_classifier


 class XGBConverter:
@@ -293,11 +293,18 @@ def convert(scope, operator, container):
         objective, base_score, js_trees = XGBConverter.common_members(xgb_node, inputs)

         params = XGBConverter.get_xgb_params(xgb_node)
+        n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees)
+
         attr_pairs = XGBClassifierConverter._get_default_tree_attribute_pairs()
         XGBConverter.fill_tree_attributes(
             js_trees, attr_pairs, [1 for _ in js_trees], True
         )
-        ncl = (max(attr_pairs["class_treeids"]) + 1) // params["n_estimators"]
+        if "num_class" in params:
+            ncl = params["num_class"]
+            n_estimators = len(js_trees) // ncl
+        else:
+            ncl = (max(attr_pairs["class_treeids"]) + 1) // n_estimators

         bst = xgb_node.get_booster()
         best_ntree_limit = getattr(bst, "best_ntree_limit", len(js_trees)) * ncl
@@ -310,6 +317,7 @@ def convert(scope, operator, container):

         if len(attr_pairs["class_treeids"]) == 0:
             raise RuntimeError("XGBoost model is empty.")
+
         if ncl <= 1:
             ncl = 2
             if objective != "binary:hinge":
@@ -330,8 +338,10 @@ def convert(scope, operator, container):
         attr_pairs["class_ids"] = [v % ncl for v in attr_pairs["class_treeids"]]

         classes = xgb_node.classes_
-        if np.issubdtype(classes.dtype, np.floating) or np.issubdtype(
-            classes.dtype, np.integer
+        if (
+            np.issubdtype(classes.dtype, np.floating)
+            or np.issubdtype(classes.dtype, np.integer)
+            or np.issubdtype(classes.dtype, np.bool_)
         ):
             attr_pairs["classlabels_int64s"] = classes.astype("int")
         else:
@@ -373,7 +383,7 @@ def convert(scope, operator, container):
                 "Where", [greater, one, zero], operator.output_full_names[1]
             )
         elif objective in ("multi:softprob", "multi:softmax"):
-            ncl = len(js_trees) // params["n_estimators"]
+            ncl = len(js_trees) // n_estimators
             if objective == "multi:softmax":
                 attr_pairs["post_transform"] = "NONE"
             container.add_node(
@@ -385,7 +395,7 @@ def convert(scope, operator, container):
                 **attr_pairs,
             )
         elif objective == "reg:logistic":
-            ncl = len(js_trees) // params["n_estimators"]
+            ncl = len(js_trees) // n_estimators
             if ncl == 1:
                 ncl = 2
             container.add_node(
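Since the point of the fix is that converted models must match xgboost's own predictions, an end-to-end check in the spirit of the test suite is sketched below (the input name, the tolerances, and the plain-tensor probabilities output are assumptions of this sketch, not guaranteed by the PR):

import numpy as np
import xgboost
from onnxmltools import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType
import onnxruntime as rt

# Train a small multiclass model, convert it, and compare probabilities.
X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 3, 100)
clf = xgboost.XGBClassifier(n_estimators=4, max_depth=3).fit(X, y)

onx = convert_xgboost(clf, initial_types=[("input", FloatTensorType([None, 4]))])
sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
# Second output assumed here to be the probability tensor.
labels, probas = sess.run(None, {"input": X})
np.testing.assert_allclose(clf.predict_proba(X), probas, rtol=1e-4, atol=1e-4)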
16 changes: 12 additions & 4 deletions onnxmltools/convert/xgboost/shape_calculators/Classifier.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

+import json
 import numpy as np
 from ...common._registration import register_shape_calculator
 from ...common.utils import check_input_and_output_numbers, check_input_and_output_types
@@ -8,7 +9,7 @@
     Int64TensorType,
     StringTensorType,
 )
-from ..common import get_xgb_params
+from ..common import get_xgb_params, get_n_estimators_classifier


 def calculate_xgboost_classifier_output_shapes(operator):
@@ -22,13 +23,20 @@ def calculate_xgboost_classifier_output_shapes(operator):
     params = get_xgb_params(xgb_node)
     booster = xgb_node.get_booster()
     booster.attributes()
-    ntrees = len(booster.get_dump(with_stats=True, dump_format="json"))
+    js_trees = booster.get_dump(with_stats=True, dump_format="json")
+    ntrees = len(js_trees)
     objective = params["objective"]
+    n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees)
+    config = json.loads(xgb_node.get_booster().save_config())
+    num_class = int(config["learner"]["learner_model_param"]["num_class"])

-    if objective == "binary:logistic":
+    if num_class is not None:
+        ncl = num_class
+        n_estimators = ntrees // ncl
[CodeQL notice — Unused local variable: variable n_estimators is not used.]
+    elif objective == "binary:logistic":
         ncl = 2
     else:
-        ncl = ntrees // params["n_estimators"]
+        ncl = ntrees // n_estimators
     if objective == "reg:logistic" and ncl == 1:
         ncl = 2
     classes = xgb_node.classes_
2 changes: 2 additions & 0 deletions onnxmltools/utils/utils_backend.py
@@ -188,6 +188,8 @@ def compare_outputs(expected, output, **kwargs):
     Disc = kwargs.pop("Disc", False)
     Mism = kwargs.pop("Mism", False)
     Opp = kwargs.pop("Opp", False)
+    if hasattr(expected, "dtype") and expected.dtype == numpy.bool_:
+        expected = expected.astype(numpy.int64)
     if Opp and not NoProb:
         raise ValueError("Opp is only available if NoProb is True")

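This cast lets boolean ground truth compare cleanly against the int64 labels that ONNX tree classifiers emit; a toy illustration of the same normalization (standalone, not the actual test harness):

import numpy

# Boolean targets vs. int64 ONNX labels: equal once normalized.
expected = numpy.array([True, False, True])
output = numpy.array([1, 0, 1], dtype=numpy.int64)

if hasattr(expected, "dtype") and expected.dtype == numpy.bool_:
    expected = expected.astype(numpy.int64)
assert numpy.array_equal(expected, output)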
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -16,5 +16,5 @@ ruff
 scikit-learn>=1.2.0
 scipy
 wheel
-xgboost==1.7.5
+xgboost
 onnxruntime
1 change: 0 additions & 1 deletion tests/h2o/test_h2o_converters.py
@@ -223,7 +223,6 @@ def test_h2o_classifier_multi_cat(self):
         train, test = _prepare_one_hot("airlines.csv", y)
         gbm = H2OGradientBoostingEstimator(ntrees=8, max_depth=5)
         mojo_path = _make_mojo(gbm, train, y=train.columns.index(y))
-        print("****", mojo_path)
         onnx_model = _convert_mojo(mojo_path)
         self.assertIsNot(onnx_model, None)
         dump_data_and_model(
2 changes: 1 addition & 1 deletion tests/xgboost/test_xgboost_converters.py
@@ -677,5 +677,5 @@ def test_xgb_classifier_hinge(self):


 if __name__ == "__main__":
-    TestXGBoostModels().test_xgb_regressor_poisson()
+    TestXGBoostModels().test_xgb_best_tree_limit()
     unittest.main(verbosity=2)