From d6ea5cc1ed1c7c24f593ffb142ceaabd04e7bef3 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 16 Jan 2022 11:31:40 +0800 Subject: [PATCH] Cover approx tree method for categorical data tests. (#7569) * Add tree to df tests. * Add plotting tests. * Add histogram tests. --- tests/python-gpu/test_gpu_parse_tree.py | 21 ++++------------- tests/python-gpu/test_gpu_plotting.py | 31 ++++--------------------- tests/python/test_parse_tree.py | 24 ++++++++++++++++++- tests/python/test_plotting.py | 24 ++++++++++++++++++- 4 files changed, 55 insertions(+), 45 deletions(-) diff --git a/tests/python-gpu/test_gpu_parse_tree.py b/tests/python-gpu/test_gpu_parse_tree.py index 8033fb9852d1..1c55acc8f33a 100644 --- a/tests/python-gpu/test_gpu_parse_tree.py +++ b/tests/python-gpu/test_gpu_parse_tree.py @@ -1,25 +1,14 @@ import sys -import pytest -import xgboost as xgb sys.path.append("tests/python") -import testing as tm +from test_parse_tree import TestTreesToDataFrame def test_tree_to_df_categorical(): - X, y = tm.make_categorical(100, 10, 31, False) - Xy = xgb.DMatrix(X, y, enable_categorical=True) - booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10) - df = booster.trees_to_dataframe() - for _, x in df.iterrows(): - if x["Feature"] != "Leaf": - assert len(x["Category"]) == 1 + cputest = TestTreesToDataFrame() + cputest.run_tree_to_df_categorical("gpu_hist") def test_split_value_histograms(): - X, y = tm.make_categorical(1000, 10, 13, False) - reg = xgb.XGBRegressor(tree_method="gpu_hist", enable_categorical=True) - reg.fit(X, y) - - with pytest.raises(ValueError, match="doesn't"): - reg.get_booster().get_split_value_histogram("3", bins=5) + cputest = TestTreesToDataFrame() + cputest.run_split_value_histograms("gpu_hist") diff --git a/tests/python-gpu/test_gpu_plotting.py b/tests/python-gpu/test_gpu_plotting.py index 4bfda2dbd388..f12f895a0a24 100644 --- a/tests/python-gpu/test_gpu_plotting.py +++ b/tests/python-gpu/test_gpu_plotting.py @@ -1,40 +1,17 @@ import sys -import xgboost as xgb import pytest -import json sys.path.append("tests/python") import testing as tm - -try: - import matplotlib - - matplotlib.use("Agg") - from matplotlib.axes import Axes - from graphviz import Source -except ImportError: - pass +import test_plotting as tp pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) class TestPlotting: + cputest = tp.TestPlotting() + @pytest.mark.skipif(**tm.no_pandas()) def test_categorical(self): - X, y = tm.make_categorical(1000, 31, 19, onehot=False) - reg = xgb.XGBRegressor( - enable_categorical=True, n_estimators=10, tree_method="gpu_hist" - ) - reg.fit(X, y) - trees = reg.get_booster().get_dump(dump_format="json") - for tree in trees: - j_tree = json.loads(tree) - assert "leaf" in j_tree.keys() or isinstance( - j_tree["split_condition"], list - ) - - graph = xgb.to_graphviz(reg, num_trees=len(j_tree) - 1) - assert isinstance(graph, Source) - ax = xgb.plot_tree(reg, num_trees=len(j_tree) - 1) - assert isinstance(ax, Axes) + self.cputest.run_categorical("gpu_hist") diff --git a/tests/python/test_parse_tree.py b/tests/python/test_parse_tree.py index 90180cf6a094..4957b93bfe04 100644 --- a/tests/python/test_parse_tree.py +++ b/tests/python/test_parse_tree.py @@ -12,7 +12,6 @@ class TestTreesToDataFrame: - def build_model(self, max_depth, num_round): dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') param = {'max_depth': max_depth, 'objective': 'binary:logistic', @@ -48,3 +47,26 @@ def test_trees_to_dataframe(self): # test for equality of covers cover_from_df = df.Cover.sum() assert np.allclose(cover_from_dump, cover_from_df) + + def run_tree_to_df_categorical(self, tree_method: str) -> None: + X, y = tm.make_categorical(100, 10, 31, False) + Xy = xgb.DMatrix(X, y, enable_categorical=True) + booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10) + df = booster.trees_to_dataframe() + for _, x in df.iterrows(): + if x["Feature"] != "Leaf": + assert len(x["Category"]) >= 1 + + def test_tree_to_df_categorical(self) -> None: + self.run_tree_to_df_categorical("approx") + + def run_split_value_histograms(self, tree_method) -> None: + X, y = tm.make_categorical(1000, 10, 13, False) + reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True) + reg.fit(X, y) + + with pytest.raises(ValueError, match="doesn't"): + reg.get_booster().get_split_value_histogram("3", bins=5) + + def test_split_value_histograms(self): + self.run_split_value_histograms("approx") diff --git a/tests/python/test_plotting.py b/tests/python/test_plotting.py index c536bc0c7ffa..0167fb62df13 100644 --- a/tests/python/test_plotting.py +++ b/tests/python/test_plotting.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +import json import numpy as np import xgboost as xgb import testing as tm @@ -73,3 +73,25 @@ def test_importance_plot_lim(self): ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71)) assert ax.get_xlim() == (0., 5.) assert ax.get_ylim() == (10., 71.) + + def run_categorical(self, tree_method: str) -> None: + X, y = tm.make_categorical(1000, 31, 19, onehot=False) + reg = xgb.XGBRegressor( + enable_categorical=True, n_estimators=10, tree_method=tree_method + ) + reg.fit(X, y) + trees = reg.get_booster().get_dump(dump_format="json") + for tree in trees: + j_tree = json.loads(tree) + assert "leaf" in j_tree.keys() or isinstance( + j_tree["split_condition"], list + ) + + graph = xgb.to_graphviz(reg, num_trees=len(j_tree) - 1) + assert isinstance(graph, Source) + ax = xgb.plot_tree(reg, num_trees=len(j_tree) - 1) + assert isinstance(ax, Axes) + + @pytest.mark.skipif(**tm.no_pandas()) + def test_categorical(self) -> None: + self.run_categorical("approx")