Skip to content

Commit

Permalink
Cover approx tree method for categorical data tests. (#7569)
Browse files Browse the repository at this point in the history
* Add tree to df tests.
* Add plotting tests.
* Add histogram tests.
  • Loading branch information
trivialfis committed Jan 16, 2022
1 parent 465dc63 commit d6ea5cc
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 45 deletions.
21 changes: 5 additions & 16 deletions tests/python-gpu/test_gpu_parse_tree.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,14 @@
import sys
import pytest
import xgboost as xgb

sys.path.append("tests/python")
import testing as tm
from test_parse_tree import TestTreesToDataFrame


# GPU variant of the categorical trees-to-DataFrame test.
# NOTE(review): this page renders the diff without +/- markers. The inline body
# (make_categorical through the Category assertion) looks like the pre-commit
# code and the last two lines like its replacement -- confirm against the commit.
def test_tree_to_df_categorical():
X, y = tm.make_categorical(100, 10, 31, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
df = booster.trees_to_dataframe()
for _, x in df.iterrows():
if x["Feature"] != "Leaf":
# Every non-leaf row is required to list exactly one category.
assert len(x["Category"]) == 1
# Delegate to the shared CPU test class, selecting the GPU tree method.
cputest = TestTreesToDataFrame()
cputest.run_tree_to_df_categorical("gpu_hist")


# GPU variant of the split-value-histogram test on categorical data.
# NOTE(review): this page renders the diff without +/- markers. The inline body
# (make_categorical through the pytest.raises block) looks like the pre-commit
# code and the last two lines like its replacement -- confirm against the commit.
def test_split_value_histograms():
X, y = tm.make_categorical(1000, 10, 13, False)
reg = xgb.XGBRegressor(tree_method="gpu_hist", enable_categorical=True)
reg.fit(X, y)

# Requesting a histogram for feature "3" must raise a ValueError whose
# message matches "doesn't".
with pytest.raises(ValueError, match="doesn't"):
reg.get_booster().get_split_value_histogram("3", bins=5)
# Delegate to the shared CPU test class, selecting the GPU tree method.
cputest = TestTreesToDataFrame()
cputest.run_split_value_histograms("gpu_hist")
31 changes: 4 additions & 27 deletions tests/python-gpu/test_gpu_plotting.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,17 @@
import sys
import xgboost as xgb
import pytest
import json

sys.path.append("tests/python")
import testing as tm

try:
import matplotlib

matplotlib.use("Agg")
from matplotlib.axes import Axes
from graphviz import Source
except ImportError:
pass
import test_plotting as tp


# Module-wide skip marker built from the matplotlib and graphviz availability
# checks in the shared testing helpers.
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz()))


# GPU plotting tests.
# NOTE(review): this page renders the diff without +/- markers. The inline body
# of test_categorical (make_categorical through the Axes assertion) looks like
# the pre-commit code, while the `cputest` attribute and the final delegating
# call look like its replacement -- confirm against the commit.
class TestPlotting:
# Shared instance of the CPU plotting test class whose helper is reused below.
cputest = tp.TestPlotting()

@pytest.mark.skipif(**tm.no_pandas())
def test_categorical(self):
X, y = tm.make_categorical(1000, 31, 19, onehot=False)
reg = xgb.XGBRegressor(
enable_categorical=True, n_estimators=10, tree_method="gpu_hist"
)
reg.fit(X, y)
trees = reg.get_booster().get_dump(dump_format="json")
for tree in trees:
j_tree = json.loads(tree)
# The root of each dumped tree is either a leaf or has a list-valued
# split condition.
assert "leaf" in j_tree.keys() or isinstance(
j_tree["split_condition"], list
)

# NOTE(review): len(j_tree) is the key count of the last parsed tree dict,
# not the number of trees -- confirm this is the intended tree index.
graph = xgb.to_graphviz(reg, num_trees=len(j_tree) - 1)
assert isinstance(graph, Source)
ax = xgb.plot_tree(reg, num_trees=len(j_tree) - 1)
assert isinstance(ax, Axes)
# Delegate to the shared CPU helper, selecting the GPU tree method.
self.cputest.run_categorical("gpu_hist")
24 changes: 23 additions & 1 deletion tests/python/test_parse_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


class TestTreesToDataFrame:

def build_model(self, max_depth, num_round):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
param = {'max_depth': max_depth, 'objective': 'binary:logistic',
Expand Down Expand Up @@ -48,3 +47,26 @@ def test_trees_to_dataframe(self):
# test for equality of covers
cover_from_df = df.Cover.sum()
assert np.allclose(cover_from_dump, cover_from_df)

def run_tree_to_df_categorical(self, tree_method: str) -> None:
    """Check the ``Category`` column of ``trees_to_dataframe`` output.

    Trains 10 boosting rounds with ``tree_method`` on data produced by
    ``tm.make_categorical`` (categorical support enabled on the DMatrix),
    converts the trees to a DataFrame, and asserts that every row whose
    ``Feature`` is not ``"Leaf"`` has a non-empty ``Category`` entry.

    Parameters
    ----------
    tree_method :
        Value passed to the booster as the ``tree_method`` parameter.
    """
    features, labels = tm.make_categorical(100, 10, 31, False)
    dtrain = xgb.DMatrix(features, labels, enable_categorical=True)
    params = {"tree_method": tree_method}
    model = xgb.train(params, dtrain, num_boost_round=10)
    tree_table = model.trees_to_dataframe()
    for _, node in tree_table.iterrows():
        # Leaf rows are not checked.
        if node["Feature"] == "Leaf":
            continue
        assert len(node["Category"]) >= 1

def test_tree_to_df_categorical(self) -> None:
    """Run the categorical trees-to-DataFrame check with ``approx``."""
    method = "approx"
    self.run_tree_to_df_categorical(method)

def run_split_value_histograms(self, tree_method: str) -> None:
    """Check that split-value histograms are rejected on this categorical model.

    Fits an ``XGBRegressor`` with ``tree_method`` and categorical support
    enabled on data produced by ``tm.make_categorical``, then asserts that
    asking the booster for the split-value histogram of feature ``"3"``
    raises ``ValueError`` with a message matching ``"doesn't"``.

    Parameters
    ----------
    tree_method :
        Value passed to ``XGBRegressor`` as ``tree_method``.
    """
    X, y = tm.make_categorical(1000, 10, 13, False)
    reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
    reg.fit(X, y)

    booster = reg.get_booster()
    # Only the histogram call belongs inside the raises block, so a failure
    # while fetching the booster is not mistaken for the expected error.
    with pytest.raises(ValueError, match="doesn't"):
        booster.get_split_value_histogram("3", bins=5)

def test_split_value_histograms(self) -> None:
    """Run the split-value-histogram check with the ``approx`` tree method."""
    self.run_split_value_histograms("approx")
24 changes: 23 additions & 1 deletion tests/python/test_plotting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
import json
import numpy as np
import xgboost as xgb
import testing as tm
Expand Down Expand Up @@ -73,3 +73,25 @@ def test_importance_plot_lim(self):
ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
assert ax.get_xlim() == (0., 5.)
assert ax.get_ylim() == (10., 71.)

def run_categorical(self, tree_method: str) -> None:
    """Check JSON dumps and plotting for a model trained on categorical data.

    Fits a 10-estimator ``XGBRegressor`` with ``tree_method`` and categorical
    support enabled, then verifies that

    * the root of every JSON-dumped tree is either a leaf or has a
      list-valued ``split_condition``, and
    * ``to_graphviz`` and ``plot_tree`` return a ``Source`` and an ``Axes``
      respectively for the last tree of the model.

    Parameters
    ----------
    tree_method :
        Value passed to ``XGBRegressor`` as ``tree_method``.
    """
    X, y = tm.make_categorical(1000, 31, 19, onehot=False)
    reg = xgb.XGBRegressor(
        enable_categorical=True, n_estimators=10, tree_method=tree_method
    )
    reg.fit(X, y)
    trees = reg.get_booster().get_dump(dump_format="json")
    for tree in trees:
        j_tree = json.loads(tree)
        assert "leaf" in j_tree or isinstance(j_tree["split_condition"], list)

    # Index the last tree by the number of dumped trees. The previous
    # ``len(j_tree) - 1`` counted the keys of the last parsed tree dict,
    # which only happened to be a valid tree index.
    last_tree = len(trees) - 1
    graph = xgb.to_graphviz(reg, num_trees=last_tree)
    assert isinstance(graph, Source)
    ax = xgb.plot_tree(reg, num_trees=last_tree)
    assert isinstance(ax, Axes)

@pytest.mark.skipif(**tm.no_pandas())
def test_categorical(self) -> None:
    """Run the categorical dump/plot check with the ``approx`` tree method."""
    method = "approx"
    self.run_categorical(method)

0 comments on commit d6ea5cc

Please sign in to comment.