From 31bcb459d44dafd3cb0e825bca67e5560db2e9e2 Mon Sep 17 00:00:00 2001 From: btrotta Date: Sat, 29 May 2021 14:21:58 +1000 Subject: [PATCH 1/6] Add linear leaf models to json output --- include/LightGBM/tree.h | 3 ++ src/io/tree.cpp | 38 +++++++++++++++++++++++-- tests/python_package_test/test_basic.py | 15 ++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index 4f5ede83102b..4f072a6406e3 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -243,6 +243,9 @@ class Tree { /*! \brief Serialize this object to json*/ std::string ToJSON() const; + /*! \brief Serialize linear model of tree node to json*/ + std::string LinearModelToJSON(int index) const; + /*! \brief Serialize this object to if-else statement*/ std::string ToIfElse(int index, bool predict_leaf_index) const; diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 67e02af20cd8..47c8fe36377d 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -417,11 +417,39 @@ std::string Tree::ToJSON() const { str_buf << "\"num_cat\":" << num_cat_ << "," << '\n'; str_buf << "\"shrinkage\":" << shrinkage_ << "," << '\n'; if (num_leaves_ == 1) { - str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; + if (is_linear_) { + str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n"; + str_buf << LinearModelToJSON(0); + } else { + str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; + } } else { str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n'; } + return str_buf.str(); +} +std::string Tree::LinearModelToJSON(int index) const { + std::stringstream str_buf; + Common::C_stringstream(str_buf); + str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2); + str_buf << "\"leaf_const\":" << leaf_const_[index] << "," << "\n"; + int num_features = static_cast<int>(leaf_features_[index].size()); + if (num_features > 0) { + str_buf <<
"\"leaf_features\":["; + for (int i = 0; i < num_features - 1; ++i) { + str_buf << leaf_features_[index][i] << ", "; + } + str_buf << leaf_features_[index][num_features - 1] << "]" << ", " << "\n"; + str_buf << "\"leaf_coeff\":["; + for (int i = 0; i < num_features - 1; ++i) { + str_buf << leaf_coeff_[index][i] << ", "; + } + str_buf << leaf_coeff_[index][num_features - 1] << "]" << "\n"; + } else { + str_buf << "\"leaf_features\":[],\n"; + str_buf << "\"leaf_coeff\":[]\n"; + } return str_buf.str(); } @@ -479,10 +507,14 @@ std::string Tree::NodeToJSON(int index) const { str_buf << "\"leaf_index\":" << index << "," << '\n'; str_buf << "\"leaf_value\":" << leaf_value_[index] << "," << '\n'; str_buf << "\"leaf_weight\":" << leaf_weight_[index] << "," << '\n'; - str_buf << "\"leaf_count\":" << leaf_count_[index] << '\n'; + if (is_linear_) { + str_buf << "\"leaf_count\":" << leaf_count_[index] << "," << '\n'; + str_buf << LinearModelToJSON(index); + } else { + str_buf << "\"leaf_count\":" << leaf_count_[index] << '\n'; + } str_buf << "}"; } - return str_buf.str(); } diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index b92c7998e554..22faf227d355 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -411,3 +411,18 @@ def test_list_to_1d_numpy(y, dtype): result = lgb.basic.list_to_1d_numpy(y, dtype=dtype) assert result.size == 10 assert result.dtype == dtype + + +def test_dump_model(): + X, y = load_breast_cancer(return_X_y=True) + train_data = lgb.Dataset(X, label=y) + params = { + "objective": "binary", + "verbose": -1 + } + bst = lgb.train(params, train_data, num_boost_round=5) + bst.dump_model(5, 0) + params['linear_tree'] = True + train_data = lgb.Dataset(X, label=y) + bst = lgb.train(params, train_data, num_boost_round=5) + bst.dump_model(5, 0) From 1480c4ffffce06ffe80b4944cd58228cf9c1bba3 Mon Sep 17 00:00:00 2001 From: btrotta Date: Sun, 30 May 2021 12:56:13 +1000 
Subject: [PATCH 2/6] Add closing bracket --- src/io/tree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 47c8fe36377d..e3c770491ff6 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -419,7 +419,7 @@ std::string Tree::ToJSON() const { if (num_leaves_ == 1) { if (is_linear_) { str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n"; - str_buf << LinearModelToJSON(0); + str_buf << LinearModelToJSON(0) << "}" << "\n"; } else { str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; } From e2a8d8172be0f17c88f71bd1a0132e1a4b09a5cf Mon Sep 17 00:00:00 2001 From: btrotta Date: Mon, 31 May 2021 18:32:30 +1000 Subject: [PATCH 3/6] Move test into test_engine.py and add asserts --- tests/python_package_test/test_basic.py | 15 --------------- tests/python_package_test/test_engine.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 22faf227d355..b92c7998e554 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -411,18 +411,3 @@ def test_list_to_1d_numpy(y, dtype): result = lgb.basic.list_to_1d_numpy(y, dtype=dtype) assert result.size == 10 assert result.dtype == dtype - - -def test_dump_model(): - X, y = load_breast_cancer(return_X_y=True) - train_data = lgb.Dataset(X, label=y) - params = { - "objective": "binary", - "verbose": -1 - } - bst = lgb.train(params, train_data, num_boost_round=5) - bst.dump_model(5, 0) - params['linear_tree'] = True - train_data = lgb.Dataset(X, label=y) - bst = lgb.train(params, train_data, num_boost_round=5) - bst.dump_model(5, 0) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 49e89534da51..be61a82a3193 100644 --- a/tests/python_package_test/test_engine.py +++ 
b/tests/python_package_test/test_engine.py @@ -2793,3 +2793,21 @@ def test_reset_params_works_with_metric_num_class_and_boosting(): expected_params = dict(dataset_params, **booster_params) assert bst.params == expected_params assert new_bst.params == expected_params + + +def test_dump_model(): + X, y = load_breast_cancer(return_X_y=True) + train_data = lgb.Dataset(X, label=y) + params = { + "objective": "binary", + "verbose": -1 + } + bst = lgb.train(params, train_data, num_boost_round=5) + dumped_model = bst.dump_model(5, 0) + assert "leaf_coeff" not in str(dumped_model) + params['linear_tree'] = True + train_data = lgb.Dataset(X, label=y) + bst = lgb.train(params, train_data, num_boost_round=5) + dumped_model = bst.dump_model(5, 0) + assert "leaf_coeff" in str(dumped_model) + From 2302a470d6939647f9388de51c757131134a29aa Mon Sep 17 00:00:00 2001 From: Belinda Trotta Date: Tue, 1 Jun 2021 17:58:43 +1000 Subject: [PATCH 4/6] Update tests/python_package_test/test_engine.py Co-authored-by: Nikita Titov --- tests/python_package_test/test_engine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index be61a82a3193..e0fd2230cfdd 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2810,4 +2810,3 @@ def test_dump_model(): bst = lgb.train(params, train_data, num_boost_round=5) dumped_model = bst.dump_model(5, 0) assert "leaf_coeff" in str(dumped_model) - From fd43e2cb921edcccb849ae53b83da2b6c5c5ca6f Mon Sep 17 00:00:00 2001 From: Belinda Trotta Date: Tue, 1 Jun 2021 17:59:09 +1000 Subject: [PATCH 5/6] Update tests/python_package_test/test_engine.py Co-authored-by: Nikita Titov --- tests/python_package_test/test_engine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index e0fd2230cfdd..5b3993346c25 100644 --- 
a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2803,8 +2803,12 @@ def test_dump_model(): "verbose": -1 } bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model = bst.dump_model(5, 0) - assert "leaf_coeff" not in str(dumped_model) + dumped_model_str = str(bst.dump_model(5, 0)) + assert "leaf_features" not in dumped_model_str + assert "leaf_coeff" not in dumped_model_str + assert "leaf_const" not in dumped_model_str + assert "leaf_value" in dumped_model_str + assert "leaf_count" in dumped_model_str params['linear_tree'] = True train_data = lgb.Dataset(X, label=y) bst = lgb.train(params, train_data, num_boost_round=5) From 550608e956db9ac36e220a3cc6fb3bd5e9e154e1 Mon Sep 17 00:00:00 2001 From: Belinda Trotta Date: Tue, 1 Jun 2021 17:59:17 +1000 Subject: [PATCH 6/6] Update tests/python_package_test/test_engine.py Co-authored-by: Nikita Titov --- tests/python_package_test/test_engine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 5b3993346c25..6ffec8cee7d9 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2812,5 +2812,9 @@ def test_dump_model(): params['linear_tree'] = True train_data = lgb.Dataset(X, label=y) bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model = bst.dump_model(5, 0) - assert "leaf_coeff" in str(dumped_model) + dumped_model_str = str(bst.dump_model(5, 0)) + assert "leaf_features" in dumped_model_str + assert "leaf_coeff" in dumped_model_str + assert "leaf_const" in dumped_model_str + assert "leaf_value" in dumped_model_str + assert "leaf_count" in dumped_model_str