From 687390f7dc147725b58bdd5e04c7de98dd9982ee Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 7 Jun 2021 00:40:26 -0500
Subject: [PATCH] [R-package] add docs and tests on monotone constraints (fixes #4345)

---
 R-package/tests/testthat/test_basic.R | 175 ++++++++++++++++++++++++++
 docs/Parameters.rst                   |   6 ++
 include/LightGBM/config.h             |   3 +
 3 files changed, 184 insertions(+)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index a49c728d2e19..40eec6b43c49 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -2042,3 +2042,178 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
     expect_equal(pred1, pred2)
 
 })
+
+context("monotone constraints")
+
+# Bin values from [0, 1] into the integer category codes 0-3. Shared by the
+# training-data generator and the prediction sweeps so both use one encoding.
+.to_category_codes <- function(x) {
+    return(pmin(as.integer(x / 0.25), 3L))
+}
+
+# Build the 3-feature regression Dataset used by the monotone constraint tests.
+#
+# The label rises with feature 1 and falls with features 2 and 3, with a
+# sine / cosine wiggle and a little gaussian noise added on top.
+#
+# x3_to_categorical: if TRUE, feature 3 is stored as integer category codes and
+#                    declared categorical; if FALSE it stays continuous.
+#
+# Returns an lgb.Dataset created with free_raw_data = FALSE.
+.generate_trainset_for_monotone_constraints_tests <- function(x3_to_categorical) {
+    n_samples <- 3000L
+    x1_positively_correlated_with_y <- rnorm(n = n_samples)
+    x2_negatively_correlated_with_y <- rnorm(n = n_samples)
+    x3_negatively_correlated_with_y <- runif(n = n_samples)
+    if (x3_to_categorical) {
+        x3_negatively_correlated_with_y <- .to_category_codes(
+            x3_negatively_correlated_with_y
+        )
+        categorical_features <- 3L
+    } else {
+        categorical_features <- NULL
+    }
+    X <- matrix(
+        data = c(
+            x1_positively_correlated_with_y
+            , x2_negatively_correlated_with_y
+            , x3_negatively_correlated_with_y
+        )
+        , ncol = 3L
+    )
+    zs <- rnorm(n = n_samples, mean = 0.0, sd = 0.01)
+    # draw every scale from [5, 15): a strictly positive scale keeps each
+    # feature's effect in the direction its name (and the constraints) assume
+    scales <- 10.0 * (runif(n = 6L) + 0.5)
+    y <- (
+        scales[1L] * x1_positively_correlated_with_y
+        + sin(scales[2L] * pi * x1_positively_correlated_with_y)
+        - scales[3L] * x2_negatively_correlated_with_y
+        - cos(scales[4L] * pi * x2_negatively_correlated_with_y)
+        - scales[5L] * x3_negatively_correlated_with_y
+        - cos(scales[6L] * pi * x3_negatively_correlated_with_y)
+        + zs
+    )
+    return(lgb.Dataset(
+        data = X
+        , label = y
+        , categorical_feature = categorical_features
+        , free_raw_data = FALSE
+    ))
+}
+
+# TRUE if y never decreases from one element to the next
+.is_increasing <- function(y) {
+    return(all(diff(y) >= 0.0))
+}
+
+# TRUE if y never increases from one element to the next
+.is_decreasing <- function(y) {
+    return(all(diff(y) <= 0.0))
+}
+
+# TRUE if y both rises and falls somewhere along its length
+.is_non_monotone <- function(y) {
+    steps <- diff(y)
+    return(any(steps < 0.0) && any(steps > 0.0))
+}
+
+# Check that `learner` respects the monotone constraints c(1, -1, 0).
+#
+# One feature at a time is swept over (0, 1] while the other two are held at a
+# constant. Predictions along the sweep must never decrease for feature 1 and
+# never increase for feature 2.
+#
+# learner: fitted model, scored with predict()
+# x3_to_category: TRUE if feature 3 was trained as category codes; its sweep is
+#                 then binned with the same encoding as the training data
+# require_non_monotone_x3: if TRUE, predictions must also move in both
+#                          directions along unconstrained feature 3. Off by
+#                          default: that depends on the fit, not the constraints.
+#
+# Returns TRUE if every sweep passes and FALSE as soon as one fails.
+.is_correctly_constrained <- function(learner,
+                                      x3_to_category,
+                                      require_non_monotone_x3 = FALSE) {
+    iterations <- 10L
+    n <- 1000L
+    variable_x <- seq_len(n) / n
+    # held-constant values come from the same (0, 1] grid as the sweep
+    fixed_xs_values <- seq_len(n) / n
+    if (x3_to_category) {
+        variable_x3 <- .to_category_codes(variable_x)
+    } else {
+        variable_x3 <- variable_x
+    }
+    for (i in seq_len(iterations)) {
+        fixed_x <- rep(fixed_xs_values[i], n)
+
+        monotonically_increasing_x <- matrix(
+            data = c(variable_x, fixed_x, fixed_x)
+            , ncol = 3L
+        )
+        monotonically_increasing_y <- predict(learner, monotonically_increasing_x)
+        if (!.is_increasing(monotonically_increasing_y)) {
+            return(FALSE)
+        }
+
+        monotonically_decreasing_x <- matrix(
+            data = c(fixed_x, variable_x, fixed_x)
+            , ncol = 3L
+        )
+        monotonically_decreasing_y <- predict(learner, monotonically_decreasing_x)
+        if (!.is_decreasing(monotonically_decreasing_y)) {
+            return(FALSE)
+        }
+
+        if (require_non_monotone_x3) {
+            non_monotone_x <- matrix(
+                data = c(fixed_x, fixed_x, variable_x3)
+                , ncol = 3L
+            )
+            non_monotone_y <- predict(learner, non_monotone_x)
+            if (!.is_non_monotone(non_monotone_y)) {
+                return(FALSE)
+            }
+        }
+    }
+    return(TRUE)
+}
+
+for (x3_to_categorical in c(TRUE, FALSE)) {
+    for (monotone_constraints_method in c("basic", "intermediate", "advanced")) {
+        test_msg <- paste0(
+            "lgb.train() supports monotone constraints ("
+            , "categoricals="
+            , x3_to_categorical
+            , ", method="
+            , monotone_constraints_method
+            , ")"
+        )
+        test_that(test_msg, {
+            set.seed(708L)
+            dtrain <- .generate_trainset_for_monotone_constraints_tests(
+                x3_to_categorical = x3_to_categorical
+            )
+            params <- list(
+                min_data = 20L
+                , num_leaves = 20L
+                , use_missing = FALSE
+                , monotone_constraints = c(1L, -1L, 0L)
+                , monotone_constraints_method = monotone_constraints_method
+            )
+            constrained_model <- lgb.train(
+                params = params
+                , data = dtrain
+                , obj = "regression_l2"
+                , nrounds = 10L
+            )
+            expect_true(
+                .is_correctly_constrained(
+                    learner = constrained_model
+                    , x3_to_category = x3_to_categorical
+                )
+            )
+        })
+    }
+}
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index db4673b8dcff..722d3c191200 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -514,6 +514,12 @@ Learning Control Parameters
 
    -  you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
 
+   -  in the CLI or C++, use a string like ``"-1,0,1"``
+
+   -  in the Python package, can use either a string or a list like ``[-1, 0, 1]``
+
+   -  in the R package, can use either a string or a vector like ``c(-1, 0, 1)``
+
 -  ``monotone_constraints_method`` :raw-html:`🔗︎`, default = ``basic``, type = enum, options: ``basic``, ``intermediate``, ``advanced``, aliases: ``monotone_constraining_method``, ``mc_method``
 
    -  used only if ``monotone_constraints`` is set
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 33649e1fc2cd..2eb1577bfa74 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -471,6 +471,9 @@ struct Config {
   // desc = used for constraints of monotonic features
   // desc = ``1`` means increasing, ``-1`` means decreasing, ``0`` means non-constraint
   // desc = you need to specify all features in order. For example, ``mc=-1,0,1`` means decreasing for 1st feature, non-constraint for 2nd feature and increasing for the 3rd feature
+  // desc = in the CLI or C++, use a string like ``"-1,0,1"``
+  // desc = in the Python package, can use either a string or a list like ``[-1, 0, 1]``
+  // desc = in the R package, can use either a string or a vector like ``c(-1, 0, 1)``
   std::vector monotone_constraints;
 
   // type = enum