Skip to content

Commit

Permalink
[R-package] [c++] add tighter multithreading control, avoid global Op…
Browse files Browse the repository at this point in the history
…enMP side effects (fixes #4705, fixes #5102)
  • Loading branch information
jameslamb committed Dec 5, 2023
1 parent f5b6bd6 commit 7f0de8f
Show file tree
Hide file tree
Showing 57 changed files with 411 additions and 16 deletions.
1 change: 0 additions & 1 deletion .ci/lint-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ get_omp_pragmas_without_num_threads() {
--include='*.hpp' \
'pragma omp parallel' \
| grep -v ' num_threads' \
| grep -v 'openmp_wrapper.h'
}
PROBLEMATIC_LINES=$(
get_omp_pragmas_without_num_threads
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ file(
src/objective/*.cpp
src/network/*.cpp
src/treelearner/*.cpp
src/utils/*.cpp
if(USE_CUDA)
src/treelearner/*.cu
src/boosting/cuda/*.cpp
Expand Down
2 changes: 2 additions & 0 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ S3method(print,lgb.Booster)
S3method(set_field,lgb.Dataset)
S3method(slice,lgb.Dataset)
S3method(summary,lgb.Booster)
export(getLGBMthreads)
export(get_field)
export(lgb.Dataset)
export(lgb.Dataset.construct)
Expand All @@ -35,6 +36,7 @@ export(lgb.train)
export(lightgbm)
export(readRDS.lgb.Booster)
export(saveRDS.lgb.Booster)
export(setLGBMthreads)
export(set_field)
export(slice)
import(methods)
Expand Down
1 change: 1 addition & 0 deletions R-package/R/aliases.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
, "max_bin"
, "max_bin_by_feature"
, "min_data_in_bin"
, "num_threads"
, "pre_partition"
, "precise_float_parser"
, "two_round"
Expand Down
12 changes: 12 additions & 0 deletions R-package/R/lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,8 @@ NULL
#' the factor levels not being present in the output.
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1082,6 +1084,8 @@ predict.lgb.Booster <- function(object,
#' \link{predict.lgb.Booster}.
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' library(lightgbm)
#' data(mtcars)
#' X <- as.matrix(mtcars[, -1L])
Expand Down Expand Up @@ -1224,6 +1228,8 @@ summary.lgb.Booster <- function(object, ...) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1289,6 +1295,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' library(lightgbm)
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
Expand Down Expand Up @@ -1346,6 +1354,8 @@ lgb.save <- function(booster, filename, num_iteration = NULL) {
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1396,6 +1406,8 @@ lgb.dump <- function(booster, num_iteration = NULL) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' # train a regression model
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
Expand Down
22 changes: 22 additions & 0 deletions R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,8 @@ Dataset <- R6::R6Class(
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -837,6 +839,8 @@ lgb.Dataset <- function(data,
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -913,6 +917,8 @@ lgb.Dataset.create.valid <- function(dataset,
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -942,6 +948,8 @@ lgb.Dataset.construct <- function(dataset) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -975,6 +983,8 @@ dim.lgb.Dataset <- function(x) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1045,6 +1055,8 @@ dimnames.lgb.Dataset <- function(x) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1089,6 +1101,8 @@ slice.lgb.Dataset <- function(dataset, idxset) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1138,6 +1152,8 @@ get_field.lgb.Dataset <- function(dataset, field_name) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1177,6 +1193,8 @@ set_field.lgb.Dataset <- function(dataset, field_name, data) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down Expand Up @@ -1207,6 +1225,8 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' # create training Dataset
#' data(agaricus.train, package ="lightgbm")
#' train <- agaricus.train
Expand Down Expand Up @@ -1240,6 +1260,8 @@ lgb.Dataset.set.reference <- function(dataset, reference) {
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ CVBooster <- R6::R6Class(
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.interprete.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' Logit <- function(x) log(x / (1.0 - x))
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.model.dt.tree.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.plot.importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.plot.interpretation.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' Logit <- function(x) {
#' log(x / (1.0 - x))
#' }
Expand Down
4 changes: 4 additions & 0 deletions R-package/R/lgb.restore_handle.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
#' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly).
#' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}.
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data("agaricus.train")
#' model <- lightgbm(
#' agaricus.train$data
Expand All @@ -33,6 +36,7 @@
#' model_new$check_null_handle()
#' lgb.restore_handle(model_new)
#' model_new$check_null_handle()
#' }
#' @export
lgb.restore_handle <- function(model) {
if (!.is_Booster(x = model)) {
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/lgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#'
#' @examples
#' \donttest{
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
51 changes: 51 additions & 0 deletions R-package/R/multithreading.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#' @name setLGBMThreads
#' @title Set maximum number of threads used by LightGBM
#' @description Many LightGBM operations are sped up with multi-threading. For most of them, the
#'              number of threads can be chosen with the \code{num_threads} parameter, supplied
#'              through \code{params} to functions such as \link{lgb.train} and \link{lgb.Dataset}.
#'              Some operations, however (for example, materializing a model from a text file), run
#'              through code paths that don't explicitly accept thread-control configuration.
#'
#'              Use this function to put an upper bound on the number of threads LightGBM will use
#'              for such operations.
#'
#'              The setting applies to all LightGBM operations in the same process.
#'
#'              For example, after calling \code{setLGBMthreads(4)}, no other multi-threaded LightGBM
#'              operation in the same process will use more than 4 threads.
#'
#'              Call \code{setLGBMthreads(-1)} to remove this limitation.
#' @param num_threads maximum number of threads to be used by LightGBM in multi-threaded operations
#' @return \code{NULL}, invisibly
#' @seealso \link{getLGBMthreads}
#' @export
setLGBMthreads <- function(num_threads) {
    # invoked only for its side effect; whatever the native routine returns is discarded
    .Call(LGBM_SetMaxThreads_R, num_threads)
    return(invisible(NULL))
}

#' @name getLGBMThreads
#' @title Get default number of threads used by LightGBM
#' @description Many LightGBM operations are sped up with multi-threading. For most of them, the
#'              number of threads can be chosen with the \code{num_threads} parameter, supplied
#'              through \code{params} to functions such as \link{lgb.train} and \link{lgb.Dataset}.
#'              Some operations, however (for example, materializing a model from a text file), run
#'              through code paths that don't explicitly accept thread-control configuration.
#'
#'              Use this function to look up the default number of threads LightGBM will use for
#'              such operations.
#' @return number of threads, as an integer. A value of \code{-1} means that whenever parameter
#'         \code{num_threads} is not explicitly supplied, LightGBM will automatically choose how many
#'         threads to use.
#' @seealso \link{setLGBMthreads}
#' @export
getLGBMthreads <- function() {
    # Integer slot handed to the native routine. The value returned by .Call() itself is
    # not used; this object is what gets returned to the caller.
    # NOTE(review): presumably LGBM_GetMaxThreads_R writes the result into this object
    # in place -- confirm against the C++ side before restructuring.
    max_threads <- 0L
    .Call(LGBM_GetMaxThreads_R, max_threads)
    return(max_threads)
}
2 changes: 2 additions & 0 deletions R-package/R/readRDS.lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/R/saveRDS.lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#' @examples
#' \donttest{
#' library(lightgbm)
#' \dontshow{setLGBMthreads(2L)}
#' \dontshow{data.table::setDTthreads(1L)}
#' data(agaricus.train, package = "lightgbm")
#' train <- agaricus.train
#' dtrain <- lgb.Dataset(train$data, label = train$label)
Expand Down
2 changes: 2 additions & 0 deletions R-package/man/dim.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions R-package/man/dimnames.lgb.Dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions R-package/man/getLGBMThreads.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions R-package/man/get_field.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7f0de8f

Please sign in to comment.