Description
The Problem: saveRDS / readRDS for lightgbm engine
-
I'm having trouble stacking an ensemble model that uses `set_engine("lightgbm")`: if the fitted model stack is saved with `saveRDS()` and read back in later with `readRDS()`, the restored model will not predict.
-
Error message: "Error in predictor$predict(data = data, start_iteration = start_iteration, :
Attempting to use a Booster which no longer exists. This can happen if you have called Booster$finalize() or if this Booster was saved with saveRDS(). To avoid this error in the future, use saveRDS.lgb.Booster() or Booster$save_model() to save lightgbm Boosters." -
This seems to be an underlying issue with lightgbm, but I'm hoping it could be handled in stacks; alternatively, maybe this bug should be posted on bonsai. I would like to be able to stack cubist + lightgbm easily, without needing a workaround.
Reproducible example
library(stacks)
library(tidymodels)
library(workflows)
library(recipes)
library(bonsai)
library(dplyr)
# Simulate a small regression data set from a fixed seed: A is a draw of
# 999 standard-normal values, and B and C are deterministic functions of A.
in_garden <- 315159
set.seed(in_garden)
r <- rnorm(999)
some_data <- data.frame(A = r)
some_data$B <- r + 1.3
some_data$C <- r^2
# introduce cv folds, recipes, param tune, and stacks --
# Split some_data into training and testing sets (prop = .75 for training),
# stratifying the split on the outcome column A.
data_split <- initial_split(some_data,
prop = .75,
strata = A)
# NOTE: the two objects below reuse the names of the training()/testing()
# accessor functions that create them.
training <- training(data_split)
testing <- testing(data_split)
# cross-validations ---
# 5-fold cross-validation on the training set, again stratified on A.
folds <- vfold_cv(training, strata = A, v = 5)
# Recipe modelling A on every other column of the training set.
# The nzv and dummy steps are restricted to nominal columns and the zv step
# to all predictors. some_data is built from numeric columns only, so the
# all_nominal() selectors presumably match nothing in this reprex.
model_recipe <-
recipe(A ~ ., training) %>%
step_nzv(all_nominal()) %>%
step_dummy(all_nominal()) %>%
step_zv(all_predictors())
# gbm - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Tune a boosted-tree regression workflow (recipe + lightgbm engine) over the
# cross-validation folds. trees and mtry are marked with tune() and evaluated
# on the explicit 2 x 2 grid below (trees in {5, 50}, mtry in {1, 2}), with
# RMSE as the only metric.
# nthread = 8 is passed as an engine argument; the LightGBM warning printed
# after fit_members() further down reports that it is ignored.
# control_stack_grid() is the control helper provided by stacks for
# tune_grid(); presumably it keeps what add_candidates() needs later
# (see the stacks documentation to confirm exactly what is retained).
gbm_results <-
tune_grid(
workflow() %>%
add_recipe(model_recipe) %>%
add_model(
boost_tree(
trees = tune(),
mtry = tune()) %>%
set_engine("lightgbm", nthread = 8) %>%
set_mode("regression")
),
resamples = folds,
grid = expand.grid(
trees = c(5, 50),
mtry = c(1, 2)
),
control = control_stack_grid(),
metrics = metric_set(rmse)
)
# ensembling - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Start an empty data stack and add the lightgbm tuning results as candidates.
# The cubist candidates (cube_results) are commented out, so this reprex
# stacks lightgbm candidates only.
candidate_stack <-
stacks() %>%
# add_candidates(cube_results) %>%
add_candidates(gbm_results)
# Blend the candidates' predictions into an ensemble specification ...
ensemble_mixed_model <- candidate_stack %>%
blend_predictions()
# ... then fit the ensemble's member models. The reprex output line below is
# the LightGBM warning emitted by this step.
ensemble_mixed_model_fitted <- ensemble_mixed_model %>%
fit_members()
#> [LightGBM] [Warning] num_threads is set=0, nthread=8 will be ignored. Current value: num_threads=0
# save/read RDS & predict issue - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Round-trip the fitted ensemble through a temporary .rds file. The in-memory
# object is removed in between, so the predict() call below can only use the
# copy restored by readRDS().
my_temp <- tempfile(fileext = ".rds")
saveRDS(ensemble_mixed_model_fitted, file = my_temp)
remove(ensemble_mixed_model_fitted)
ensemble_mixed_model_fitted <- readRDS(my_temp)
# Predict on the held-out rows and bind the predictions to them. With the
# restored object this fails with the lightgbm Booster error shown below,
# which is the problem being reported.
testing %>%
bind_cols(predict(ensemble_mixed_model_fitted, .))
#> Error in predictor$predict(data = data, start_iteration = start_iteration, : Attempting to use a Booster which no longer exists. This can happen if you have called Booster$finalize() or if this Booster was saved with saveRDS(). To avoid this error in the future, use saveRDS.lgb.Booster() or Booster$save_model() to save lightgbm Boosters.
Created on 2022-08-03 by the reprex package (v2.0.1)
Session info
sessioninfo::session_info()
#> - Session info ---------------------------------------------------------------
#> setting value
#> version R version 4.1.3 (2022-03-10)
#> os Windows 10 x64 (build 19042)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.1252
#> ctype English_United States.1252
#> tz America/New_York
#> date 2022-08-03
#> pandoc 2.17.1.1 @ C:/Users/fhull/AppData/Local/Programs/RStudio/bin/quarto/bin/ (via rmarkdown)
#>
#> - Packages -------------------------------------------------------------------
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.3)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.2)
#> bonsai * 0.1.0 2022-06-23 [1] CRAN (R 4.1.3)
#> broom * 0.7.12 2022-01-28 [1] CRAN (R 4.1.3)
#> butcher 0.2.0 2022-06-14 [1] CRAN (R 4.1.3)
#> class 7.3-20 2022-01-16 [1] CRAN (R 4.1.3)
#> cli 3.2.0 2022-02-14 [1] CRAN (R 4.1.3)
#> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.1.3)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.3)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.3)
#> data.table 1.14.2 2021-09-27 [1] CRAN (R 4.1.3)
#> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.3)
#> dials * 1.0.0 2022-06-14 [1] CRAN (R 4.1.3)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.1.3)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.3)
#> dplyr * 1.0.8 2022-02-08 [1] CRAN (R 4.1.3)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.3)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.3)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.3)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.3)
#> foreach 1.5.2 2022-02-02 [1] CRAN (R 4.1.3)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.3)
#> furrr 0.2.3 2021-06-25 [1] CRAN (R 4.1.3)
#> future 1.24.0 2022-02-19 [1] CRAN (R 4.1.3)
#> future.apply 1.8.1 2021-08-10 [1] CRAN (R 4.1.3)
#> generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.3)
#> ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.1.3)
#> glmnet * 4.1-3 2021-11-02 [1] CRAN (R 4.1.3)
#> globals 0.14.0 2020-11-22 [1] CRAN (R 4.1.1)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.3)
#> gower 1.0.0 2022-02-03 [1] CRAN (R 4.1.2)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.1.3)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.3)
#> hardhat 1.1.0 2022-06-10 [1] CRAN (R 4.1.3)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.1.3)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.3)
#> infer * 1.0.0 2021-08-13 [1] CRAN (R 4.1.3)
#> ipred 0.9-12 2021-09-15 [1] CRAN (R 4.1.3)
#> iterators 1.0.14 2022-02-05 [1] CRAN (R 4.1.3)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.3)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.1.3)
#> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.1.3)
#> lava 1.6.10 2021-09-02 [1] CRAN (R 4.1.3)
#> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.1.3)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.3)
#> lightgbm * 3.3.2 2022-01-14 [1] CRAN (R 4.1.3)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.1.3)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.1.3)
#> magrittr 2.0.2 2022-01-26 [1] CRAN (R 4.1.3)
#> MASS 7.3-55 2022-01-16 [1] CRAN (R 4.1.3)
#> Matrix * 1.4-0 2021-12-08 [1] CRAN (R 4.1.3)
#> modeldata * 0.1.1 2021-07-14 [1] CRAN (R 4.1.3)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.3)
#> nnet 7.3-17 2022-01-16 [1] CRAN (R 4.1.3)
#> parallelly 1.30.0 2021-12-17 [1] CRAN (R 4.1.2)
#> parsnip * 1.0.0 2022-06-16 [1] CRAN (R 4.1.3)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.3)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.3)
#> plyr 1.8.6 2020-03-03 [1] CRAN (R 4.1.3)
#> pROC 1.18.0 2021-09-03 [1] CRAN (R 4.1.3)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.1.3)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.3)
#> R6 * 2.5.1 2021-08-19 [1] CRAN (R 4.1.3)
#> Rcpp 1.0.8.3 2022-03-17 [1] CRAN (R 4.1.3)
#> recipes * 0.2.0 2022-02-18 [1] CRAN (R 4.1.3)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.3)
#> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.1.3)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.1.3)
#> rpart 4.1.16 2022-01-24 [1] CRAN (R 4.1.3)
#> rsample * 0.1.1 2021-11-08 [1] CRAN (R 4.1.3)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.3)
#> scales * 1.2.0 2022-04-13 [1] CRAN (R 4.1.3)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.3)
#> shape 1.4.6 2021-05-19 [1] CRAN (R 4.1.1)
#> stacks * 0.2.3 2022-05-12 [1] CRAN (R 4.1.3)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.1.3)
#> survival 3.2-13 2021-08-24 [1] CRAN (R 4.1.3)
#> tibble * 3.1.7 2022-05-03 [1] CRAN (R 4.1.3)
#> tidymodels * 0.2.0 2022-03-19 [1] CRAN (R 4.1.3)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.3)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.1.3)
#> timeDate 3043.102 2018-02-21 [1] CRAN (R 4.1.2)
#> tune * 0.2.0 2022-03-19 [1] CRAN (R 4.1.3)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.3)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.1.3)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.3)
#> workflows * 0.2.6 2022-03-18 [1] CRAN (R 4.1.3)
#> workflowsets * 0.2.1 2022-03-15 [1] CRAN (R 4.1.3)
#> xfun 0.30 2022-03-02 [1] CRAN (R 4.1.3)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
#> yardstick * 0.0.9 2021-11-22 [1] CRAN (R 4.1.3)
#>
#> [1] C:/Users/fhull/Documents/R/R-4.1.3/library
#>
#> ------------------------------------------------------------------------------