[R-package] fix warnings in demos #4569

Merged · 5 commits · Aug 29, 2021
Changes from 4 commits
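
The recurring change across these demos is to collect model parameters into a single params list instead of passing them as extra keyword arguments to lightgbm() / lgb.train(), which is what triggers the R package's passed-through-parameters warnings that this PR addresses. For context, a minimal sketch of that pattern (not part of the diff; it assumes the agaricus dataset that ships with the package):

library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

# Collect everything model-related in one list ...
train_params <- list(
    num_leaves = 4L
    , learning_rate = 1.0
    , objective = "binary"
)

# ... and pass it through `params`, keeping only data / label / nrounds as
# top-level arguments. Passing num_leaves, learning_rate, etc. directly,
# as the demos did before this PR, is what produced the warnings.
bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = train_params
    , nrounds = 2L
)
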
3 changes: 2 additions & 1 deletion .gitignore
@@ -326,7 +326,7 @@ coverage.xml
.hypothesis/
**/coverage.html
**/coverage.html.zip
R-package/tests/testthat/Rplots.pdf
**/Rplots.pdf

# Translations
*.mo
@@ -427,6 +427,7 @@ miktex*.zip
*.def

# Files created by examples and tests
*.buffer
**/lgb-Dataset.data
**/lgb.Dataset.data
**/model.txt
49 changes: 17 additions & 32 deletions R-package/demo/basic_walkthrough.R
@@ -12,29 +12,33 @@ test <- agaricus.test
class(train$label)
class(train$data)

# Set parameters for model training
train_params <- list(
num_leaves = 4L
, learning_rate = 1.0
, objective = "binary"
, nthread = 2L
)

#--------------------Basic Training using lightgbm----------------
# This is the basic usage of lightgbm: you can put a matrix in the data field
# Note: we are putting in a sparse matrix here; lightgbm naturally handles sparse input
# Use a sparse matrix when your features are sparse (e.g. when you are using a one-hot encoded vector)
print("Training lightgbm with sparseMatrix")
bst <- lightgbm(
data = train$data
, params = train_params
, label = train$label
, num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)

# Alternatively, you can put in a dense matrix, i.e. a basic R matrix
print("Training lightgbm with Matrix")
bst <- lightgbm(
data = as.matrix(train$data)
, params = train_params
, label = train$label
, num_leaves = 4L
, learning_rate = 1.0
, nrounds = 2L
, objective = "binary"
)

# You can also put in an lgb.Dataset object, which stores label, data and other metadata needed for advanced features
@@ -45,42 +49,32 @@ dtrain <- lgb.Dataset(
)
bst <- lightgbm(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, objective = "binary"
)

# Verbose = 0,1,2
print("Train lightgbm with verbose 0, no message")
bst <- lightgbm(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, objective = "binary"
, verbose = 0L
)

print("Train lightgbm with verbose 1, print evaluation metric")
bst <- lightgbm(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, nthread = 2L
, objective = "binary"
, verbose = 1L
)

print("Train lightgbm with verbose 2, also print information about tree")
bst <- lightgbm(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, nthread = 2L
, objective = "binary"
, verbose = 2L
)

@@ -126,25 +120,19 @@ valids <- list(train = dtrain, test = dtest)
print("Train lightgbm using lgb.train with valids")
bst <- lgb.train(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, valids = valids
, nthread = 2L
, objective = "binary"
)

# We can change evaluation metrics, or use multiple evaluation metrics
print("Train lightgbm using lgb.train with valids, watch logloss and error")
bst <- lgb.train(
data = dtrain
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, valids = valids
, eval = c("binary_error", "binary_logloss")
, nthread = 2L
, objective = "binary"
)

# lgb.Dataset can also be saved using lgb.Dataset.save
@@ -154,12 +142,9 @@ lgb.Dataset.save(dtrain, "dtrain.buffer")
dtrain2 <- lgb.Dataset("dtrain.buffer")
bst <- lgb.train(
data = dtrain2
, num_leaves = 4L
, learning_rate = 1.0
, params = train_params
, nrounds = 2L
, valids = valids
, nthread = 2L
, objective = "binary"
)

# information can be extracted from lgb.Dataset using getinfo
1 change: 0 additions & 1 deletion R-package/demo/categorical_features_rules.R
@@ -85,7 +85,6 @@ params <- list(
, metric = "l2"
, min_data = 1L
, learning_rate = 0.1
, min_data = 0L
, min_hessian = 1.0
, max_depth = 2L
)
2 changes: 1 addition & 1 deletion R-package/demo/early_stopping.R
@@ -46,7 +46,7 @@ bst <- lgb.train(
, dtrain
, num_round
, valids
, objective = logregobj
, obj = logregobj
, eval = evalerror
, early_stopping_round = 3L
)
31 changes: 21 additions & 10 deletions R-package/demo/leaf_stability.R
@@ -85,18 +85,21 @@ test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)

# set up parameters and train a model
params <- list(objective = "regression", metric = "l2")
params <- list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 0.1
, bagging_fraction = 0.1
, bagging_freq = 1L
, bagging_seed = 1L
)
valids <- list(test = dtest)
model <- lgb.train(
params
, dtrain
, 50L
, valids
, min_data = 1L
, learning_rate = 0.1
, bagging_fraction = 0.1
, bagging_freq = 1L
, bagging_seed = 1L
)

# We create a data.frame with the following structure:
@@ -141,13 +144,17 @@ table(new_data$binned)
.depth_density_plot(df = new_data)

# Now, let's show with other parameters
params <- list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
model2 <- lgb.train(
params
, dtrain
, 100L
, valids
, min_data = 1L
, learning_rate = 1.0
)

# We create the data structure, but for model2
@@ -193,13 +200,17 @@ table(new_data2$binned)
.depth_density_plot(df = new_data2)

# Now, try with very severe overfitting
params <- list(
objective = "regression"
, metric = "l2"
, min_data = 1L
, learning_rate = 1.0
)
model3 <- lgb.train(
params
, dtrain
, 1000L
, valids
, min_data = 1L
, learning_rate = 1.0
)

# We create the data structure, but for model3
24 changes: 15 additions & 9 deletions R-package/demo/multiclass.R
@@ -18,14 +18,18 @@ dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5
valids <- list(test = dtest)

# Method 1 of training
params <- list(objective = "multiclass", metric = "multi_error", num_class = 3L)
params <- list(
objective = "multiclass"
, metric = "multi_error"
, num_class = 3L
, min_data = 1L
, learning_rate = 1.0
)
model <- lgb.train(
params
, dtrain
, 100L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
)

@@ -34,18 +38,20 @@ model <- lgb.train(
my_preds <- predict(model, test[, 1L:4L])

# Method 2 of training, identical
model <- lgb.train(
list()
, dtrain
, 100L
, valids
, min_data = 1L
params <- list(
min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3L
)
model <- lgb.train(
params
, dtrain
, 100L
, valids
)

# We can predict on test data, identical
my_preds <- predict(model, test[, 1L:4L])
28 changes: 16 additions & 12 deletions R-package/demo/multiclass_custom_objective.R
@@ -20,18 +20,20 @@ valids <- list(train = dtrain, test = dtest)
# Method 1 of training with built-in multiclass objective
# Note: need to turn off boost from average to match custom objective
# (https://github.com/microsoft/LightGBM/issues/1846)
params <- list(
min_data = 1L
, learning_rate = 1.0
, num_class = 3L
, boost_from_average = FALSE
, metric = "multi_logloss"
)
model_builtin <- lgb.train(
list()
params
, dtrain
, boost_from_average = FALSE
, 100L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
, objective = "multiclass"
, metric = "multi_logloss"
, num_class = 3L
, obj = "multiclass"
)

preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
@@ -92,17 +94,19 @@ custom_multiclass_metric <- function(preds, dtrain) {
))
}

params <- list(
min_data = 1L
, learning_rate = 1.0
, num_class = 3L
)
model_custom <- lgb.train(
list()
params
, dtrain
, 100L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
, objective = custom_multiclass_obj
, obj = custom_multiclass_obj
, eval = custom_multiclass_metric
, num_class = 3L
)

preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE, reshape = TRUE)
12 changes: 6 additions & 6 deletions R-package/demo/weight_param.R
@@ -34,14 +34,14 @@ params <- list(
, num_leaves = 7L
, max_depth = 3L
, nthread = 1L
, min_data = 1L
, learning_rate = 1.0
)
model <- lgb.train(
params
, dtrain
, 50L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
)
weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -58,14 +58,14 @@ params <- list(
, num_leaves = 7L
, max_depth = 3L
, nthread = 1L
, min_data = 1L
, learning_rate = 1.0
)
model <- lgb.train(
params
, dtrain
, 50L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
)
small_weight_loss <- as.numeric(model$record_evals$test$l2$eval)
@@ -94,14 +94,14 @@ params <- list(
, num_leaves = 7L
, max_depth = 3L
, nthread = 1L
, min_data = 1L
, learning_rate = 1.0
)
model <- lgb.train(
params
, dtrain
, 50L
, valids
, min_data = 1L
, learning_rate = 1.0
, early_stopping_rounds = 10L
)
large_weight_loss <- as.numeric(model$record_evals$test$l2$eval)