diff --git a/NEWS.md b/NEWS.md index 84a485f51..4dc381af6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,13 @@ -# parsnip (development version) +# parsnip 0.1.3 * A `glance()` method for `model_fit` objects was added (#325) * Specific `tidy()` methods for `glmnet` models fit via `parsnip` were created so that the coefficients for the specific fitted `parsnip` model are returned. +## Fixes + + * `glmnet` models were fitting two intercepts (#349) + # parsnip 0.1.2 ## Breaking Changes diff --git a/R/fit_helpers.R b/R/fit_helpers.R index 45ea0697d..d5cdb08f1 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -63,6 +63,15 @@ xy_xy <- function(object, env, control, target = "none", ...) { rlang::abort("For classification models, the outcome should be a factor.") } + encoding_info <- + get_encoding(class(object)[1]) %>% + dplyr::filter(mode == object$mode, engine == object$engine) + + remove_intercept <- encoding_info %>% dplyr::pull(remove_intercept) + if (remove_intercept) { + env$x <- env$x[, colnames(env$x) != "(Intercept)", drop = FALSE] + } + # if descriptors are needed, update descr_env with the calculated values if (requires_descrs(object)) { data_stats <- get_descr_xy(env$x, env$y) diff --git a/tests/testthat/test_linear_reg_glmnet.R b/tests/testthat/test_linear_reg_glmnet.R deleted file mode 100644 index 3cb0cfadd..000000000 --- a/tests/testthat/test_linear_reg_glmnet.R +++ /dev/null @@ -1,301 +0,0 @@ -library(testthat) -library(parsnip) -library(rlang) -library(tidyr) - -# ------------------------------------------------------------------------------ - -context("linear regression execution with glmnet") -source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] - - -num_pred <- c("compounds", "iterations", "num_pending") -hpc_bad_form <- as.formula(class ~ term) -hpc_basic <- linear_reg(penalty = .1, mixture = .3) %>% - set_engine("glmnet", nlambda = 15) -no_lambda <- linear_reg(mixture = .3) %>% - set_engine("glmnet") - -# ------------------------------------------------------------------------------ - -test_that('glmnet execution', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - expect_error( - res <- fit_xy( - hpc_basic, - control = ctrl, - x = hpc[, num_pred], - y = hpc$input_fields - ), - regexp = NA - ) - - expect_true(has_multi_predict(res)) - expect_equal(multi_predict_args(res), "penalty") - - expect_error( - fit( - hpc_basic, - hpc_bad_form, - data = hpc, - control = ctrl - ) - ) - - glmnet_xy_catch <- fit_xy( - hpc_basic, - x = hpc[, num_pred], - y = factor(hpc$input_fields), - control = caught_ctrl - ) - expect_true(inherits(glmnet_xy_catch$fit, "try-error")) - -}) - -test_that('glmnet prediction, single lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - res_xy <- fit_xy( - hpc_basic, - control = ctrl, - x = hpc[, num_pred], - y = hpc$input_fields - ) - - # glmn_mod <- glmnet::glmnet(x = as.matrix(hpc[, num_pred]), y = hpc$input_fields, - # alpha = .3, nlambda = 15) - - uni_pred <- c(640.599944271351, 196.646976529848, 186.279646400216, 194.673852228774, - 198.126819755653) - - expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.0001) - - res_form <- fit( - hpc_basic, - input_fields ~ log(compounds) + class, - data = hpc, - control = ctrl - ) - - form_pred <- c(570.504089227118, 162.413061474088, 167.022896537861, 157.609071878082, - 165.887783741483) - - expect_equal(form_pred, predict(res_form, hpc[1:5,])$.pred, tolerance = 0.0001) -}) - - -test_that('glmnet prediction, multiple lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - lams <- c(.01, 0.1) - - hpc_mult <- linear_reg(penalty = lams, mixture = .3) %>% - set_engine("glmnet") - - res_xy <- fit_xy( - hpc_mult, - control = ctrl, - x = hpc[, num_pred], - y = hpc$input_fields - ) - - # mult_pred <- - # predict(res_xy$fit, - # newx = as.matrix(hpc[1:5, num_pred]), - # s = lams) - # mult_pred <- stack(as.data.frame(mult_pred)) - # mult_pred$penalty <- rep(lams, each = 5) - # mult_pred$rows <- rep(1:5, 2) - # mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] - # mult_pred <- mult_pred[, c("penalty", "values")] - # names(mult_pred) <- c("penalty", ".pred") - # mult_pred <- tibble::as_tibble(mult_pred) - mult_pred <- - tibble::tribble( - ~penalty, ~.pred, - 0.01, 639.672880668187, - 0.1, 639.672880668187, - 0.01, 197.744613311359, - 0.1, 197.744613311359, - 0.01, 187.737940787615, - 0.1, 187.737940787615, - 0.01, 195.780487678662, - 0.1, 195.780487678662, - 0.01, 199.217707535882, - 0.1, 199.217707535882 - ) - - expect_equal( - as.data.frame(mult_pred), - multi_predict(res_xy, new_data = hpc[1:5, num_pred], lambda = lams) %>% - unnest(cols = c(.pred)) %>% - as.data.frame(), - tolerance = 0.0001 - ) - - res_form <- fit( - hpc_mult, - input_fields ~ log(compounds) + class, - data = hpc, - control = ctrl - ) - - # form_mat <- model.matrix(input_fields ~ log(compounds) + class, data = hpc) - # form_mat <- form_mat[1:5, -1] - # - # form_pred <- - # predict(res_form$fit, - # newx = form_mat, - # s = lams) - # form_pred <- stack(as.data.frame(form_pred)) - # form_pred$penalty <- rep(lams, each = 5) - # form_pred$rows <- rep(1:5, 2) - # form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - # form_pred <- form_pred[, c("penalty", "values")] - # names(form_pred) <- c("penalty", ".pred") - # form_pred <- tibble::as_tibble(form_pred) - - form_pred <- - tibble::tribble( - ~penalty, ~.pred, - 0.01, 570.474473760044, - 0.1, 570.474473760044, - 0.01, 164.040104978709, - 0.1, 164.040104978709, - 0.01, 168.709676954287, - 0.1, 168.709676954287, - 0.01, 159.173862504055, - 0.1, 159.173862504055, - 0.01, 167.559854709074, - 0.1, 167.559854709074 - ) - - expect_equal( - as.data.frame(form_pred), - multi_predict(res_form, new_data = hpc[1:5, ], lambda = lams) %>% - unnest(cols = c(.pred)) %>% - as.data.frame(), - tolerance = 0.0001 - ) -}) - -test_that('glmnet prediction, all lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - hpc_all <- linear_reg(mixture = .3) %>% - set_engine("glmnet", nlambda = 7) - - res_xy <- fit_xy( - hpc_all, - control = ctrl, - x = hpc[, num_pred], - y = hpc$input_fields - ) - - all_pred <- predict(res_xy$fit, newx = as.matrix(hpc[1:5, num_pred])) - all_pred <- stack(as.data.frame(all_pred)) - all_pred$penalty <- rep(res_xy$fit$lambda, each = 5) - all_pred$rows <- rep(1:5, length(res_xy$fit$lambda)) - all_pred <- all_pred[order(all_pred$rows, all_pred$penalty), ] - all_pred <- all_pred[, c("penalty", "values")] - names(all_pred) <- c("penalty", ".pred") - all_pred <- tibble::as_tibble(all_pred) - - expect_equal(all_pred, multi_predict(res_xy, new_data = hpc[1:5,num_pred ]) %>% unnest(cols = c(.pred))) - - res_form <- fit( - hpc_all, - input_fields ~ log(compounds) + class, - data = hpc, - control = ctrl - ) - - form_mat <- model.matrix(input_fields ~ log(compounds) + class, data = hpc) - form_mat <- form_mat[1:5, -1] - - form_pred <- predict(res_form$fit, newx = form_mat) - form_pred <- stack(as.data.frame(form_pred)) - form_pred$penalty <- rep(res_form$fit$lambda, each = 5) - form_pred$rows <- rep(1:5, length(res_form$fit$lambda)) - form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - form_pred <- form_pred[, c("penalty", "values")] - names(form_pred) <- c("penalty", ".pred") - form_pred <- tibble::as_tibble(form_pred) - - expect_equal(form_pred, multi_predict(res_form, hpc[1:5, c("compounds", "class")]) %>% unnest(cols = c(.pred))) -}) - - -test_that('submodel prediction', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - reg_fit <- - linear_reg() %>% - set_engine("glmnet") %>% - fit(mpg ~ ., data = mtcars[-(1:4), ]) - - pred_glmn <- predict(reg_fit$fit, as.matrix(mtcars[1:4, -1]), s = .1) - - mp_res <- multi_predict(reg_fit, new_data = mtcars[1:4, -1], penalty = .1) - mp_res <- do.call("rbind", mp_res$.pred) - expect_equal(mp_res[[".pred"]], unname(pred_glmn[,1])) - - expect_error( - multi_predict(reg_fit, newdata = mtcars[1:4, -1], penalty = .1), - "Did you mean" - ) - - reg_fit <- - linear_reg() %>% - set_engine("glmnet") %>% - fit(mpg ~ ., data = mtcars[-(1:4), ]) - - - pred_glmn_all <- - predict(reg_fit$fit, as.matrix(mtcars[1:2, -1])) %>% - as.data.frame() %>% - stack() %>% - dplyr::arrange(ind) - - - mp_res_all <- - multi_predict(reg_fit, new_data = mtcars[1:2, -1]) %>% - tidyr::unnest(cols = c(.pred)) - - expect_equal(sort(mp_res_all$.pred), sort(pred_glmn_all$values)) - -}) - - -test_that('error traps', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - expect_error( - linear_reg() %>% - set_engine("glmnet") %>% - fit(mpg ~ ., data = mtcars[-(1:4), ]) %>% - predict(mtcars[-(1:4), ], penalty = 0:1) - ) - expect_error( - linear_reg() %>% - set_engine("glmnet") %>% - fit(mpg ~ ., data = mtcars[-(1:4), ]) %>% - predict(mtcars[-(1:4), ]) - ) - -}) - diff --git a/tests/testthat/test_linear_reg_stan.R b/tests/testthat/test_linear_reg_stan.R deleted file mode 100644 index 77301be30..000000000 --- a/tests/testthat/test_linear_reg_stan.R +++ /dev/null @@ -1,138 +0,0 @@ -library(testthat) -library(parsnip) -library(rlang) - -source(test_path("helper-objects.R")) -hpc <- hpc_data[, c(2:5, 8)] - -# ------------------------------------------------------------------------------ - -context("linear regression execution with stan") - -num_pred <- c("compounds", "iterations", "num_pending") -hpc_bad_form <- as.formula(class ~ term) -hpc_basic <- linear_reg() %>% - set_engine("stan", seed = 10, chains = 1) - -ctrl <- control_parsnip(verbosity = 0L, catch = FALSE) -caught_ctrl <- control_parsnip(verbosity = 0L, catch = TRUE) -quiet_ctrl <- control_parsnip(verbosity = 0L, catch = TRUE) - -# ------------------------------------------------------------------------------ - -test_that('stan_glm execution', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - expect_error( - res <- fit( - hpc_basic, - compounds ~ log(input_fields) + class, - data = hpc, - control = ctrl - ), - regexp = NA - ) - expect_error( - res <- fit_xy( - hpc_basic, - x = hpc[, num_pred], - y = hpc$input_fields, - control = ctrl - ), - regexp = NA - ) - - expect_false(has_multi_predict(res)) - expect_equal(multi_predict_args(res), NA_character_) - - expect_error( - res <- fit( - hpc_basic, - class ~ term, - data = hpc, - control = ctrl - ) - ) - -}) - - -test_that('stan prediction', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - uni_pred <- c(1691.46306020449, 1494.27323520418, 1522.36011539284, 1493.39683598195, - 1494.93053462084) - inl_pred <- c(429.164145548939, 256.32488428038, 254.949927688403, 255.007333947447, - 255.336665165556) - - res_xy <- fit_xy( - linear_reg() %>% - set_engine("stan", seed = 10, chains = 1), - x = hpc[, num_pred], - y = hpc$input_fields, - control = quiet_ctrl - ) - - expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.001) - - res_form <- fit( - hpc_basic, - compounds ~ log(input_fields) + class, - data = hpc, - control = quiet_ctrl - ) - expect_equal(inl_pred, predict(res_form, hpc[1:5, ])$.pred, tolerance = 0.001) -}) - - -test_that('stan intervals', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - res_xy <- fit_xy( - linear_reg() %>% - set_engine("stan", seed = 1333, chains = 10, iter = 1000), - x = hpc[, num_pred], - y = hpc$input_fields, - control = quiet_ctrl - ) - - set.seed(1231) - confidence_parsnip <- - predict(res_xy, - new_data = hpc[1:5,], - type = "conf_int", - level = 0.93) - - set.seed(1231) - prediction_parsnip <- - predict(res_xy, - new_data = hpc[1:5,], - type = "pred_int", - level = 0.93) - - ci_lower <- c(1577.25718753727, 1382.58210286254, 1399.96490471468, 1381.56774986889, - 1383.25519963864) - ci_upper <- c(1809.28331613624, 1609.11912475981, 1646.44852457781, 1608.3327281785, - 1609.4796390366) - - pi_lower <- c(-4960.33135373564, -5123.82860109357, -5063.60881734505, -5341.21637448872, - -5184.63627366821) - pi_upper <- c(8345.56815544477, 7954.98392035813, 7890.10036321417, 7970.64062851536, - 8247.10241974192) - - expect_equivalent(confidence_parsnip$.pred_lower, ci_lower, tolerance = 1e-2) - expect_equivalent(confidence_parsnip$.pred_upper, ci_upper, tolerance = 1e-2) - - expect_equivalent(prediction_parsnip$.pred_lower, - pi_lower, - tolerance = 1e-2) - expect_equivalent(prediction_parsnip$.pred_upper, - pi_upper, - tolerance = 1e-2) -}) - - - diff --git a/tests/testthat/test_logistic_reg_glmnet.R b/tests/testthat/test_logistic_reg_glmnet.R deleted file mode 100644 index cd5d0b358..000000000 --- a/tests/testthat/test_logistic_reg_glmnet.R +++ /dev/null @@ -1,425 +0,0 @@ -library(testthat) -library(parsnip) -library(rlang) -library(tibble) -library(tidyr) - -# ------------------------------------------------------------------------------ - -context("logistic regression execution with glmnet") -source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] - -lending_club <- head(lending_club, 200) -lc_form <- as.formula(Class ~ log(funded_amnt) + int_rate) -num_pred <- c("funded_amnt", "annual_inc", "num_il_tl") -lc_bad_form <- as.formula(funded_amnt ~ term) -lc_basic <- logistic_reg() %>% set_engine("glmnet") - -# ------------------------------------------------------------------------------ - -test_that('glmnet execution', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - expect_error( - res <- fit_xy( - lc_basic, - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ), - regexp = NA - ) - - expect_true(has_multi_predict(res)) - expect_equal(multi_predict_args(res), "penalty") - - expect_error( - glmnet_xy_catch <- fit_xy( - lc_basic, - x = lending_club[, num_pred], - y = lending_club$total_bal_il, - control = caught_ctrl - ) - ) -}) - -test_that('glmnet prediction, one lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - xy_fit <- fit_xy( - logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - uni_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - s = 0.1, type = "response")[,1] - uni_pred <- ifelse(uni_pred >= 0.5, "good", "bad") - uni_pred <- factor(uni_pred, levels = levels(lending_club$Class)) - uni_pred <- unname(uni_pred) - - expect_equal(uni_pred, predict(xy_fit, lending_club[1:7, num_pred])$.pred_class) - - res_form <- fit( - logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - s = 0.1, type = "response")[,1] - form_pred <- ifelse(form_pred >= 0.5, "good", "bad") - form_pred <- factor(form_pred, levels = levels(lending_club$Class)) - form_pred <- unname(form_pred) - - expect_equal( - form_pred, - predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "class")$.pred_class - ) - -}) - - -test_that('glmnet prediction, mulitiple lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - lams <- c(0.01, 0.1) - - xy_fit <- fit_xy( - logistic_reg(penalty = lams) %>% set_engine("glmnet"), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - mult_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - s = lams, type = "response") - mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred$values <- ifelse(mult_pred$values >= 0.5, "good", "bad") - mult_pred$values <- factor(mult_pred$values, levels = levels(lending_club$Class)) - mult_pred$penalty <- rep(lams, each = 7) - mult_pred$rows <- rep(1:7, 2) - mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] - mult_pred <- mult_pred[, c("penalty", "values")] - names(mult_pred) <- c("penalty", ".pred_class") - mult_pred <- tibble::as_tibble(mult_pred) - - expect_equal( - mult_pred, - multi_predict(xy_fit, lending_club[1:7, num_pred], type = "class") %>% unnest(cols = c(.pred)) - ) - - res_form <- fit( - logistic_reg(penalty = lams) %>% set_engine("glmnet"), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - s = lams) - form_pred <- stack(as.data.frame(form_pred)) - form_pred$values <- ifelse(form_pred$values >= 0.5, "good", "bad") - form_pred$values <- factor(form_pred$values, levels = levels(lending_club$Class)) - form_pred$penalty <- rep(lams, each = 7) - form_pred$rows <- rep(1:7, 2) - form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - form_pred <- form_pred[, c("penalty", "values")] - names(form_pred) <- c("penalty", ".pred_class") - form_pred <- tibble::as_tibble(form_pred) - - expect_equal( - form_pred, - multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% unnest(cols = c(.pred)) - ) - -}) - -test_that('glmnet prediction, no lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - xy_fit <- fit_xy( - logistic_reg() %>% set_engine("glmnet", nlambda = 11), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - mult_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - s = xy_fit$fit$lambda, type = "response") - mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred$values <- ifelse(mult_pred$values >= 0.5, "good", "bad") - mult_pred$values <- factor(mult_pred$values, levels = levels(lending_club$Class)) - mult_pred$penalty <- rep(xy_fit$fit$lambda, each = 7) - mult_pred$rows <- rep(1:7, 2) - mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] - mult_pred <- mult_pred[, c("penalty", "values")] - names(mult_pred) <- c("penalty", ".pred_class") - mult_pred <- tibble::as_tibble(mult_pred) - - expect_equal(mult_pred, multi_predict(xy_fit, lending_club[1:7, num_pred]) %>% unnest(cols = c(.pred))) - - res_form <- fit( - logistic_reg() %>% set_engine("glmnet", nlambda = 11), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - type = "response") - form_pred <- stack(as.data.frame(form_pred)) - form_pred$values <- ifelse(form_pred$values >= 0.5, "good", "bad") - form_pred$values <- factor(form_pred$values, levels = levels(lending_club$Class)) - form_pred$penalty <- rep(res_form$fit$lambda, each = 7) - form_pred$rows <- rep(1:7, 2) - form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - form_pred <- form_pred[, c("penalty", "values")] - names(form_pred) <- c("penalty", ".pred_class") - form_pred <- tibble::as_tibble(form_pred) - - expect_equal( - form_pred, - multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% unnest(cols = c(.pred)) - ) - -}) - - -test_that('glmnet probabilities, one lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - xy_fit <- fit_xy( - logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - uni_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - s = 0.1, type = "response")[,1] - uni_pred <- tibble(.pred_bad = 1 - uni_pred, .pred_good = uni_pred) - - expect_equal( - uni_pred, - predict(xy_fit, lending_club[1:7, num_pred], type = "prob") - ) - - res_form <- fit( - logistic_reg(penalty = 0.1) %>% set_engine("glmnet"), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - unname(predict(res_form$fit, - newx = form_mat, - s = 0.1, type = "response")[, 1]) - form_pred <- tibble(.pred_bad = 1 - form_pred, .pred_good = form_pred) - - expect_equal( - form_pred, - predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") - ) - - one_row <- predict(res_form, lending_club[1, c("funded_amnt", "int_rate")], type = "prob") - expect_equivalent(form_pred[1,], one_row) - -}) - -test_that('glmnet probabilities, mulitiple lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - lams <- c(0.01, 0.1) - - xy_fit <- fit_xy( - logistic_reg(penalty = lams) %>% set_engine("glmnet"), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - mult_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - s = lams, type = "response") - mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred$penalty <- rep(lams, each = 7) - mult_pred$rows <- rep(1:7, 2) - mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] - mult_pred$.pred_bad <- 1 - mult_pred$values - mult_pred <- mult_pred[, c("penalty", ".pred_bad", "values")] - names(mult_pred) <- c("penalty", ".pred_bad", ".pred_good") - mult_pred <- tibble::as_tibble(mult_pred) - - expect_equal( - mult_pred, - multi_predict(xy_fit, lending_club[1:7, num_pred], lambda = lams, type = "prob") %>% - unnest(cols = c(.pred)) - ) - - res_form <- fit( - logistic_reg(penalty = lams) %>% set_engine("glmnet"), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - s = lams, type = "response") - form_pred <- stack(as.data.frame(form_pred)) - form_pred$penalty <- rep(lams, each = 7) - form_pred$rows <- rep(1:7, 2) - form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - form_pred$.pred_bad <- 1 - form_pred$values - form_pred <- form_pred[, c("penalty", ".pred_bad", "values")] - names(form_pred) <- c("penalty", ".pred_bad", ".pred_good") - form_pred <- tibble::as_tibble(form_pred) - - expect_equal( - form_pred, - multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") %>% - unnest(cols = c(.pred)) - ) - -}) - - -test_that('glmnet probabilities, no lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - xy_fit <- fit_xy( - logistic_reg() %>% set_engine("glmnet"), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - mult_pred <- - predict(xy_fit$fit, - newx = as.matrix(lending_club[1:7, num_pred]), - type = "response") - mult_pred <- stack(as.data.frame(mult_pred)) - mult_pred$penalty <- rep(xy_fit$fit$lambda, each = 7) - mult_pred$rows <- rep(1:7, length(xy_fit$fit$lambda)) - mult_pred <- mult_pred[order(mult_pred$rows, mult_pred$penalty), ] - mult_pred$.pred_bad <- 1 - mult_pred$values - mult_pred <- mult_pred[, c("penalty", ".pred_bad", "values")] - names(mult_pred) <- c("penalty", ".pred_bad", ".pred_good") - mult_pred <- tibble::as_tibble(mult_pred) - - expect_equal( - mult_pred, - multi_predict(xy_fit, lending_club[1:7, num_pred], type = "prob") %>% - unnest(cols = c(.pred)) - ) - - res_form <- fit( - logistic_reg() %>% set_engine("glmnet"), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_mat <- model.matrix(Class ~ log(funded_amnt) + int_rate, data = lending_club) - form_mat <- form_mat[1:7, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - type = "response") - form_pred <- stack(as.data.frame(form_pred)) - form_pred$penalty <- rep(res_form$fit$lambda, each = 7) - form_pred$rows <- rep(1:7, length(res_form$fit$lambda)) - form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ] - form_pred$.pred_bad <- 1 - form_pred$values - form_pred <- form_pred[, c("penalty", ".pred_bad", "values")] - names(form_pred) <- c("penalty", ".pred_bad", ".pred_good") - form_pred <- tibble::as_tibble(form_pred) - - expect_equal( - form_pred, - multi_predict(res_form, lending_club[1:7, c("funded_amnt", "int_rate")], type = "prob") %>% unnest(cols = c(.pred)) - ) - -}) - - -test_that('submodel prediction', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - vars <- c("female", "tenure", "total_charges", "phone_service", "monthly_charges") - class_fit <- - logistic_reg() %>% - set_engine("glmnet") %>% - fit(churn ~ ., data = wa_churn[-(1:4), c("churn", vars)]) - - pred_glmn <- predict(class_fit$fit, as.matrix(wa_churn[1:4, vars]), s = .1, type = "response") - - mp_res <- multi_predict(class_fit, new_data = wa_churn[1:4, vars], penalty = .1, type = "prob") - mp_res <- do.call("rbind", mp_res$.pred) - expect_equal(mp_res[[".pred_No"]], unname(pred_glmn[,1])) - - expect_error( - multi_predict(class_fit, newdata = wa_churn[1:4, vars], penalty = .1, type = "prob"), - "Did you mean" - ) - - # Can predict using default penalty. See #108 - expect_error( - multi_predict(class_fit, new_data = wa_churn[1:4, vars]), - NA - ) - -}) diff --git a/tests/testthat/test_logistic_reg_stan.R b/tests/testthat/test_logistic_reg_stan.R deleted file mode 100644 index 5f599300c..000000000 --- a/tests/testthat/test_logistic_reg_stan.R +++ /dev/null @@ -1,198 +0,0 @@ -library(testthat) -library(parsnip) -library(rlang) -library(tibble) - -# ------------------------------------------------------------------------------ - -context("execution tests for stan logistic regression") -source(test_path("helper-objects.R")) -hpc <- hpc_data[1:150, c(2:5, 8)] - - -lending_club <- head(lending_club, 200) -lc_form <- as.formula(Class ~ log(funded_amnt) + int_rate) -num_pred <- c("funded_amnt", "annual_inc", "num_il_tl") -lc_basic <- - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1) - -ctrl <- control_parsnip(verbosity = 0, catch = FALSE) -caught_ctrl <- control_parsnip(verbosity = 0, catch = TRUE) -quiet_ctrl <- control_parsnip(verbosity = 0, catch = TRUE) - -# ------------------------------------------------------------------------------ - -test_that('stan_glm execution', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - expect_error( - res <- fit( - lc_basic, - funded_amnt ~ term, - data = lending_club, - control = ctrl - ) - ) - - expect_error( - fit_xy( - lc_basic, - control = caught_ctrl, - x = lending_club[, num_pred], - y = lending_club$total_bal_il - ) - ) - -}) - - -test_that('stan_glm prediction', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - xy_fit <- fit_xy( - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - xy_pred <- structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("bad", "good"), class = "factor") - - expect_equal(xy_pred, parsnip:::predict_class.model_fit(xy_fit, lending_club[1:7, num_pred])) - - res_form <- fit( - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_pred <- structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L), - .Label = c("bad", "good"), - class = "factor") - - expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, lending_club[1:7, c("funded_amnt", "int_rate")])) - -}) - - - -test_that('stan_glm probability', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - xy_fit <- fit_xy( - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1), - control = ctrl, - x = lending_club[, num_pred], - y = lending_club$Class - ) - - xy_pred <- - tibble::tribble( - ~bad, ~good, - 0.0173511241321764, 0.982648875867824, - 0.0550090130462705, 0.94499098695373, - 0.0292445716644468, 0.970755428335553, - 0.0516116810109397, 0.94838831898906, - 0.0142530690940691, 0.985746930905931, - 0.0184806465081366, 0.981519353491863, - 0.0253642111906806, 0.974635788809319 - ) - - expect_equivalent( - xy_pred %>% as.data.frame(), - parsnip:::predict_classprob.model_fit(xy_fit, lending_club[1:7, num_pred]) %>% as.data.frame(), - tolerance = 0.1 - ) - - res_form <- fit( - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - form_pred <- - tibble::tribble( - ~bad, ~good, - 0.0451516541621074, 0.954848345837893, - 0.0663232780491584, 0.933676721950842, - 0.0425128897715562, 0.957487110228444, - 0.0442197030195933, 0.955780296980407, - 0.00135166763321781, 0.998648332366782, - 0.013776487556396, 0.986223512443604, - 0.00359938202445076, 0.996400617975549 - ) - expect_equivalent( - form_pred %>% as.data.frame(), - parsnip:::predict_classprob.model_fit(res_form, lending_club[1:7, c("funded_amnt", "int_rate")]) %>% - as.data.frame(), - tolerance = 0.1 - ) -}) - - -test_that('stan intervals', { - skip_if_not_installed("rstanarm") - skip_on_cran() - - res_form <- fit( - logistic_reg() %>% - set_engine("stan", seed = 1333, chains = 1), - Class ~ log(funded_amnt) + int_rate, - data = lending_club, - control = ctrl - ) - - set.seed(555) - confidence_parsnip <- - predict(res_form, - new_data = lending_club[1:5,], - type = "conf_int", - level = 0.93, - std_error = TRUE) - - set.seed(555) - prediction_parsnip <- - predict(res_form, - new_data = lending_club[1:5,], - type = "pred_int", - level = 0.93, - std_error = TRUE) - - stan_lower <- - c(`1` = 0.913925483690233, `2` = 0.841801274737206, `3` = 0.91056642931229, - `4` = 0.913619668586545, `5` = 0.987780279394871) - stan_upper <- - c(`1` = 0.978674663115785, `2` = 0.975178762720162, `3` = 0.984417491942267, - `4` = 0.979606072215269, `5` = 0.9999049778978) - stan_std <- - c(`1` = 0.0181025303127182, `2` = 0.0388665155739319, `3` = 0.0205886091162274, - `4` = 0.0181715224502082, `5` = 0.00405145389896896) - - expect_equivalent(confidence_parsnip$.pred_lower_good, stan_lower, tolerance = 0.01) - expect_equivalent(confidence_parsnip$.pred_upper_good, stan_upper, tolerance = 0.01) - expect_equivalent(confidence_parsnip$.pred_lower_bad, 1 - stan_upper, tolerance = 0.01) - expect_equivalent(confidence_parsnip$.pred_upper_bad, 1 - stan_lower, tolerance = 0.01) - expect_equivalent(confidence_parsnip$.std_error, stan_std, tolerance = 0.001) - - stan_pred_lower <- c(`1` = 0, `2` = 0, `3` = 0, `4` = 0, `5` = 1) - stan_pred_upper <- c(`1` = 1, `2` = 1, `3` = 1, `4` = 1, `5` = 1) - stan_pred_std <- - c(`1` = 0.211744742168102, `2` = 0.265130711714607, `3` = 0.209589904165081, - `4` = 0.198389410902796, `5` = 0.0446989708829856) - expect_equivalent(prediction_parsnip$.pred_lower_good, stan_pred_lower) - expect_equivalent(prediction_parsnip$.pred_upper_good, stan_pred_upper) - expect_equivalent(prediction_parsnip$.std_error, stan_pred_std, tolerance = 0.1) -}) - - - diff --git a/tests/testthat/test_multinom_reg_glmnet.R b/tests/testthat/test_multinom_reg_glmnet.R deleted file mode 100644 index 3437e39c5..000000000 --- a/tests/testthat/test_multinom_reg_glmnet.R +++ /dev/null @@ -1,174 +0,0 @@ -library(testthat) -library(parsnip) -library(rlang) -library(tibble) -library(dplyr) - -# ------------------------------------------------------------------------------ - -context("multinom regression execution with glmnet") -source(test_path("helper-objects.R")) -hpc <- hpc_data[, c(2:5, 8)] - -rows <- c(1, 51, 101) - -# ------------------------------------------------------------------------------ - -test_that('glmnet execution', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - expect_error( - res <- fit_xy( - multinom_reg() %>% set_engine("glmnet"), - control = ctrl, - x = hpc[, 1:4], - y = hpc$class - ), - regexp = NA - ) - - expect_true(has_multi_predict(res)) - expect_equal(multi_predict_args(res), "penalty") - - expect_error( - glmnet_xy_catch <- fit_xy( - multinom_reg() %>% set_engine("glmnet"), - x = hpc[, 2:5], - y = hpc$compounds, - control = caught_ctrl - ) - ) - -}) - -test_that('glmnet prediction, one lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - xy_fit <- fit_xy( - multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), - control = ctrl, - x = hpc[, 1:4], - y = hpc$class - ) - - uni_pred <- - predict(xy_fit$fit, - newx = as.matrix(hpc[rows, 1:4]), - s = xy_fit$spec$args$penalty, type = "class") - uni_pred <- factor(uni_pred[,1], levels = levels(hpc$class)) - uni_pred <- unname(uni_pred) - - expect_equal(uni_pred, predict(xy_fit, hpc[rows, 1:4], type = "class")$.pred_class) - - res_form <- fit( - multinom_reg(penalty = 0.1) %>% set_engine("glmnet"), - class ~ log(compounds) + input_fields, - data = hpc, - control = ctrl - ) - - form_mat <- model.matrix(class ~ log(compounds) + input_fields, data = hpc) - form_mat <- form_mat[rows, -1] - - form_pred <- - predict(res_form$fit, - newx = form_mat, - s = res_form$spec$args$penalty, - type = "class") - form_pred <- factor(form_pred[,1], levels = levels(hpc$class)) - expect_equal(form_pred, parsnip:::predict_class.model_fit(res_form, hpc[rows, c("compounds", "input_fields")])) - expect_equal(form_pred, predict(res_form, hpc[rows, c("compounds", "input_fields")], type = "class")$.pred_class) - -}) - - -test_that('glmnet probabilities, mulitiple lambda', { - - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - lams <- c(0.01, 0.1) - - xy_fit <- fit_xy( - multinom_reg(penalty = lams) %>% set_engine("glmnet"), - control = ctrl, - x = hpc[, 1:4], - y = hpc$class - ) - - expect_error(predict(xy_fit, hpc[rows, 1:4], type = "class")) - expect_error(predict(xy_fit, hpc[rows, 1:4], type = "prob")) - - mult_pred <- - predict(xy_fit$fit, - newx = as.matrix(hpc[rows, 1:4]), - s = lams, type = "response") - mult_pred <- apply(mult_pred, 3, as_tibble) - mult_pred <- dplyr:::bind_rows(mult_pred) - mult_probs <- mult_pred - names(mult_pred) <- paste0(".pred_", names(mult_pred)) - mult_pred$penalty <- rep(lams, each = 3) - mult_pred$row <- rep(1:3, 2) - mult_pred <- mult_pred[order(mult_pred$row, mult_pred$penalty),] - mult_pred <- split(mult_pred[, -5], mult_pred$row) - names(mult_pred) <- NULL - mult_pred <- tibble(.pred = mult_pred) - - multi_pred_res <- multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams, type = "prob") - - for (i in seq_along(multi_pred_res$.pred)) { - expect_equal( - mult_pred %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")), - multi_pred_res %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")) - ) - } - - mult_class <- factor(names(mult_probs)[apply(mult_probs, 1, which.max)], - levels = xy_fit$lvl) - mult_class <- tibble( - .pred_class = mult_class, - penalty = rep(lams, each = 3), - row = rep(1:3, 2) - ) - mult_class <- mult_class[order(mult_class$row, mult_class$penalty),] - mult_class <- split(mult_class[, -3], mult_class$row) - names(mult_class) <- NULL - mult_class <- tibble(.pred = mult_class) - - mult_class_res <- multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams) - - for (i in seq_along(mult_class_res$.pred)) { - expect_equal( - mult_class %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")), - mult_class_res %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")) - ) - } - - expect_error( - multi_predict(xy_fit, newdata = hpc[rows, 1:4], penalty = lams), - "Did you mean" - ) - - # Can predict probs with default penalty. See #108 - expect_error( - multi_predict(xy_fit, new_data = hpc[rows, 1:4], type = "prob"), - NA - ) - -}) - -test_that("class predictions are factors with all levels", { - skip_if_not_installed("glmnet") - skip_if(run_glmnet) - - basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(class ~ ., data = hpc) - nd <- hpc[hpc$class == "VF", ] - yhat <- predict(basic, new_data = nd, penalty = .1) - yhat_multi <- multi_predict(basic, new_data = nd, penalty = .1)$.pred - expect_is(yhat_multi[[1]]$.pred_class, "factor") - expect_equal(levels(yhat_multi[[1]]$.pred_class), levels(hpc$class)) -}) diff --git a/tests/testthat/test_nearest_neighbor_kknn.R b/tests/testthat/test_nearest_neighbor_kknn.R index 41aab23c5..e0f317e31 100644 --- a/tests/testthat/test_nearest_neighbor_kknn.R +++ b/tests/testthat/test_nearest_neighbor_kknn.R @@ -134,7 +134,7 @@ test_that('kknn multi-predict', { ) pred_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals) - expect_equal(pred_multi %>% unnest(cols = c(.pred)) %>% nrow(), + expect_equal(pred_multi %>% tidyr::unnest(cols = c(.pred)) %>% nrow(), length(hpc_te) * length(k_vals)) expect_equal(pred_multi %>% nrow(), length(hpc_te)) @@ -142,7 +142,7 @@ test_that('kknn multi-predict', { pred_uni_obs <- pred_multi %>% mutate(.rows = row_number()) %>% - unnest(cols = c(.pred)) %>% + tidyr::unnest(cols = c(.pred)) %>% dplyr::filter(neighbors == 3) %>% arrange(.rows) %>% dplyr::select(.pred_class) @@ -151,7 +151,7 @@ test_that('kknn multi-predict', { prob_multi <- multi_predict(res_xy, hpc[hpc_te, num_pred], neighbors = k_vals, type = "prob") - expect_equal(prob_multi %>% unnest(cols = c(.pred)) %>% nrow(), + expect_equal(prob_multi %>% tidyr::unnest(cols = c(.pred)) %>% nrow(), length(hpc_te) * length(k_vals)) expect_equal(prob_multi %>% nrow(), length(hpc_te)) @@ -159,7 +159,7 @@ test_that('kknn multi-predict', { prob_uni_obs <- prob_multi %>% mutate(.rows = row_number()) %>% - unnest(cols = c(.pred)) %>% + tidyr::unnest(cols = c(.pred)) %>% dplyr::filter(neighbors == 3) %>% arrange(.rows) %>% dplyr::select(!!names(prob_uni)) @@ -179,7 +179,7 @@ test_that('kknn multi-predict', { ) pred_multi <- multi_predict(res_xy, mtcars[cars_te, -1], neighbors = k_vals) - expect_equal(pred_multi %>% unnest(cols = c(.pred)) %>% nrow(), + expect_equal(pred_multi %>% tidyr::unnest(cols = c(.pred)) %>% nrow(), length(cars_te) * length(k_vals)) expect_equal(pred_multi %>% nrow(), length(cars_te)) @@ -187,7 +187,7 @@ test_that('kknn multi-predict', { pred_uni_obs <- pred_multi %>% mutate(.rows = row_number()) %>% - unnest(cols = c(.pred)) %>% + tidyr::unnest(cols = c(.pred)) %>% dplyr::filter(neighbors == 3) %>% arrange(.rows) %>% dplyr::select(.pred) diff --git a/tests/testthat/test_tidy_glmnet.R b/tests/testthat/test_tidy_glmnet.R deleted file mode 100644 index 7e00405ef..000000000 --- a/tests/testthat/test_tidy_glmnet.R +++ /dev/null @@ -1,58 +0,0 @@ -context("tidy glmnet models") - -test_that('linear regression', { - skip_if_not_installed("glmnet") - - ps_mod <- - linear_reg(penalty = .1) %>% - set_engine("glmnet") %>% - fit(mpg ~ ., data = mtcars) - - ps_coefs <- tidy(ps_mod) - gn_coefs <- as.matrix(coef(ps_mod$fit, s = .1)) - for(i in ps_coefs$term) { - expect_equal(ps_coefs$estimate[ps_coefs$term == i], gn_coefs[i,1]) - } -}) - -test_that('logistic regression', { - skip_if_not_installed("glmnet") - - data(two_class_dat, package = "modeldata") - - ps_mod <- - logistic_reg(penalty = .1) %>% - set_engine("glmnet") %>% - fit(Class ~ ., data = two_class_dat) - - ps_coefs <- tidy(ps_mod) - gn_coefs <- as.matrix(coef(ps_mod$fit, s = .1)) - for(i in ps_coefs$term) { - expect_equal(ps_coefs$estimate[ps_coefs$term == i], gn_coefs[i,1]) - } -}) - -test_that('multinomial regression', { - skip_if_not_installed("glmnet") - - data(penguins, package = "modeldata") - - ps_mod <- - multinom_reg(penalty = .01) %>% - set_engine("glmnet") %>% - fit(species ~ ., data = penguins) - - ps_coefs <- tidy(ps_mod) - gn_coefs <- coef(ps_mod$fit, s = .01) - gn_coefs <- purrr::map(gn_coefs, as.matrix) - for(i in unique(ps_coefs$term)) { - for(j in unique(ps_coefs$class)) { - expect_equal( - ps_coefs$estimate[ps_coefs$term == i & ps_coefs$class == j], - gn_coefs[[j]][i,1] - ) - } - } -}) - -