svm_poly() #281
Function:

#' Boilerplate Workflow
#'
#' @family Boiler_Plate
#' @family SVM
#' @family poly
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This uses `parsnip::svm_poly()` and sets the engine
#' to `kernlab` via `parsnip::set_engine()`.
#'
#' @seealso \url{https://parsnip.tidymodels.org/reference/svm_poly.html}
#'
#' @description This is a boilerplate function to automatically create the following:
#' - recipe
#' - model specification
#' - workflow
#' - tuned model (grid, etc.)
#' - calibration tibble and plot
#'
#' @param .data The data being passed to the function; the time-series object.
#' @param .date_col The column that holds the datetime.
#' @param .value_col The column that holds the value.
#' @param .formula The formula that is passed to the recipe, like `value ~ .`
#' @param .rsamp_obj The rsample splits object.
#' @param .prefix Default is `ts_svm_poly`.
#' @param .tune Defaults to TRUE; this creates a tuning grid and tuned model.
#' @param .grid_size If `.tune` is TRUE then `.grid_size` is the size of the
#' tuning grid.
#' @param .num_cores How many cores you want to use. Default is 1.
#' @param .cv_assess How many observations for assess. See [timetk::time_series_cv()]
#' @param .cv_skip How many observations to skip. See [timetk::time_series_cv()]
#' @param .cv_slice_limit How many slices to return. See [timetk::time_series_cv()]
#' @param .best_metric Default is "rmse". See [modeltime::default_forecast_accuracy_metric_set()]
#' @param .bootstrap_final Not yet implemented.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(timetk)
#'
#' data <- AirPassengers %>%
#' ts_to_tbl() %>%
#' select(-index)
#'
#' splits <- time_series_split(
#' data
#' , date_col
#' , assess = 12
#' , skip = 3
#' , cumulative = TRUE
#' )
#'
#' ts_auto_poly <- ts_auto_svm_poly(
#' .data = data,
#' .num_cores = 5,
#' .date_col = date_col,
#' .value_col = value,
#' .rsamp_obj = splits,
#' .formula = value ~ .,
#' .grid_size = 3
#' )
#'
#' ts_auto_poly$recipe_info
#' }
#'
#' @return
#' A list
#'
#' @export
#'
ts_auto_svm_poly <- function(.data, .date_col, .value_col, .formula, .rsamp_obj,
.prefix = "ts_svm_poly", .tune = TRUE, .grid_size = 10,
.num_cores = 1, .cv_assess = 12, .cv_skip = 3,
.cv_slice_limit = 6, .best_metric = "rmse",
.bootstrap_final = FALSE){
# Tidyeval ----
date_col_var_expr <- rlang::enquo(.date_col)
value_col_var_expr <- rlang::enquo(.value_col)
sampling_object <- .rsamp_obj
# Cross Validation
cv_assess <- as.numeric(.cv_assess)
cv_skip <- as.numeric(.cv_skip)
cv_slice <- as.numeric(.cv_slice_limit)
# Tuning Grid
grid_size <- as.numeric(.grid_size)
num_cores <- as.numeric(.num_cores)
best_metric <- as.character(.best_metric)
# Data and splits
splits <- .rsamp_obj
data_tbl <- dplyr::as_tibble(.data)
# Checks ----
if (rlang::quo_is_missing(date_col_var_expr)){
rlang::abort(
message = "'.date_col' must be supplied.",
use_cli_format = TRUE
)
}
if (rlang::quo_is_missing(value_col_var_expr)){
rlang::abort(
message = "'.value_col' must be supplied.",
use_cli_format = TRUE
)
}
if (!inherits(x = splits, what = "rsplit")){
rlang::abort(
message = "'.rsamp_obj' must be have class rsplit, use the rsample package.",
use_cli_format = TRUE
)
}
# Recipe ----
# Get the initial recipe call
recipe_call <- get_recipe_call(match.call())
rec_syntax <- paste0(.prefix, "_recipe") %>%
assign_value(!!recipe_call)
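# Feature engineering: calendar and holiday signatures from the date column,
# novel-level handling, character-to-factor conversion, one-hot dummies,
# normalization, and near-zero-variance / correlation filters (the numeric
# date index is excluded from the normalization and nzv steps)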
rec_obj <- recipes::recipe(formula = .formula, data = data_tbl)
rec_obj <- rec_obj %>%
timetk::step_timeseries_signature({{date_col_var_expr}}) %>%
timetk::step_holiday_signature({{date_col_var_expr}}) %>%
recipes::step_novel(recipes::all_nominal_predictors()) %>%
recipes::step_mutate_at(tidyselect::vars_select_helpers$where(is.character)
, fn = ~ as.factor(.)) %>%
recipes::step_dummy(recipes::all_nominal(), one_hot = TRUE) %>%
recipes::step_normalize(recipes::all_numeric_predictors(), -date_col_index.num) %>%
recipes::step_nzv(recipes::all_predictors(), -date_col_index.num) %>%
recipes::step_corr(recipes::all_numeric_predictors(), threshold = 0.9) # threshold = 0 (as originally written) would flag any nonzero correlation; 0.9 is the recipes default
# Tune/Spec ----
if (.tune){
model_spec <- parsnip::svm_poly(
cost = tune::tune(),
degree = tune::tune(),
scale_factor = tune::tune(),
margin = tune::tune()
)
} else {
model_spec <- parsnip::svm_poly()
}
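# tune::tune() marks cost, degree, scale_factor, and margin as placeholders
# for the grid search below; with .tune = FALSE the kernlab defaults are used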
model_spec <- model_spec %>%
parsnip::set_mode(mode = "regression") %>%
parsnip::set_engine("kernlab")
# Workflow ----
wflw <- workflows::workflow() %>%
workflows::add_recipe(rec_obj) %>%
workflows::add_model(model_spec)
# Tuning Grid ----
if (.tune){
tuning_grid_spec <- dials::grid_latin_hypercube(
hardhat::extract_parameter_set_dials(model_spec),
size = grid_size
)
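# A Latin hypercube is a space-filling design, so even a small grid
# spreads its candidates across the four-dimensional hyperparameter space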
# Make TS CV ----
tscv <- timetk::time_series_cv(
data = rsample::training(splits),
date_var = {{date_col_var_expr}},
cumulative = TRUE,
assess = cv_assess,
skip = cv_skip,
slice_limit = cv_slice
)
# Start parallel backend
modeltime::parallel_start(num_cores)
# Tune the workflow
tuned_results <- wflw %>%
tune::tune_grid(
resamples = tscv,
grid = tuning_grid_spec,
metrics = modeltime::default_forecast_accuracy_metric_set()
)
# Stop parallel backend
modeltime::parallel_stop()
# Get the best result set by a specified metric
best_result_set <- tuned_results %>%
tune::show_best(metric = best_metric, n = 1)
# Plot results
tune_results_plt <- tuned_results %>%
tune::autoplot() +
ggplot2::theme_minimal() +
ggplot2::geom_smooth(se = FALSE)
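# The loess smoother added here is what emits the `geom_smooth()` message
# shown with the tuning grid plot in the example output below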
# Make final workflow
wflw_fit <- wflw %>%
tune::finalize_workflow(
tuned_results %>%
tune::show_best(metric = best_metric, n = Inf) %>%
dplyr::slice(1)
) %>%
parsnip::fit(rsample::training(splits))
} else {
wflw_fit <- wflw %>%
parsnip::fit(rsample::training(splits))
}
# Calibrate and Plot ----
cap <- healthyR.ts::calibrate_and_plot(
wflw_fit,
.splits_obj = splits,
.data = data_tbl,
.interactive = TRUE,
.print_info = FALSE
)
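# Calibrates the fitted workflow on the testing portion of the splits and
# returns the forecast plot, calibration tibble, and accuracy table that are
# bundled into the output list below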
# Return ----
output <- list(
recipe_info = list(
recipe_call = recipe_call,
recipe_syntax = rec_syntax,
rec_obj = rec_obj
),
model_info = list(
model_spec = model_spec,
wflw = wflw,
fitted_wflw = wflw_fit,
was_tuned = ifelse(.tune, "tuned", "not_tuned")
),
model_calibration = list(
plot = cap$plot,
calibration_tbl = cap$calibration_tbl,
model_accuracy = cap$model_accuracy
)
)
if (.tune){
output$tuned_info = list(
tuning_grid = tuning_grid_spec,
tscv = tscv,
tuned_results = tuned_results,
grid_size = grid_size,
best_metric = best_metric,
best_result_set = best_result_set,
tuning_grid_plot = tune_results_plt,
plotly_grid_plot = plotly::ggplotly(tune_results_plt)
)
}
return(invisible(output))
} Example: > ts_auto_poly
$recipe_info
$recipe_info$recipe_call
recipe(.data = data, .date_col = date_col, .value_col = value,
.formula = value ~ ., .rsamp_obj = splits, .grid_size = 3,
.num_cores = 5)
$recipe_info$recipe_syntax
[1] "ts_svm_poly_recipe <-"
[2] "\n recipe(.data = data, .date_col = date_col, .value_col = value, .formula = value ~ \n ., .rsamp_obj = splits, .grid_size = 3, .num_cores = 5)"
$recipe_info$rec_obj
Recipe

Inputs:

      role #variables
   outcome          1
 predictor          1

Operations:
Timeseries signature features from date_col
Holiday signature features from date_col
Novel factor level assignment for recipes::all_nominal_predictors()
Variable mutation for tidyselect::vars_select_helpers$where(is.character)
Dummy variables from recipes::all_nominal()
Centering and scaling for recipes::all_numeric_predictors(), -date_col_index.num
Sparse, unbalanced variable filter on recipes::all_predictors(), -date_col_index.num
Correlation filter on recipes::all_numeric_predictors()
$model_info
$model_info$model_spec
Polynomial Support Vector Machine Specification (regression)
Main Arguments:
cost = tune::tune()
degree = tune::tune()
scale_factor = tune::tune()
margin = tune::tune()
Computational engine: kernlab
$model_info$wflw
== Workflow =========================================================================================
Preprocessor: Recipe
Model: svm_poly()
-- Preprocessor -------------------------------------------------------------------------------------
8 Recipe Steps
* step_timeseries_signature()
* step_holiday_signature()
* step_novel()
* step_mutate_at()
* step_dummy()
* step_normalize()
* step_nzv()
* step_corr()
-- Model --------------------------------------------------------------------------------------------
Polynomial Support Vector Machine Specification (regression)
Main Arguments:
cost = tune::tune()
degree = tune::tune()
scale_factor = tune::tune()
margin = tune::tune()
Computational engine: kernlab
$model_info$fitted_wflw
== Workflow [trained] ===============================================================================
Preprocessor: Recipe
Model: svm_poly()
-- Preprocessor -------------------------------------------------------------------------------------
8 Recipe Steps
* step_timeseries_signature()
* step_holiday_signature()
* step_novel()
* step_mutate_at()
* step_dummy()
* step_normalize()
* step_nzv()
* step_corr()
-- Model --------------------------------------------------------------------------------------------
Support Vector Machine object of class "ksvm"
SV type: eps-svr (regression)
parameter : epsilon = 0.152480707721164 cost C = 16.5459568852013
Polynomial kernel function.
Hyperparameters : degree = 2 scale = 0.00332334037030967 offset = 1
Number of Support Vectors : 77
Objective Function Value : -374.4418
Training error : 0.161677
$model_info$was_tuned
[1] "tuned"
$model_calibration
$model_calibration$plot
$model_calibration$calibration_tbl
# Modeltime Table
# A tibble: 1 x 5
.model_id .model .model_desc .type .calibration_data
<int> <list> <chr> <chr> <list>
1 1 <workflow> KERNLAB Test <tibble [12 x 4]>
$model_calibration$model_accuracy
# A tibble: 1 x 9
.model_id .model_desc .type mae mape mase smape rmse rsq
<int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 KERNLAB Test 62.6 11.7 1.30 12.9 88.2 0.0354
$tuned_info
$tuned_info$tuning_grid
# A tibble: 3 x 4
cost degree scale_factor margin
<dbl> <int> <dbl> <dbl>
1 0.00121 3 1.95e- 5 0.0747
2 0.481 1 1.08e-10 0.0216
3 16.5 2 3.32e- 3 0.152
$tuned_info$tscv
# Time Series Cross Validation Plan
# A tibble: 6 x 2
splits id
<list> <chr>
1 <split [120/12]> Slice1
2 <split [117/12]> Slice2
3 <split [114/12]> Slice3
4 <split [111/12]> Slice4
5 <split [108/12]> Slice5
6 <split [105/12]> Slice6
$tuned_info$tuned_results
# Tuning results
# NA
# A tibble: 6 x 4
splits id .metrics .notes
<list> <chr> <list> <list>
1 <split [120/12]> Slice1 <tibble [18 x 8]> <tibble [0 x 3]>
2 <split [117/12]> Slice2 <tibble [18 x 8]> <tibble [0 x 3]>
3 <split [114/12]> Slice3 <tibble [18 x 8]> <tibble [0 x 3]>
4 <split [111/12]> Slice4 <tibble [18 x 8]> <tibble [0 x 3]>
5 <split [108/12]> Slice5 <tibble [18 x 8]> <tibble [0 x 3]>
6 <split [105/12]> Slice6 <tibble [18 x 8]> <tibble [0 x 3]>
$tuned_info$grid_size
[1] 3
$tuned_info$best_metric
[1] "rmse"
$tuned_info$best_result_set
# A tibble: 1 x 10
cost degree scale_factor margin .metric .estimator mean n std_err .config
<dbl> <int> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 16.5 2 0.00332 0.152 rmse standard 67.5 6 2.65 Preprocessor1_Model3
$tuned_info$tuning_grid_plot
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
$tuned_info$plotly_grid_plot
There were 50 or more warnings (use warnings() to see the first 50)
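Not run above, but worth noting: the calibration tibble in the output list is a standard modeltime table, so it can be piped straight into a hold-out forecast. A minimal sketch, assuming the `ts_auto_poly`, `splits`, and `data` objects from the example session (`modeltime_forecast()` and `plot_modeltime_forecast()` are modeltime functions, not part of healthyR.ts):

library(dplyr)
library(modeltime)
library(rsample)

# Forecast the assessment window from the returned calibration tibble
ts_auto_poly$model_calibration$calibration_tbl %>%
    modeltime::modeltime_forecast(
        new_data    = rsample::testing(splits), # the 12-month hold-out
        actual_data = data                      # overlay the full series
    ) %>%
    modeltime::plot_modeltime_forecast(.interactive = FALSE)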