
Error when using a custom metric during parallel hyperparameter tuning #116

Closed
AKALeon opened this issue Jul 24, 2024 · 6 comments


AKALeon commented Jul 24, 2024

I have defined a custom metric (partial ROC AUC) myself; the code is as follows:

# Load packages
library(tidyverse)
library(tidymodels)
library(modeldata)
library(finetune)
library(baguette)
library(doParallel)

# Use roughly a third of the available cores
ncores <- round(parallel::detectCores() / 3)

# Logic for `event_level`
event_col <- function(truth, event_level) {
  if (identical(event_level, "first")) {
    levels(truth)[1]
  } else {
    levels(truth)[2]
  }
}

pauc_impl <- function(truth, estimate, estimator = "binary", event_level) {
  
  if (estimator == "binary") {
    
    level_case <- event_col(truth = truth, event_level = event_level)
    level_control <- setdiff(levels(truth), level_case)
    
    # Partial AUC over the sensitivity range 0.8-1
    result <- pROC::roc(
      response = truth,
      predictor = estimate,
      levels = c(level_control, level_case),
      partial.auc = c(0.8, 1),
      partial.auc.focus = "sensitivity"
    )
    
    pauc_value <- as.numeric(result$auc)
  }
  
  pauc_value
}

pauc_vec <- function(truth,
                     estimate,
                     estimator = NULL,
                     na_rm = TRUE,
                     case_weights = NULL,
                     event_level = "first",
                     ...) {
  # calls finalize_estimator_internal() internally
  estimator <- finalize_estimator(truth, estimator, metric_class = "pauc")
  
  check_prob_metric(truth, estimate, case_weights, estimator)
  
  if (na_rm) {
    result <- yardstick_remove_missing(truth, estimate, case_weights)
    
    truth <- result$truth
    estimate <- result$estimate
    case_weights <- result$case_weights
  } else if (yardstick_any_missing(truth, estimate, case_weights)) {
    return(NA_real_)
  }
  
  pauc_impl(truth, estimate, estimator, event_level)
}



pauc <- function(data, ...) {
  UseMethod("pauc")
}

pauc <- new_prob_metric(pauc, direction = "maximize")

pauc.data.frame <- function(data,
                            truth,
                            estimate,
                            estimator = NULL,
                            na_rm = TRUE,
                            case_weights = NULL,
                            event_level = "first",
                            options = list()) {
  
  prob_metric_summarizer(
    name = "pauc",
    fn = pauc_vec,
    data = data,
    truth = !!enquo(truth),
    !!enquo(estimate),
    estimator = estimator,
    na_rm = na_rm,
    case_weights = !!enquo(case_weights),
    event_level = event_level,
    fn_options = list(options = options)
  )
  
}

I can use the defined metric function pauc on `two_class_example`:

pauc(data = two_class_example, truth = truth, Class1)

The results are as follows:

Setting direction: controls < cases
# A tibble: 1 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 pauc    binary         0.149

Then, I used tune_race_anova to tune the bag_tree model.

set.seed(123)
data("lending_club", package = "modeldata")
split <- initial_split(lending_club, strata = Class)
train <- training(split)
test  <- testing(split)

fold <- vfold_cv(data = train, v = 10, strata = Class)

rec <- recipe(Class ~ ., train) %>%
  step_normalize(all_numeric())

mod <- bag_tree(tree_depth = tune()) %>%
  set_engine("rpart") %>%
  set_mode("classification")

wf_set <- workflow_set(
  preproc = list(base = rec),
  models = list(bag = mod),
  cross = TRUE)

Without parallel computation, tuning with the defined pauc metric works correctly:

race_result <- workflow_map(wf_set,
                            fn = "tune_race_anova",
                            resamples = fold,
                            grid = 5,
                            metrics = metric_set(pauc))

race_result %>% 
  extract_workflow_set_result(id = 'base_bag') %>% 
  show_best(metric = 'pauc')
# A tibble: 1 × 7
  tree_depth .metric .estimator   mean     n std_err .config             
       <int> <chr>   <chr>       <dbl> <int>   <dbl> <chr>               
1          6 pauc    binary     0.0647    10 0.00621 Preprocessor1_Model2

However, when I use parallel computation, an error occurs:

cl <- makePSOCKcluster(ncores)
registerDoParallel(cl)

race_result <- workflow_map(wf_set,
                            fn = "tune_race_anova",
                            resamples = fold,
                            grid = 5,
                            metrics = metric_set(pauc))

stopCluster(cl)
Warning message:
All models failed. Run `show_notes(.Last.tune.result)` for more information. 
> show_notes(.Last.tune.result)
unique notes:
───────────────────────────────────────────────────────────────────────────────────
Error in `metric_set()`:
! Failed to compute `pauc()`.
Caused by error in `UseMethod()`:
! no applicable method for 'pauc' applied to an object of class "c('grouped_df', 'tbl_df', 'tbl', 'data.frame')"

When I use roc_auc as the metric for parallel hyperparameter tuning, everything works fine, so I believe the source of the error is the parallel computation. The message "no applicable method for 'pauc'" suggests that the custom metric's S3 method is not visible on the PSOCK workers.
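
If that is the cause, one possible (untested) workaround would be to export the metric and its helpers to each worker before registering the cluster, so that S3 dispatch can find pauc.data.frame() in the workers' global environments. A minimal sketch, assuming the functions above are defined in the global environment:

cl <- makePSOCKcluster(ncores)
# Export the metric generic, its data.frame method, and the helper
# functions to each worker's global environment
parallel::clusterExport(
  cl,
  c("pauc", "pauc.data.frame", "pauc_vec", "pauc_impl", "event_col")
)
registerDoParallel(cl)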

simonpcouch (Contributor) commented

Just noting that I can reproduce this.

An admittedly cumbersome way around this right now is to drop pauc into a package and then supply that package to `control_race(pkgs)`, but we ought to think about how we can better support this.
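
For illustration, that workaround might look roughly like this, assuming a hypothetical package called paucmetric that exports pauc() and its methods:

library(paucmetric)  # hypothetical package exporting pauc() and its methods

race_result <- workflow_map(wf_set,
                            fn = "tune_race_anova",
                            resamples = fold,
                            grid = 5,
                            metrics = metric_set(pauc),
                            # passed through to tune_race_anova(); asks each
                            # worker to load the package before evaluating
                            control = control_race(pkgs = "paucmetric"))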

AKALeon (Author) commented Jul 30, 2024

@simonpcouch Sorry, I should have loaded the packages at the beginning. Now these results can be reproduced.

simonpcouch (Contributor) commented

I'm not sure I understand this most recent reply, but we'll be coming back to this as we continue to improve our support for parallelism!

AKALeon (Author) commented Aug 1, 2024

> I'm not sure I understand this most recent reply, but we'll be coming back to this as we continue to improve our support for parallelism!

Anyway, thank you!

AKALeon (Author) commented Aug 1, 2024

When I change `cl <- makePSOCKcluster(ncores)` to `cl <- makeForkCluster(ncores)`, everything works fine!
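
Presumably this works because fork-based workers (available on Unix-alikes, not Windows) inherit the parent R session's environment, so pauc() and its S3 method are already visible to them without any export or packaging. A minimal sketch of that setup:

# Forked workers inherit the parent session's environment (Unix-alikes only),
# so the custom metric and its methods are available on the workers
cl <- makeForkCluster(ncores)
registerDoParallel(cl)

race_result <- workflow_map(wf_set,
                            fn = "tune_race_anova",
                            resamples = fold,
                            grid = 5,
                            metrics = metric_set(pauc))

stopCluster(cl)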

simonpcouch (Contributor) commented

Turns out we can replicate this issue with tune alone; I just filed an issue there and will address it soon! Thank you!
