CatBoost Fails with Keyword 'groups' #304

Open
knoam opened this issue Nov 23, 2021 · 1 comment
Labels
good first issue

Comments

knoam (Collaborator) commented Nov 23, 2021

Here's the code:

settings = {
    "time_budget": 360,  
    "metric": 'ap',   
    "task": 'classification', 
    "log_file_name": f'{output_dir}/flaml1.log',  
    "seed": 7654321,    
    "log_training_metric": True,
    "groups": group_id,
    "estimator_list": ['catboost']  
}

automl.fit(X_train=X_train, y_train=y_train, **settings)

Here's the output:

[flaml.automl: 11-23 14:03:00] {1489} INFO - Evaluation method: holdout
[flaml.automl: 11-23 14:03:05] {1540} INFO - Minimizing error metric: 1-ap
[flaml.automl: 11-23 14:03:05] {1577} INFO - List of ML learners in AutoML Run: ['catboost']
[flaml.automl: 11-23 14:03:05] {1826} INFO - iteration 0, current learner catboost

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-26-4e43c07e607c> in <module>
----> 1 automl.fit(X_train=X_train, y_train=y_train, **settings)

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   1601             with training_log_writer(log_file_name, append_log) as save_helper:
   1602                 self._training_log = save_helper
-> 1603                 self._search()
   1604         else:
   1605             self._training_log = None

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2117 
   2118         if not self._use_ray:
-> 2119             self._search_sequential()
   2120         else:
   2121             self._search_parallel()

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   1915                 time_budget_s=min(budget_left, self._state.train_time_limit),
   1916                 verbose=max(self.verbose - 3, 0),
-> 1917                 use_ray=False,
   1918             )
   1919             time_used = time.time() - start_run_time

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/tune/tune.py in run(training_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, prune_attr, min_resource, max_resource, reduction_factor, report_intermediate_result, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray)
    402             if verbose:
    403                 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 404             result = training_function(trial_to_run.config)
    405             if result is not None:
    406                 if isinstance(result, dict):

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(self, estimator, config_w_resource)
    241             self.learner_classes.get(estimator),
    242             self.log_training_metric,
--> 243             self.fit_kwargs,
    244         )
    245         result = {

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
    435             budget=budget,
    436             log_training_metric=log_training_metric,
--> 437             fit_kwargs=fit_kwargs,
    438         )
    439     else:

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_test, groups_test, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
    258     #     fit_kwargs['X_val'] = X_test
    259     #     fit_kwargs['y_val'] = y_test
--> 260     estimator.fit(X_train, y_train, budget, **fit_kwargs)
    261     test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
    262         config,

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
    939                 ),
    940                 callbacks=CatBoostEstimator._callbacks(start_time, deadline),
--> 941                 **kwargs,
    942             )
    943         else:

TypeError: fit() got an unexpected keyword argument 'groups'

sonichi (Contributor) commented Nov 23, 2021

"groups" for catboost is not currently supported. The reason is that CatBoostClassifier does not accept groups in its fit() function.

sonichi added the good first issue label Nov 23, 2021