CatBoost Fails with Keyword 'groups' #304

Open
knoam opened this issue Nov 23, 2021 · 1 comment
Labels
good first issue

Comments

knoam (Collaborator) commented Nov 23, 2021

Here's the code:

settings = {
    "time_budget": 360,  
    "metric": 'ap',   
    "task": 'classification', 
    "log_file_name": f'{output_dir}/flaml1.log',  
    "seed": 7654321,    
    "log_training_metric": True,
    "groups": group_id,
    "estimator_list": ['catboost']  
}

automl.fit(X_train=X_train, y_train=y_train, **settings)

Here's the output:

[flaml.automl: 11-23 14:03:00] {1489} INFO - Evaluation method: holdout
[flaml.automl: 11-23 14:03:05] {1540} INFO - Minimizing error metric: 1-ap
[flaml.automl: 11-23 14:03:05] {1577} INFO - List of ML learners in AutoML Run: ['catboost']
[flaml.automl: 11-23 14:03:05] {1826} INFO - iteration 0, current learner catboost

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-26-4e43c07e607c> in <module>
----> 1 automl.fit(X_train=X_train, y_train=y_train, **settings)

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, early_stop, append_log, auto_augment, min_sample_size, use_ray, **fit_kwargs)
   1601             with training_log_writer(log_file_name, append_log) as save_helper:
   1602                 self._training_log = save_helper
-> 1603                 self._search()
   1604         else:
   1605             self._training_log = None

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search(self)
   2117 
   2118         if not self._use_ray:
-> 2119             self._search_sequential()
   2120         else:
   2121             self._search_parallel()

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _search_sequential(self)
   1915                 time_budget_s=min(budget_left, self._state.train_time_limit),
   1916                 verbose=max(self.verbose - 3, 0),
-> 1917                 use_ray=False,
   1918             )
   1919             time_used = time.time() - start_run_time

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/tune/tune.py in run(training_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, prune_attr, min_resource, max_resource, reduction_factor, report_intermediate_result, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray)
    402             if verbose:
    403                 logger.info(f"trial {num_trials} config: {trial_to_run.config}")
--> 404             result = training_function(trial_to_run.config)
    405             if result is not None:
    406                 if isinstance(result, dict):

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/automl.py in _compute_with_config_base(self, estimator, config_w_resource)
    241             self.learner_classes.get(estimator),
    242             self.log_training_metric,
--> 243             self.fit_kwargs,
    244         )
    245         result = {

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, log_training_metric, fit_kwargs)
    435             budget=budget,
    436             log_training_metric=log_training_metric,
--> 437             fit_kwargs=fit_kwargs,
    438         )
    439     else:

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/ml.py in get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_test, groups_test, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs)
    258     #     fit_kwargs['X_val'] = X_test
    259     #     fit_kwargs['y_val'] = y_test
--> 260     estimator.fit(X_train, y_train, budget, **fit_kwargs)
    261     test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
    262         config,

~/anaconda3/envs/python3/lib/python3.6/site-packages/flaml/model.py in fit(self, X_train, y_train, budget, **kwargs)
    939                 ),
    940                 callbacks=CatBoostEstimator._callbacks(start_time, deadline),
--> 941                 **kwargs,
    942             )
    943         else:

TypeError: fit() got an unexpected keyword argument 'groups'

sonichi (Contributor) commented Nov 23, 2021

"groups" for catboost is not currently supported. The reason is that CatBoostClassifier does not accept groups in its fit() function.

sonichi added the good first issue label Nov 23, 2021