Skip to content

Commit

Permalink
Add big feature selector benchmark (#90)
Browse files Browse the repository at this point in the history
* Push experiment code + Notebook analysis

* Add documentation for Comparing FS algs
  • Loading branch information
dunnkers authored Oct 22, 2022
1 parent c797d1b commit 1a4b27b
Show file tree
Hide file tree
Showing 19 changed files with 3,117 additions and 0 deletions.
2,343 changes: 2,343 additions & 0 deletions examples/comparing-feature-selectors/analyze-results.ipynb

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions examples/comparing-feature-selectors/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import hydra
import numpy as np
from fseval.config import PipelineConfig
from fseval.main import run_pipeline
from infinite_selection import InfFS
from sklearn.base import BaseEstimator
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif
from sklearn.preprocessing import minmax_scale
from stability_selection import StabilitySelection as RealStabilitySelection


class StabilitySelection(RealStabilitySelection):
    """Stability selection that also exposes ranker-style fitted attributes.

    After fitting, two extra attributes are set on top of whatever the parent
    class provides:

    * ``support_`` -- the result of ``get_support()``.
    * ``feature_importances_`` -- the maximum of ``stability_scores_`` taken
      along axis 1.
    """

    def fit(self, X, y):
        """Fit the parent estimator and derive support and importances.

        Args:
            X: training samples, passed unchanged to the parent ``fit``.
            y: training targets, passed unchanged to the parent ``fit``.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention so that calls can be chained.
        """
        super().fit(X, y)
        self.support_ = self.get_support()
        # NOTE(review): presumably stability_scores_ holds one row per feature
        # and one column per regularisation value, so the max over axis 1
        # gives one score per feature -- confirm against stability_selection.
        self.feature_importances_ = np.max(self.stability_scores_, axis=1)
        return self


class InfiniteSelectionEstimator(BaseEstimator):
    """Estimator wrapper around ``InfFS`` from ``infinite_selection``.

    Fitting stores the two values returned by ``InfFS.infFS`` as
    ``ranking_`` (first value) and ``feature_importances_`` (second value).
    """

    def fit(self, X, y):
        """Run Infinite Feature Selection and store its outputs.

        Args:
            X: training samples, forwarded to ``InfFS.infFS``.
            y: training targets, forwarded to ``InfFS.infFS``.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention so that calls can be chained.
        """
        selector = InfFS()
        # alpha, supervision and verbose are fixed for this benchmark.
        ranking, weights = selector.infFS(
            X, y, alpha=0.5, supervision=1, verbose=1
        )

        self.feature_importances_ = weights
        self.ranking_ = ranking
        return self


class Chi2Classifier(BaseEstimator):
    """Feature ranker that scores features with the chi-squared statistic."""

    def fit(self, X, y):
        """Compute chi-squared scores and store them as importances.

        Args:
            X: training samples; rescaled with ``minmax_scale`` before
                scoring.
            y: training targets.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention so that calls can be chained.
        """
        # chi2 needs non-negative input, so rescale every feature first.
        X = minmax_scale(X)
        # chi2 returns (statistics, p-values); only the statistics are kept.
        scores, _ = chi2(X, y)
        self.feature_importances_ = scores
        return self


class ANOVAFValueClassifier(BaseEstimator):
    """Feature ranker that scores features with the ANOVA F-value."""

    def fit(self, X, y):
        """Compute ANOVA F-values and store them as importances.

        Args:
            X: training samples.
            y: training targets.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention so that calls can be chained.
        """
        # f_classif returns (F-statistics, p-values); only the F-statistics
        # are kept.
        scores, _ = f_classif(X, y)
        self.feature_importances_ = scores
        return self


class MutualInfoClassifier(BaseEstimator):
    """Feature ranker that scores features by mutual information."""

    def fit(self, X, y):
        """Estimate mutual information and store it as importances.

        Args:
            X: training samples.
            y: training targets.

        Returns:
            The fitted estimator itself, following the scikit-learn
            convention so that calls can be chained.
        """
        self.feature_importances_ = mutual_info_classif(X, y)
        return self


@hydra.main(config_path="conf", config_name="my_config", version_base="1.1")
def main(cfg: PipelineConfig) -> None:
    """Run the fseval pipeline with the configuration composed by Hydra.

    The configuration is loaded from ``conf/my_config`` as set in the
    ``hydra.main`` decorator and handed to ``run_pipeline`` unchanged.
    """
    run_pipeline(cfg)


# Start the benchmark only when this file is executed as a script, so that
# importing the module does not launch a run.
if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions examples/comparing-feature-selectors/conf/dataset/synthetic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: My synthetic dataset
task: classification
adapter:
_target_: sklearn.datasets.make_classification
n_samples: 10000
n_informative: 2
n_classes: 2
n_features: 20
n_redundant: 0
random_state: 0
shuffle: false
feature_importances:
X[:, 0:2]: 1.0
9 changes: 9 additions & 0 deletions examples/comparing-feature-selectors/conf/my_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
defaults:
- base_pipeline_config
- _self_
- override dataset: synthetic
- override validator: knn
- override /callbacks:
- to_sql

n_bootstraps: 1
5 changes: 5 additions & 0 deletions examples/comparing-feature-selectors/conf/ranker/anova.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name: ANOVA F-value
estimator:
_target_: benchmark.ANOVAFValueClassifier
_estimator_type: classifier
estimates_feature_importances: true
11 changes: 11 additions & 0 deletions examples/comparing-feature-selectors/conf/ranker/boruta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: Boruta
estimator:
_target_: boruta.boruta_py.BorutaPy
estimator:
_target_: sklearn.ensemble.RandomForestClassifier
n_estimators: auto
_estimator_type: classifier
multioutput: false
estimates_feature_importances: false
estimates_feature_support: true
estimates_feature_ranking: true
6 changes: 6 additions & 0 deletions examples/comparing-feature-selectors/conf/ranker/chi2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: Chi-Squared
estimator:
_target_: benchmark.Chi2Classifier
_estimator_type: classifier
requires_positive_X: true
estimates_feature_importances: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: Decision Tree
estimator:
_target_: sklearn.tree.DecisionTreeClassifier
_estimator_type: classifier
multioutput: true
estimates_feature_importances: true
estimates_target: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: Infinite Selection
estimator:
_target_: benchmark.InfiniteSelectionEstimator
_estimator_type: classifier
estimates_feature_importances: true
estimates_feature_ranking: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: MultiSURF
estimator:
_target_: skrebate.MultiSURF
_estimator_type: classifier
multioutput: false
estimates_feature_importances: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: Mutual Info
estimator:
_target_: benchmark.MutualInfoClassifier
_estimator_type: classifier
multioutput: false
estimates_feature_importances: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
name: ReliefF
estimator:
_target_: skrebate.ReliefF
_estimator_type: classifier
estimates_feature_importances: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: Stability Selection
estimator:
_target_: benchmark.StabilitySelection
base_estimator:
_target_: sklearn.linear_model.LogisticRegression
penalty: l2
bootstrap_func: stratified
_estimator_type: classifier
estimates_feature_importances: true
estimates_feature_support: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
name: XGBoost
estimator:
_target_: xgboost.XGBClassifier
use_label_encoder: False
_estimator_type: classifier
multioutput: false
estimates_feature_importances: true
estimates_target: true
6 changes: 6 additions & 0 deletions examples/comparing-feature-selectors/conf/validator/knn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: k-NN
estimator:
_target_: sklearn.neighbors.KNeighborsClassifier
_estimator_type: classifier
multioutput: false
estimates_target: true
6 changes: 6 additions & 0 deletions examples/comparing-feature-selectors/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
fseval
-e git+https://github.com/dunnkers/infinite-selection.git@6c9db1d5fe1b12bc34eb2af5893a4f3ca385aaff#egg=infinite_selection
-e git+https://github.com/dunnkers/stability-selection.git@baf54e7526bbce57d80871fcd93cdfdd67972a43#egg=stability_selection
Boruta>=0.3
skrebate>=0.62
xgboost>=1
Loading

0 comments on commit 1a4b27b

Please sign in to comment.