Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more fixes with search spaces - wrapper, make sure all supported modu… #126

Merged
merged 1 commit into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ dask-worker-space/
target/
.venv/
build/*
*.egg
*.egg
*.coverage*
7 changes: 3 additions & 4 deletions tpot2/config/classifiers_sklearnex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ConfigSpace import ConfigurationSpace
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal

from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING

def get_RandomForestClassifier_ConfigurationSpace(random_state):
space = {
Expand Down Expand Up @@ -66,10 +66,9 @@ def get_NuSVC_ConfigurationSpace(random_state):
space = {
'nu': Float("nu", bounds=(0.05, 1.0)),
'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']),
'C': Float("C", bounds=(1e-4, 25), log=True),
#'C': Float("C", bounds=(1e-4, 25), log=True),
'degree': Integer("degree", bounds=(1, 4)),
#TODO work around for None value?
#'class_weight': Categorical("class_weight", [None, 'balanced']),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'max_iter': 3000,
'tol': 0.005,
'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? but does allow them as a value inside a Categorical
Expand Down
81 changes: 43 additions & 38 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import numpy as np
import warnings
import importlib.util

from ..search_spaces.nodes import EstimatorNode
from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline
Expand All @@ -27,7 +28,7 @@

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
Expand Down Expand Up @@ -101,51 +102,64 @@
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer


from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder

#MDR


all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
AdaBoostClassifier,
AdaBoostClassifier,MLPRegressor,
GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor,
AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer,
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
]


#if mdr is installed
if 'mdr' in sys.modules:
if importlib.util.find_spec('mdr') is not None:
from mdr import MDR, ContinuousMDR
all_methods.append(MDR)
all_methods.append(ContinuousMDR)

if 'skrebate' in sys.modules:
if importlib.util.find_spec('skrebate') is not None:
from skrebate import ReliefF, SURF, SURFstar, MultiSURF
all_methods.append(ReliefF)
all_methods.append(SURF)
all_methods.append(SURFstar)
all_methods.append(MultiSURF)

if 'sklearnex' in sys.modules:
STRING_TO_CLASS = {
t.__name__: t for t in all_methods
}

if importlib.util.find_spec('sklearnex') is not None:
import sklearnex
import sklearnex.linear_model
import sklearnex.svm
import sklearnex.ensemble
import sklearnex.neighbors

all_methods.append(sklearnex.linear_model.LinearRegression)
all_methods.append(sklearnex.linear_model.Ridge)
all_methods.append(sklearnex.linear_model.Lasso)
all_methods.append(sklearnex.linear_model.ElasticNet)
all_methods.append(sklearnex.svm.SVR)
all_methods.append(sklearnex.svm.NuSVR)
all_methods.append(sklearnex.ensemble.RandomForestRegressor)
all_methods.append(sklearnex.neighbors.KNeighborsRegressor)
all_methods.append(sklearnex.ensemble.RandomForestClassifier)
all_methods.append(sklearnex.neighbors.KNeighborsClassifier)
all_methods.append(sklearnex.svm.SVC)
all_methods.append(sklearnex.svm.NuSVC)
all_methods.append(sklearnex.linear_model.LogisticRegression)

sklearnex_methods = []

sklearnex_methods.append(sklearnex.linear_model.LinearRegression)
sklearnex_methods.append(sklearnex.linear_model.Ridge)
sklearnex_methods.append(sklearnex.linear_model.Lasso)
sklearnex_methods.append(sklearnex.linear_model.ElasticNet)
sklearnex_methods.append(sklearnex.svm.SVR)
sklearnex_methods.append(sklearnex.svm.NuSVR)
sklearnex_methods.append(sklearnex.ensemble.RandomForestRegressor)
sklearnex_methods.append(sklearnex.neighbors.KNeighborsRegressor)
sklearnex_methods.append(sklearnex.ensemble.RandomForestClassifier)
sklearnex_methods.append(sklearnex.neighbors.KNeighborsClassifier)
sklearnex_methods.append(sklearnex.svm.SVC)
sklearnex_methods.append(sklearnex.svm.NuSVC)
sklearnex_methods.append(sklearnex.linear_model.LogisticRegression)

STRING_TO_CLASS.update({f"{t.__name__}_sklearnex": t for t in sklearnex_methods})


STRING_TO_CLASS = {
t.__name__: t for t in all_methods
}



Expand Down Expand Up @@ -439,15 +453,6 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st
if name in GROUPNAMES:
name_list = GROUPNAMES[name]
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

if name is None:
warnings.warn(f"name is None")
return None

if name not in STRING_TO_CLASS:
print("FOOO ", name)
warnings.warn(f"Could not find class for {name}")
return None

return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

Expand All @@ -458,21 +463,21 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
# TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?
# TODO add other meta-estimators?
if name == "RFE_classification":
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
if name == "RFE_regression":
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
if name == "SelectFromModel_classification":
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)
if name == "SelectFromModel_regression":
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)

#these are nodes that have special search spaces which require custom parsing of the hyperparameters
if name == "RobustScaler":
Expand Down
20 changes: 18 additions & 2 deletions tpot2/config/tests/test_get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import tpot2.config

from ..get_configspace import STRING_TO_CLASS
from ..get_configspace import STRING_TO_CLASS, GROUPNAMES

def test_loop_through_all_hyperparameters():

Expand All @@ -22,4 +22,20 @@ def test_loop_through_all_hyperparameters():
for i in range(1):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()


def test_loop_through_groupnames():
    """Smoke-test every class name listed in ``GROUPNAMES``.

    For each name in each group, a search space is requested from
    ``tpot2.config.get_search_space``; a node is then generated from it
    and exported with ``export_pipeline()`` many times over. The test
    makes no assertions about the exported object: it passes as long as
    none of these calls raises.
    """
    n_classes = 3
    n_samples = 100
    n_features = 100
    random_state = None
    # Number of generate/export rounds performed per class name.
    n_draws = 100

    for group in GROUPNAMES.values():
        for class_name in group:
            # Emit the name so a failure can be traced to the offending entry.
            print(class_name)
            estnode_gen = tpot2.config.get_search_space(
                class_name,
                n_classes=n_classes,
                n_samples=n_samples,
                n_features=n_features,
                random_state=random_state,
            )

            # Generate 100 random hyperparameter draws and make sure each
            # one can be exported without error.
            for _ in range(n_draws):
                estnode = estnode_gen.generate()
                estnode.export_pipeline()
4 changes: 2 additions & 2 deletions tpot2/search_spaces/nodes/estimator_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, method: type,
else:
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())

self.check_hyperparameters_for_None()

Expand All @@ -55,7 +55,7 @@ def mutate(self, rng=None):

rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())

self.check_hyperparameters_for_None()
return True
Expand Down
6 changes: 3 additions & 3 deletions tpot2/search_spaces/pipelines/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def __init__(self,
super().__init__()

self.nodegen = nodegen
self.node = np.random.default_rng(rng).choice(self.nodegen).generate()
self.node = self.nodegen.generate(rng)


self.method = method
self.space = space
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())



Expand All @@ -43,7 +43,7 @@ def mutate(self, rng=None):
def _mutate_hyperparameters(self, rng=None):
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())
return True

def _mutate_node(self, rng=None):
Expand Down
Loading