Skip to content

Commit

Permalink
Matthias Feurer: Merge pull request #1561 from automl/development
Browse files Browse the repository at this point in the history
  • Loading branch information
Github Actions committed Sep 20, 2022
1 parent bbd13c3 commit 32254df
Show file tree
Hide file tree
Showing 198 changed files with 19,816 additions and 8,300 deletions.
2 changes: 1 addition & 1 deletion master/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: bc5b7f4c147d1d761add96299b6fe760
config: 527eda5a6ba9fdb88855dc630d160197
tags: 645f666f9bcd5a90fca523b33c5a78b7
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
automl = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=120,
per_run_time_limit=30,
tmp_folder='/tmp/autosklearn_inspect_predictions_example_tmp',
tmp_folder="/tmp/autosklearn_inspect_predictions_example_tmp",
)
automl.fit(X_train, y_train, dataset_name='Run_or_walk_information')
automl.fit(X_train, y_train, dataset_name="Run_or_walk_information")

s = automl.score(X_train, y_train)
print(f"Train score {s}")
Expand All @@ -61,16 +61,19 @@
r = permutation_importance(automl, X_test, y_test, n_repeats=10, random_state=0)
sort_idx = r.importances_mean.argsort()[::-1]

plt.boxplot(r.importances[sort_idx].T,
labels=[dataset.feature_names[i] for i in sort_idx])
plt.boxplot(
r.importances[sort_idx].T, labels=[dataset.feature_names[i] for i in sort_idx]
)

plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

for i in sort_idx[::-1]:
print(f"{dataset.feature_names[i]:10s}: {r.importances_mean[i]:.3f} +/- "
f"{r.importances_std[i]:.3f}")
print(
f"{dataset.feature_names[i]:10s}: {r.importances_mean[i]:.3f} +/- "
f"{r.importances_std[i]:.3f}"
)

############################################################################################
# Create partial dependence (PD) and individual conditional expectation (ICE) plots - part 2
Expand All @@ -90,11 +93,14 @@
# combining ICE (thin lines) and PD (thick line)

features = [1, 2]
plot_partial_dependence(automl, dataset.data,
features=features,
grid_resolution=5,
kind="both",
feature_names=dataset.feature_names)
plot_partial_dependence(
automl,
dataset.data,
features=features,
grid_resolution=5,
kind="both",
feature_names=dataset.feature_names,
)
plt.tight_layout()
plt.show()

Expand All @@ -106,9 +112,12 @@
# these features. Again, we'll look at acceleration_y and acceleration_z.

features = [[1, 2]]
plot_partial_dependence(automl, dataset.data,
features=features,
grid_resolution=5,
feature_names=dataset.feature_names)
plot_partial_dependence(
automl,
dataset.data,
features=features,
grid_resolution=5,
feature_names=dataset.feature_names,
)
plt.tight_layout()
plt.show()
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
},
"outputs": [],
"source": [
"X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\nX_train, X_test, y_train, y_test = \\\n sklearn.model_selection.train_test_split(X, y, random_state=1)"
"X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n X, y, random_state=1\n)"
]
},
{
Expand All @@ -62,7 +62,7 @@
},
"outputs": [],
"source": [
"automl = autosklearn.classification.AutoSklearnClassifier(\n time_left_for_this_task=120,\n per_run_time_limit=30,\n tmp_folder='/tmp/autosklearn_classification_example_tmp',\n)\nautoml.fit(X_train, y_train, dataset_name='breast_cancer')"
"automl = autosklearn.classification.AutoSklearnClassifier(\n time_left_for_this_task=120,\n per_run_time_limit=30,\n tmp_folder=\"/tmp/autosklearn_classification_example_tmp\",\n)\nautoml.fit(X_train, y_train, dataset_name=\"breast_cancer\")"
]
},
{
Expand Down Expand Up @@ -136,7 +136,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.8.13"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,31 @@
############################################################################
# Data Loading
# ======================================
from autosklearn.ensembles.ensemble_selection import EnsembleSelection

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
X, y, random_state=1
)

############################################################################
# Build and fit the classifier
# ======================================

automl = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=120,
per_run_time_limit=30,
tmp_folder='/tmp/autosklearn_sequential_example_tmp',
time_left_for_this_task=60,
tmp_folder="/tmp/autosklearn_sequential_example_tmp",
# Do not construct ensembles in parallel to avoid using more than one
# core at a time. The ensemble will be constructed after auto-sklearn
# finished fitting all machine learning models.
ensemble_size=0,
ensemble_class=None,
delete_tmp_folder_after_terminate=False,
)
automl.fit(X_train, y_train, dataset_name='breast_cancer')
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# This call to fit_ensemble uses all models trained in the previous call
# to fit to build an ensemble which can be used with automl.predict()
automl.fit_ensemble(y_train, ensemble_size=50)
automl.fit_ensemble(y_train, ensemble_class=EnsembleSelection)

############################################################################
# Print the final ensemble constructed by auto-sklearn
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,21 @@ def error_wk(solution, prediction, extra_argument):
return np.mean(solution != prediction)


def metric_which_needs_x(solution, prediction, X_data, consider_col, val_threshold):
    """Compute accuracy over the rows selected by a threshold on one column.

    A row is taken into account only when its value in column
    ``consider_col`` of ``X_data`` is strictly greater than
    ``val_threshold``.

    Parameters
    ----------
    solution
        Ground-truth labels, indexable with a boolean mask.
    prediction
        Predicted labels, indexable with the same boolean mask.
    X_data
        Two-dimensional feature data indexed as ``X_data[:, consider_col]``;
        must not be ``None``.
    consider_col
        Column of ``X_data`` used to select rows.
    val_threshold
        Rows whose selected column exceeds this value are kept.

    Returns
    -------
    The mean of the element-wise equality between the selected entries of
    ``solution`` and ``prediction``.
    """
    assert X_data is not None
    selected_rows = X_data[:, consider_col] > val_threshold
    is_correct = solution[selected_rows] == prediction[selected_rows]
    return np.mean(is_correct)


############################################################################
# Data Loading
# ============

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
X, y, random_state=1
)

############################################################################
# Print a list of available metrics
Expand All @@ -68,26 +76,25 @@ def error_wk(solution, prediction, extra_argument):
# First example: Use predefined accuracy metric
# =============================================

print("#"*80)
print("#" * 80)
print("Use predefined accuracy metric")
scorer = autosklearn.metrics.accuracy
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
metric=autosklearn.metrics.accuracy,
metric=scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = sklearn.metrics.accuracy_score(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name}")
score = scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {scorer.name}")

############################################################################
# Second example: Use own accuracy metric
# =======================================

print("#"*80)
print("#" * 80)
print("Use self defined accuracy metric")
accuracy_scorer = autosklearn.metrics.make_scorer(
name="accu",
Expand All @@ -99,49 +106,45 @@ def error_wk(solution, prediction, extra_argument):
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
metric=accuracy_scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name:s}")
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")

############################################################################
# Third example: Use own error metric
# ===================================

print("#"*80)
print("#" * 80)
print("Use self defined error metric")
error_rate = autosklearn.metrics.make_scorer(
name='error',
name="error",
score_func=error,
optimum=0,
greater_is_better=False,
needs_proba=False,
needs_threshold=False
needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
metric=error_rate,
)
cls.fit(X_train, y_train)

# Bind the result to ``predictions`` rather than to an attribute on ``cls``;
# otherwise the score below is computed on the predictions left over from the
# previous example instead of this model's output.
predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Error score {score:.3f} using {metric_name:s}")
print(f"Error score {score:.3f} using {error_rate.name:s}")

############################################################################
# Fourth example: Use own accuracy metric with additional argument
# ================================================================

print("#"*80)
print("#" * 80)
print("Use self defined accuracy with additional argument")
accuracy_scorer = autosklearn.metrics.make_scorer(
name="accu_add",
Expand All @@ -153,23 +156,19 @@ def error_wk(solution, prediction, extra_argument):
extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
metric=accuracy_scorer
time_left_for_this_task=60, per_run_time_limit=30, seed=1, metric=accuracy_scorer
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name:s}")
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")

############################################################################
# Fifth example: Use own error metric with additional argument
# ============================================================

print("#"*80)
print("#" * 80)
print("Use self defined error with additional argument")
error_rate = autosklearn.metrics.make_scorer(
name="error_add",
Expand All @@ -182,13 +181,49 @@ def error_wk(solution, prediction, extra_argument):
)
cls = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=60,
per_run_time_limit=30,
seed=1,
metric=error_rate,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Error score {score:.3f} using {metric_name:s}")
print(f"Error score {score:.3f} using {error_rate.name:s}")


#############################################################################
# Sixth example: Use a metric with additional argument which also needs xdata
# ===========================================================================
"""
Finally, *Auto-sklearn* also supports metrics that require the train data (aka X_data) to
compute a value. This can be useful if one only cares about the score on a subset of the
data.
"""

# Build a scorer around ``metric_which_needs_x``. ``needs_X=True`` requests
# that the data be handed to the metric as ``X_data``; ``consider_col`` and
# ``val_threshold`` are passed to ``make_scorer`` as extra keyword arguments
# for the metric.
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_X",
    score_func=metric_which_needs_x,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_X=True,
    needs_threshold=False,
    consider_col=1,
    val_threshold=18.8,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
# Evaluate on the test split by calling the metric function directly with the
# same column and threshold that were given to the scorer above.
score = metric_which_needs_x(
    y_test,
    predictions,
    X_data=X_test,
    consider_col=1,
    val_threshold=18.8,
)
# This metric is an accuracy (greater is better, optimum 1), so label it as
# such instead of as an error.
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
Loading

0 comments on commit 32254df

Please sign in to comment.