Skip to content

Commit

Permalink
Logger example (#1158)
Browse files Browse the repository at this point in the history
* logging info

* Try order

* Try other location

* Log example debug (#15)

* Show only ensemble builder msg

* Fix title
  • Loading branch information
franchuterivera authored Jun 25, 2021
1 parent ccb419c commit 01e6e60
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 4 deletions.
31 changes: 27 additions & 4 deletions autosklearn/util/logging_.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,38 @@ def setup_logger(
with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), 'r') as fh:
logging_config = yaml.safe_load(fh)

# Make sure we have a filename handler
if 'handlers' not in logging_config:
logging_config['handlers'] = {}
if 'file_handler' not in logging_config['handlers']:
logging_config['handlers']['file_handler'] = {
'class': 'logging.FileHandler',
'level': 'DEBUG',
'filename': 'autosklearn.log'
}
if 'distributed_logfile' not in logging_config['handlers']:
# We have to create a file handler
logging_config['handlers']['distributed_logfile'] = {
'class': 'logging.FileHandler',
'level': 'DEBUG',
'filename': 'distributed.log'
}

if filename is None:
filename = logging_config['handlers']['file_handler']['filename']
logging_config['handlers']['file_handler']['filename'] = os.path.join(
output_dir, filename
)

if distributedlog_filename is None:
distributedlog_filename = logging_config['handlers']['distributed_logfile']['filename']
logging_config['handlers']['distributed_logfile']['filename'] = os.path.join(

# Make path absolute only if required
# This is needed because this function might be called multiple times with the same
# dict, and we don't want /path/path/<name>.log but rather just /path/<name>.log
if os.path.sep not in logging_config['handlers']['file_handler']['filename']:
logging_config['handlers']['file_handler']['filename'] = os.path.join(
output_dir, filename
)
if os.path.sep not in logging_config['handlers']['distributed_logfile']['filename']:
logging_config['handlers']['distributed_logfile']['filename'] = os.path.join(
output_dir, distributedlog_filename
)
logging.config.dictConfig(logging_config)
Expand Down
105 changes: 105 additions & 0 deletions examples/40_advanced/example_debug_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -*- encoding: utf-8 -*-
"""
=====================
Logging and debugging
=====================
This example shows how to provide a custom logging configuration to *auto-sklearn*.
We will be fitting 2 pipelines and showing the INFO-level messages of the
`Client-EnsembleBuilder` logger on the console.
Even if you do not provide a `logging_config`, autosklearn creates a log file
in the temporary working directory. This directory can be specified via the `tmp_folder`
argument, as exemplified below.
This example also highlights additional information about *auto-sklearn* internal
directory structure.
"""
import pathlib

import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification


############################################################################
# Data Loading
# ============
# Load kr-vs-kp dataset from https://www.openml.org/d/3
# With return_X_y=True the call hands back the (features, target) pair, which is
# unpacked directly into X and y; no intermediate name is needed.
X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)

# Hold out part of the data for testing; random_state is fixed to 1 for the split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)


############################################################################
# Create a logging config
# =======================
# *auto-sklearn* uses a default
# `logging config <https://github.com/automl/auto-sklearn/blob/master/autosklearn/util/logging.yaml>`_.
# We will instead create a custom one as follows:

# Layout of every record emitted through the 'custom' formatter.
# More format options are available in the official
# `documentation <https://docs.python.org/3/howto/logging-cookbook.html>`_
custom_formatter = {
    'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
}

# Console handler: records of level INFO or above that reach it are
# written to the standard output using the 'custom' formatter
console_handler = {
    'level': 'INFO',
    'formatter': 'custom',
    'class': 'logging.StreamHandler',
    'stream': 'ext://sys.stdout',
}

logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {'custom': custom_formatter},
    'handlers': {'console': console_handler},
    'loggers': {
        # The empty name designates the root logger
        '': {'level': 'DEBUG'},
        # The only logger in this config with the console handler attached
        'Client-EnsembleBuilder': {
            'level': 'DEBUG',
            'handlers': ['console'],
        },
    },
}


############################################################################
# Build and fit a classifier
# ==========================
# The two settings below are provided only to speed up calculations.
# Not recommended for a real implementation
speed_up_settings = {
    'initial_configurations_via_metalearning': 0,
    'smac_scenario_args': {'runcount_limit': 2},
}

cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
    # Pass the logging configuration we created
    logging_config=logging_config,
    # *auto-sklearn* generates temporary files under tmp_folder
    tmp_folder='./tmp_folder',
    # By default tmp_folder is deleted. We will preserve it
    # for debug purposes
    delete_tmp_folder_after_terminate=False,
    **speed_up_settings,
)
cls.fit(X_train, y_train, X_test, y_test)

# *auto-sklearn* generates intermediate files which can be of interest:
# * tmp_folder/distributed.log
#     Dask multiprocessing information. Useful on multi-core runs.
# * tmp_folder/.auto-sklearn/runs
#     The individual fitted estimators are written to disk here.
# * tmp_folder/smac3-output
#     SMAC output is stored in this directory. For more info, you can check
#     the `SMAC documentation <https://github.com/automl/SMAC3>`_
# * tmp_folder/AutoML*.log
#     Auto-sklearn always outputs to this log file.
tmp_dir = pathlib.Path('./tmp_folder')
for entry in tmp_dir.glob('*'):
    print(entry)

0 comments on commit 01e6e60

Please sign in to comment.