Skip to content

Commit

Permalink
Make optimizing metric customizable and add trainer whitelist functionality (dotnet#172)
Browse files Browse the repository at this point in the history
  • Loading branch information
daholste authored Feb 20, 2019
1 parent fa6e616 commit 51a613c
Show file tree
Hide file tree
Showing 17 changed files with 414 additions and 123 deletions.
26 changes: 21 additions & 5 deletions src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,33 @@ namespace Microsoft.ML.Auto
public class BinaryExperimentSettings : ExperimentSettings
{
public IProgress<RunResult<BinaryClassificationMetrics>> ProgressCallback;
public BinaryClassificationMetric OptimizingMetric;
public BinaryClassificationMetric OptimizingMetric = BinaryClassificationMetric.Accuracy;
public BinaryClassificationTrainer[] WhitelistedTrainers;
}

public enum BinaryClassificationMetric
{
Accuracy
Accuracy,
Auc,
Auprc,
F1Score,
PositivePrecision,
PositiveRecall,
NegativePrecision,
NegativeRecall,
}

public enum BinaryClassificationTrainer
{
LightGbm
AveragedPerceptron,
FastForest,
FastTree,
LightGbm,
LinearSupportVectorMachines,
LogisticRegression,
StochasticDualCoordinateAscent,
StochasticGradientDescent,
SymbolicStochasticGradientDescent,
}

public class BinaryClassificationExperiment
Expand Down Expand Up @@ -65,8 +80,9 @@ internal IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(MLContext c

// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<BinaryClassificationMetrics>(context, TaskKind.BinaryClassification, trainData, columnInfo,
validationData, preFeaturizers, OptimizingMetric.Accuracy, _settings?.ProgressCallback,
_settings);
validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric), _settings.ProgressCallback,
_settings, new BinaryDataScorer(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
}
Expand Down
24 changes: 19 additions & 5 deletions src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,31 @@ namespace Microsoft.ML.Auto
public class MulticlassExperimentSettings : ExperimentSettings
{
public IProgress<RunResult<MultiClassClassifierMetrics>> ProgressCallback;
public MulticlassClassificationMetric OptimizingMetric;
public MulticlassClassificationMetric OptimizingMetric = MulticlassClassificationMetric.AccuracyMicro;
public MulticlassClassificationTrainer[] WhitelistedTrainers;
}

public enum MulticlassClassificationMetric
{
Accuracy
AccuracyMicro,
AccuracyMacro,
LogLoss,
LogLossReduction,
TopKAccuracy,
}

public enum MulticlassClassificationTrainer
{
LightGbm
AveragedPerceptronOVA,
FastForestOVA,
FastTreeOVA,
LightGbm,
LinearSupportVectorMachinesOVA,
LogisticRegression,
LogisticRegressionOVA,
StochasticDualCoordinateAscent,
StochasticGradientDescentOVA,
SymbolicStochasticGradientDescentOVA,
}

public class MulticlassClassificationExperiment
Expand Down Expand Up @@ -65,8 +78,9 @@ internal IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(MLContext c

// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<MultiClassClassifierMetrics>(context, TaskKind.MulticlassClassification, trainData,
columnInfo, validationData, preFeaturizers, OptimizingMetric.Accuracy,
_settings?.ProgressCallback, _settings);
columnInfo, validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new MultiDataScorer(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
}
Expand Down
16 changes: 12 additions & 4 deletions src/Microsoft.ML.Auto/API/RegressionExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace Microsoft.ML.Auto
public class RegressionExperimentSettings : ExperimentSettings
{
public IProgress<RunResult<RegressionMetrics>> ProgressCallback;
public RegressionMetric OptimizingMetric;
public RegressionMetric OptimizingMetric = RegressionMetric.RSquared;
public RegressionTrainer[] WhitelistedTrainers;
}

Expand All @@ -28,7 +28,14 @@ public enum RegressionMetric

public enum RegressionTrainer
{
LightGbm
FastForest,
FastTree,
FastTreeTweedie,
LightGbm,
OnlineGradientDescent,
OrdinaryLeastSquares,
PoissonRegression,
StochasticDualCoordinateAscent,
}

public class RegressionExperiment
Expand Down Expand Up @@ -68,8 +75,9 @@ internal IEnumerable<RunResult<RegressionMetrics>> Execute(MLContext context,

// run autofit & get all pipelines run in that process
var autoFitter = new AutoFitter<RegressionMetrics>(context, TaskKind.Regression, trainData, columnInfo,
validationData, preFeaturizers, OptimizingMetric.RSquared, _settings?.ProgressCallback,
_settings);
validationData, preFeaturizers, new OptimizingMetricInfo(_settings.OptimizingMetric),
_settings.ProgressCallback, _settings, new RegressionDataScorer(_settings.OptimizingMetric),
TrainerExtensionUtil.GetTrainerNames(_settings.WhitelistedTrainers));

return autoFitter.Fit();
}
Expand Down
44 changes: 14 additions & 30 deletions src/Microsoft.ML.Auto/AutoFitter/AutoFitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Threading;
using Microsoft.Data.DataView;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;

namespace Microsoft.ML.Auto
{
Expand All @@ -23,6 +21,8 @@ internal class AutoFitter<T> where T : class
private readonly IEstimator<ITransformer> _preFeaturizers;
private readonly IProgress<RunResult<T>> _progressCallback;
private readonly ExperimentSettings _experimentSettings;
private readonly IDataScorer<T> _dataScorer;
private readonly IEnumerable<TrainerName> _trainerWhitelist;

private IDataView _trainData;
private IDataView _validationData;
Expand All @@ -35,9 +35,11 @@ public AutoFitter(MLContext context,
ColumnInformation columnInfo,
IDataView validationData,
IEstimator<ITransformer> preFeaturizers,
OptimizingMetric metric,
OptimizingMetricInfo metricInfo,
IProgress<RunResult<T>> progressCallback,
ExperimentSettings experimentSettings)
ExperimentSettings experimentSettings,
IDataScorer<T> dataScorer,
IEnumerable<TrainerName> trainerWhitelist)
{
if (validationData == null)
{
Expand All @@ -49,11 +51,13 @@ public AutoFitter(MLContext context,
_history = new List<SuggestedPipelineResult<T>>();
_columnInfo = columnInfo;
_context = context;
_optimizingMetricInfo = new OptimizingMetricInfo(metric);
_optimizingMetricInfo = metricInfo;
_task = task;
_preFeaturizers = preFeaturizers;
_progressCallback = progressCallback;
_experimentSettings = experimentSettings ?? new ExperimentSettings();
_experimentSettings = experimentSettings;
_dataScorer = dataScorer;
_trainerWhitelist = trainerWhitelist;
}

public List<RunResult<T>> Fit()
Expand Down Expand Up @@ -81,7 +85,7 @@ public List<RunResult<T>> Fit()
var getPiplelineStopwatch = Stopwatch.StartNew();

// get next pipeline
pipeline = PipelineSuggester.GetNextInferredPipeline(_history, columns, _task, _optimizingMetricInfo.IsMaximizing);
pipeline = PipelineSuggester.GetNextInferredPipeline(_history, columns, _task, _optimizingMetricInfo.IsMaximizing, _trainerWhitelist);

getPiplelineStopwatch.Stop();

Expand Down Expand Up @@ -144,9 +148,9 @@ private SuggestedPipelineResult<T> ProcessPipeline(SuggestedPipeline pipeline)
{
var pipelineModel = pipeline.Fit(_trainData);
var scoredValidationData = pipelineModel.Transform(_validationData);
var evaluatedMetrics = GetEvaluatedMetrics(scoredValidationData);
var score = GetPipelineScore(evaluatedMetrics);
runResult = new SuggestedPipelineResult<T>(evaluatedMetrics, pipelineModel, pipeline, score, null);
var metrics = GetEvaluatedMetrics(scoredValidationData);
var score = _dataScorer.GetScore(metrics);
runResult = new SuggestedPipelineResult<T>(metrics, pipelineModel, pipeline, score, null);
}
catch(Exception ex)
{
Expand Down Expand Up @@ -177,26 +181,6 @@ private T GetEvaluatedMetrics(IDataView scoredData)
}
}

/// <summary>
/// Selects a single score from an evaluated-metrics object based on its exact runtime type:
/// Accuracy for binary classification metrics, AccuracyMicro for multiclass metrics and
/// RSquared for regression metrics.
/// </summary>
/// <param name="evaluatedMetrics">The metrics object produced by evaluation.</param>
/// <returns>The selected metric value.</returns>
/// <exception cref="InvalidOperationException">
/// The runtime type of <paramref name="evaluatedMetrics"/> is none of the three handled metrics types.
/// </exception>
private double GetPipelineScore(object evaluatedMetrics)
{
    var runtimeType = evaluatedMetrics.GetType();

    // Exact type comparison (not an "is" test), so derived types are not matched.
    if (runtimeType == typeof(RegressionMetrics))
    {
        var regression = (RegressionMetrics)evaluatedMetrics;
        return regression.RSquared;
    }

    if (runtimeType == typeof(MultiClassClassifierMetrics))
    {
        var multiclass = (MultiClassClassifierMetrics)evaluatedMetrics;
        return multiclass.AccuracyMicro;
    }

    if (runtimeType == typeof(BinaryClassificationMetrics))
    {
        var binary = (BinaryClassificationMetrics)evaluatedMetrics;
        return binary.Accuracy;
    }

    // should not be possible to reach here
    throw new InvalidOperationException($"unsupported machine learning task type {_task}");
}

private void WriteIterationLog(SuggestedPipeline pipeline, SuggestedPipelineResult runResult, Stopwatch stopwatch)
{
// debug log pipeline result
Expand Down
45 changes: 45 additions & 0 deletions src/Microsoft.ML.Auto/AutoFitter/DataScorer/BinaryDataScorer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Auto
{
/// <summary>
/// Reads the value of one configured <see cref="BinaryClassificationMetric"/> out of a
/// <see cref="BinaryClassificationMetrics"/> instance.
/// </summary>
internal class BinaryDataScorer : IDataScorer<BinaryClassificationMetrics>
{
    // Metric selected at construction; decides which property GetScore returns.
    private readonly BinaryClassificationMetric _metric;

    /// <summary>Creates a scorer that reports the given metric.</summary>
    /// <param name="metric">The metric whose value <see cref="GetScore"/> should return.</param>
    public BinaryDataScorer(BinaryClassificationMetric metric) => _metric = metric;

    /// <summary>Returns the value of the configured metric from <paramref name="metrics"/>.</summary>
    /// <param name="metrics">The evaluated binary classification metrics.</param>
    /// <returns>The value of the property that corresponds to the configured metric.</returns>
    /// <exception cref="NotSupportedException">The configured metric has no matching case.</exception>
    public double GetScore(BinaryClassificationMetrics metrics)
    {
        double score;

        switch (_metric)
        {
            case BinaryClassificationMetric.Accuracy:
                score = metrics.Accuracy;
                break;
            case BinaryClassificationMetric.Auc:
                score = metrics.Auc;
                break;
            case BinaryClassificationMetric.Auprc:
                score = metrics.Auprc;
                break;
            case BinaryClassificationMetric.F1Score:
                score = metrics.F1Score;
                break;
            case BinaryClassificationMetric.PositivePrecision:
                score = metrics.PositivePrecision;
                break;
            case BinaryClassificationMetric.PositiveRecall:
                score = metrics.PositiveRecall;
                break;
            case BinaryClassificationMetric.NegativePrecision:
                score = metrics.NegativePrecision;
                break;
            case BinaryClassificationMetric.NegativeRecall:
                score = metrics.NegativeRecall;
                break;
            default:
                // never expected to reach here
                throw new NotSupportedException($"{_metric} is not a supported sweep metric");
        }

        return score;
    }
}
}
11 changes: 11 additions & 0 deletions src/Microsoft.ML.Auto/AutoFitter/DataScorer/IDataScorer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace Microsoft.ML.Auto
{
/// <summary>
/// Converts a metrics object of type <typeparamref name="T"/> into a single numeric score.
/// </summary>
/// <typeparam name="T">The type of the metrics object being scored.</typeparam>
internal interface IDataScorer<T>
{
/// <summary>Computes the score for the supplied metrics.</summary>
/// <param name="metrics">The metrics object to score.</param>
/// <returns>The score as a <see cref="double"/>.</returns>
double GetScore(T metrics);
}
}
39 changes: 39 additions & 0 deletions src/Microsoft.ML.Auto/AutoFitter/DataScorer/MultiDataScorer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Auto
{
/// <summary>
/// Reads the value of one configured <see cref="MulticlassClassificationMetric"/> out of a
/// <see cref="MultiClassClassifierMetrics"/> instance.
/// </summary>
internal class MultiDataScorer : IDataScorer<MultiClassClassifierMetrics>
{
    // Metric selected at construction; decides which property GetScore returns.
    private readonly MulticlassClassificationMetric _metric;

    /// <summary>Creates a scorer that reports the given metric.</summary>
    /// <param name="metric">The metric whose value <see cref="GetScore"/> should return.</param>
    public MultiDataScorer(MulticlassClassificationMetric metric) => _metric = metric;

    /// <summary>Returns the value of the configured metric from <paramref name="metrics"/>.</summary>
    /// <param name="metrics">The evaluated multiclass classification metrics.</param>
    /// <returns>The value of the property that corresponds to the configured metric.</returns>
    /// <exception cref="NotSupportedException">The configured metric has no matching case.</exception>
    public double GetScore(MultiClassClassifierMetrics metrics)
    {
        double score;

        switch (_metric)
        {
            case MulticlassClassificationMetric.AccuracyMicro:
                score = metrics.AccuracyMicro;
                break;
            case MulticlassClassificationMetric.AccuracyMacro:
                score = metrics.AccuracyMacro;
                break;
            case MulticlassClassificationMetric.LogLoss:
                score = metrics.LogLoss;
                break;
            case MulticlassClassificationMetric.LogLossReduction:
                score = metrics.LogLossReduction;
                break;
            case MulticlassClassificationMetric.TopKAccuracy:
                score = metrics.TopKAccuracy;
                break;
            default:
                // never expected to reach here
                throw new NotSupportedException($"{_metric} is not a supported sweep metric");
        }

        return score;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Auto
{
/// <summary>
/// Reads the value of one configured <see cref="RegressionMetric"/> out of a
/// <see cref="RegressionMetrics"/> instance.
/// </summary>
internal class RegressionDataScorer : IDataScorer<RegressionMetrics>
{
    // Metric selected at construction; decides which property GetScore returns.
    private readonly RegressionMetric _metric;

    /// <summary>Creates a scorer that reports the given metric.</summary>
    /// <param name="metric">The metric whose value <see cref="GetScore"/> should return.</param>
    public RegressionDataScorer(RegressionMetric metric) => _metric = metric;

    /// <summary>Returns the value of the configured metric from <paramref name="metrics"/>.</summary>
    /// <param name="metrics">The evaluated regression metrics.</param>
    /// <returns>The value of the property that corresponds to the configured metric.</returns>
    /// <exception cref="NotSupportedException">The configured metric has no matching case.</exception>
    public double GetScore(RegressionMetrics metrics)
    {
        double score;

        switch (_metric)
        {
            case RegressionMetric.RSquared:
                score = metrics.RSquared;
                break;
            case RegressionMetric.Rms:
                score = metrics.Rms;
                break;
            case RegressionMetric.L2:
                score = metrics.L2;
                break;
            case RegressionMetric.L1:
                score = metrics.L1;
                break;
            default:
                // never expected to reach here
                throw new NotSupportedException($"{_metric} is not a supported sweep metric");
        }

        return score;
    }
}
}
Loading

0 comments on commit 51a613c

Please sign in to comment.