Skip to content

Commit

Permalink
Misc fixes (dotnet#39)
Browse files Browse the repository at this point in the history
* misc fixes -- fix bug where SMAC was returning already-seen values; fix param encoding return bug in pipeline object model; nit clean-up AutoFit; return in pipeline suggester when sweeper has no next proposal; null ref fix in public object model pipeline suggester

* fix in BuildPipelineNodePropsLightGbm test, fix / use correct 'newTrainer' variable in PipelineSuggester

* SMAC perf improvement
  • Loading branch information
daholste authored and Dmitry-A committed Aug 22, 2019
1 parent e1aa92c commit 023d6ce
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 40 deletions.
8 changes: 2 additions & 6 deletions src/AutoML/AutoFitter/AutoFitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ public AutoFitter(MLContext context, OptimizingMetricInfo metricInfo, AutoFitSet
}

public InferredPipelineRunResult[] Fit()
{
IteratePipelinesAndFit();
return _history.ToArray();
}

private void IteratePipelinesAndFit()
{
var stopwatch = Stopwatch.StartNew();
var columns = AutoMlUtils.GetColumnInfoTuples(_context, _trainData, _label, _purposeOverrides);
Expand All @@ -68,6 +62,8 @@ private void IteratePipelinesAndFit()

} while (_history.Count < _settings.StoppingCriteria.MaxIterations &&
stopwatch.Elapsed.TotalMinutes < _settings.StoppingCriteria.TimeOutInMinutes);

return _history.ToArray();
}

private void ProcessPipeline(InferredPipeline pipeline)
Expand Down
34 changes: 27 additions & 7 deletions src/AutoML/PipelineSuggesters/PipelineSuggester.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static Pipeline GetNextPipeline(IEnumerable<PipelineRunResult> history,
{
var inferredHistory = history.Select(r => InferredPipelineRunResult.FromPipelineRunResult(r));
var nextInferredPipeline = GetNextInferredPipeline(inferredHistory, columns, task, iterationsRemaining, isMaximizingMetric);
return nextInferredPipeline.ToPipeline();
return nextInferredPipeline?.ToPipeline();
}

public static InferredPipeline GetNextInferredPipeline(IEnumerable<InferredPipelineRunResult> history,
Expand All @@ -47,21 +47,31 @@ public static InferredPipeline GetNextInferredPipeline(IEnumerable<InferredPipel
// sort top trainers by # of times they've been run, from lowest to highest
var orderedTopTrainers = OrderTrainersByNumTrials(history, topTrainers);

// keep as hashset of previously visited pipelines
var visitedPipelines = new HashSet<InferredPipeline>(history.Select(h => h.Pipeline));

// iterate over top trainers (from least run to most run),
// to find next pipeline
foreach(var trainer in orderedTopTrainers)
foreach (var trainer in orderedTopTrainers)
{
var newTrainer = trainer.Clone();

// make sure we have not seen pipeline before.
// repeat until passes or runs out of chances
var visitedPipelines = new HashSet<InferredPipeline>(history.Select(h => h.Pipeline));
const int maxNumberAttempts = 10;
var count = 0;
do
{
SampleHyperparameters(newTrainer, history, isMaximizingMetric);
// sample new hyperparameters for the learner
if (!SampleHyperparameters(newTrainer, history, isMaximizingMetric))
{
// if unable to sample new hyperparameters for the learner
// (ie SMAC returned 0 suggestions), break
break;
}

var pipeline = new InferredPipeline(transforms, newTrainer);

// make sure we have not seen pipeline before
if (!visitedPipelines.Contains(pipeline))
{
return pipeline;
Expand Down Expand Up @@ -169,7 +179,11 @@ private static IValueGenerator[] ConvertToValueGenerators(IEnumerable<SweepableP
return results;
}

private static void SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<InferredPipelineRunResult> history, bool isMaximizingMetric)
/// <summary>
/// Samples new hyperparameters for the trainer, and sets them.
/// Returns true if success (new hyperparams were suggested and set). Else, returns false.
/// </summary>
private static bool SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<InferredPipelineRunResult> history, bool isMaximizingMetric)
{
var sps = ConvertToValueGenerators(trainer.SweepParams);
var sweeper = new SmacSweeper(
Expand All @@ -179,14 +193,20 @@ private static void SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<
});

IEnumerable<InferredPipelineRunResult> historyToUse = history
.Where(r => r.RunSucceded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName && r.Pipeline.Trainer.HyperParamSet != null);
.Where(r => r.RunSucceded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName && r.Pipeline.Trainer.HyperParamSet != null && r.Pipeline.Trainer.HyperParamSet.Any());

// get new set of hyperparameter values
var proposedParamSet = sweeper.ProposeSweeps(1, historyToUse.Select(h => h.ToRunResult(isMaximizingMetric))).First();
if(!proposedParamSet.Any())
{
return false;
}

// associate proposed param set with trainer, so that smart hyperparam
// sweepers (like KDO) can map them back.
trainer.SetHyperparamValues(proposedParamSet);

return true;
}

private static IEnumerable<SuggestedTransform> CalculateTransforms(MLContext context,
Expand Down
12 changes: 8 additions & 4 deletions src/AutoML/Sweepers/SmacSweeper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,17 @@ private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForest
for (int i = 0; i < randomConfigs.Length; i++)
configurations.Add(new Tuple<double, ParameterSet>(randomEIs[i], randomConfigs[i]));

HashSet<ParameterSet> retainedConfigs = new HashSet<ParameterSet>();
IOrderedEnumerable<Tuple<double, ParameterSet>> bestConfigurations = configurations.OrderByDescending(x => x.Item1);

foreach (Tuple<double, ParameterSet> t in bestConfigurations.Take(numOfCandidates))
retainedConfigs.Add(t.Item2);
var retainedConfigs = new HashSet<ParameterSet>(bestConfigurations.Select(x => x.Item2));

return retainedConfigs.ToArray();
// remove configurations matching previous run
foreach(var previousRun in previousRuns)
{
retainedConfigs.Remove(previousRun.ParameterSet);
}

return retainedConfigs.Take(numOfCandidates).ToArray();
}

/// <summary>
Expand Down
6 changes: 3 additions & 3 deletions src/AutoML/Sweepers/SweeperBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ protected SweeperBase(ArgumentsBase args, IValueGenerator[] sweepParameters, str

public virtual ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResult> previousRuns = null)
{
var prevParamSets = previousRuns?.Select(r => r.ParameterSet).ToList() ?? new List<ParameterSet>();
var prevParamSets = new HashSet<ParameterSet>(previousRuns?.Select(r => r.ParameterSet).ToList() ?? new List<ParameterSet>());
var result = new HashSet<ParameterSet>();
for (int i = 0; i < maxSweeps; i++)
{
Expand All @@ -66,9 +66,9 @@ public virtual ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResul

protected abstract ParameterSet CreateParamSet();

protected static bool AlreadyGenerated(ParameterSet paramSet, IEnumerable<ParameterSet> previousRuns)
protected static bool AlreadyGenerated(ParameterSet paramSet, ISet<ParameterSet> previousRuns)
{
return previousRuns.Any(previousRun => previousRun.Equals(paramSet));
return previousRuns.Contains(paramSet);
}
}
}
6 changes: 3 additions & 3 deletions src/AutoML/TrainerExtensions/TrainerExtensionUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,18 @@ public static IDictionary<string, object> BuildPipelineNodeProps(TrainerName tra
return BuildLightGbmPipelineNodeProps(sweepParams);
}

return sweepParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
return sweepParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
}

private static IDictionary<string, object> BuildLightGbmPipelineNodeProps(IEnumerable<SweepableParam> sweepParams)
{
var treeBoosterParams = sweepParams.Where(p => _lightGbmTreeBoosterParamNames.Contains(p.Name));
var parentArgParams = sweepParams.Except(treeBoosterParams);

var treeBoosterProps = treeBoosterParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
var treeBoosterProps = treeBoosterParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
var treeBoosterCustomProp = new CustomProperty("Microsoft.ML.LightGBM.TreeBooster", treeBoosterProps);

var props = parentArgParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
var props = parentArgParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
props[LightGbmTreeBoosterPropName] = treeBoosterCustomProp;

return props;
Expand Down
4 changes: 4 additions & 0 deletions src/Test/GetNextPipelineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ public void GetNextPipelineMock()
{
// get next pipeline
var pipeline = PipelineSuggester.GetNextPipeline(history, columns, TaskKind.BinaryClassification, maxIterations - i);
if(pipeline == null)
{
break;
}

var result = new PipelineRunResult(pipeline, AutoMlUtils.Random.NextDouble(), true);
history.Add(result);
Expand Down
64 changes: 64 additions & 0 deletions src/Test/InferredPipelineTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Microsoft.ML.Auto.Test
{
    /// <summary>
    /// Tests covering the hash codes produced by <see cref="InferredPipeline"/> instances.
    /// </summary>
    [TestClass]
    public class InferredPipelineTests
    {
        /// <summary>
        /// Builds pairs of pipelines from trainers, hyperparameters and transforms,
        /// and checks whether their hash codes match or differ as expected.
        /// </summary>
        [TestMethod]
        public void InferredPipelinesHashTest()
        {
            var mlContext = new MLContext();

            // Empty transform lists shared by the first three scenarios.
            var emptyTransformsA = new List<SuggestedTransform>();
            var emptyTransformsB = new List<SuggestedTransform>();

            // Scenario 1: identical learners, no hyperparams => equal hash codes.
            var plainTrainerA = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var plainTrainerB = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var plainPipelineA = new InferredPipeline(emptyTransformsA, plainTrainerA);
            var plainPipelineB = new InferredPipeline(emptyTransformsB, plainTrainerB);
            int plainHashA = plainPipelineA.GetHashCode();
            int plainHashB = plainPipelineB.GetHashCode();
            Assert.AreEqual(plainHashA, plainHashB);

            // Scenario 2: identical learners, one with hyperparams set and one without
            // => different hash codes.
            var twoLeaves = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 2) });
            var tunedTrainer = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension(), twoLeaves);
            var untunedTrainer = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var tunedPipeline = new InferredPipeline(emptyTransformsA, tunedTrainer);
            var untunedPipeline = new InferredPipeline(emptyTransformsB, untunedTrainer);
            int tunedHash = tunedPipeline.GetHashCode();
            int untunedHash = untunedPipeline.GetHashCode();
            Assert.AreNotEqual(tunedHash, untunedHash);

            // Scenario 3: identical learners with different hyperparam values
            // => different hash codes.
            var leavesSmall = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 2) });
            var leavesLarge = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 6) });
            var smallTrainer = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension(), leavesSmall);
            var largeTrainer = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension(), leavesLarge);
            var smallPipeline = new InferredPipeline(emptyTransformsA, smallTrainer);
            var largePipeline = new InferredPipeline(emptyTransformsB, largeTrainer);
            int smallHash = smallPipeline.GetHashCode();
            int largeHash = largePipeline.GetHashCode();
            Assert.AreNotEqual(smallHash, largeHash);

            // Scenario 4: identical learners with identical transforms => equal hash codes.
            var sameLearnerA = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var sameLearnerB = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var concatForSameA = CreateConcatTransforms(mlContext);
            var concatForSameB = CreateConcatTransforms(mlContext);
            var sameLearnerPipelineA = new InferredPipeline(concatForSameA, sameLearnerA);
            var sameLearnerPipelineB = new InferredPipeline(concatForSameB, sameLearnerB);
            int sameLearnerHashA = sameLearnerPipelineA.GetHashCode();
            int sameLearnerHashB = sameLearnerPipelineB.GetHashCode();
            Assert.AreEqual(sameLearnerHashA, sameLearnerHashB);

            // Scenario 5: identical transforms with different learners => different hash codes.
            var sdcaTrainer = new SuggestedTrainer(mlContext, new SdcaBinaryExtension());
            var lightGbmTrainer = new SuggestedTrainer(mlContext, new LightGbmBinaryExtension());
            var concatForSdca = CreateConcatTransforms(mlContext);
            var concatForLightGbm = CreateConcatTransforms(mlContext);
            var sdcaPipeline = new InferredPipeline(concatForSdca, sdcaTrainer);
            var lightGbmPipeline = new InferredPipeline(concatForLightGbm, lightGbmTrainer);
            int sdcaHash = sdcaPipeline.GetHashCode();
            int lightGbmHash = lightGbmPipeline.GetHashCode();
            Assert.AreNotEqual(sdcaHash, lightGbmHash);
        }

        /// <summary>
        /// Creates a one-element transform list holding a column-concatenating
        /// transform from input column "In" to output column "Out".
        /// </summary>
        private static List<SuggestedTransform> CreateConcatTransforms(MLContext context)
        {
            return new List<SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out")
            };
        }
    }
}
34 changes: 17 additions & 17 deletions src/Test/TrainerExtensionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,22 @@ public void BuildPipelineNodePropsLightGbm()

var expectedJson = @"
{
""NumBoostRound"": 1,
""NumBoostRound"": 20,
""LearningRate"": 1,
""NumLeaves"": 1,
""MinDataPerLeaf"": 1,
""UseSoftmax"": 1,
""UseCat"": 1,
""UseMissing"": 1,
""MinDataPerGroup"": 1,
""MaxCatThreshold"": 1,
""CatSmooth"": 1,
""CatL2"": 1,
""MinDataPerLeaf"": 10,
""UseSoftmax"": false,
""UseCat"": false,
""UseMissing"": false,
""MinDataPerGroup"": 50,
""MaxCatThreshold"": 16,
""CatSmooth"": 10,
""CatL2"": 0.5,
""TreeBooster"": {
""Name"": ""Microsoft.ML.LightGBM.TreeBooster"",
""Properties"": {
""RegLambda"": 1,
""RegAlpha"": 1
""RegLambda"": 0.5,
""RegAlpha"": 0.5
}
}
}";
Expand All @@ -99,12 +99,12 @@ public void BuildPipelineNodePropsSdca()
var sdcaBinaryProps = TrainerExtensionUtil.BuildPipelineNodeProps(TrainerName.SdcaBinary, sweepParams);
var expectedJson = @"
{
""L2Const"": 1,
""L1Threshold"": 1,
""ConvergenceTolerance"": 1,
""MaxIterations"": 1,
""Shuffle"": 1,
""BiasLearningRate"": 1
""L2Const"": 1E-07,
""L1Threshold"": 0.0,
""ConvergenceTolerance"": 0.01,
""MaxIterations"": 10,
""Shuffle"": true,
""BiasLearningRate"": 0.01
}";
Util.AssertObjectMatchesJson(expectedJson, sdcaBinaryProps);
}
Expand Down

0 comments on commit 023d6ce

Please sign in to comment.