Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions src/Microsoft.ML.PipelineInference/AutoInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ public sealed class AutoMlMlState : IMlState
private TransformInference.SuggestedTransform[] _availableTransforms;
private RecipeInference.SuggestedRecipe.SuggestedLearner[] _availableLearners;
private DependencyMap _dependencyMapping;
private Dictionary<string, ColumnPurpose> _columnPurpose;
public IPipelineOptimizer AutoMlEngine { get; set; }
public PipelinePattern[] BatchCandidates { get; set; }
public SupportedMetric Metric { get; }
Expand Down Expand Up @@ -370,19 +371,21 @@ private TransformInference.SuggestedTransform[] InferAndFilter(IDataView data, T
TransformInference.SuggestedTransform[] existingTransforms = null)
{
// Infer transforms using experts
var levelTransforms = TransformInference.InferTransforms(_env, data, args);
var levelTransforms = TransformInference.InferTransforms(_env, data, args, this._columnPurpose);
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this [](start = 91, length = 4)

We tend to omit the "this" keyword. #Resolved

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


In reply to: 199589164 [](ancestors = 199589164)


// Retain only those transforms inferred which were also passed in.
if (existingTransforms != null)
return levelTransforms.Where(t => existingTransforms.Any(t2 => t2.Equals(t))).ToArray();
return levelTransforms;
}

public void InferSearchSpace(int numTransformLevels)
public void InferSearchSpace(int numTransformLevels, Dictionary<string, ColumnPurpose> columnPurpose = null)
{
var learners = RecipeInference.AllowedLearners(_env, TrainerKind).ToArray();
if (_requestedLearners != null && _requestedLearners.Length > 0)
learners = learners.Where(l => _requestedLearners.Contains(l.LearnerName)).ToArray();

this._columnPurpose = columnPurpose;
ComputeSearchSpace(numTransformLevels, learners, (b, c) => InferAndFilter(b, c));
}

Expand Down Expand Up @@ -536,7 +539,26 @@ public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
var currentBatchSize = numberOfCandidates;
if (_terminator is IterationTerminator itr)
currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);
BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize);
BatchCandidates = AutoMlEngine.GetNextCandidates(
_sortedSampledElements.Select(kvp => kvp.Value),
currentBatchSize,
this._columnPurpose);

var h = _env.Register("AutoMlMlState");
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var h = _env.Register("AutoMlMlState"); [](start = 15, length = 40)

You should have a _host field; there is no reason to create a new host variable. #Resolved

using (var ch = h.Start("GetNextCandidates"))
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetNextCandidates [](start = 41, length = 17)

Print suggested transforms? #Resolved

{
foreach (var pipeline in BatchCandidates)
{
ch.Info("AutoInference Suggested Transforms.");
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AutoInference Suggested Transforms. [](start = 33, length = 35)

Either add pipeline identification or remove it; what's the point of repeating the same phrase every time? #Resolved

int transformK = 0;
foreach (var transform in pipeline.Transforms)
{
transformK += 1;
ch.Info($"Transform {transformK} : {transform.Transform.ToString()}");
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.ToString() [](start = 83, length = 11)

I'm not sure you need to call ToString here. #Resolved

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.


In reply to: 199591671 [](ancestors = 199591671)

}
}
}

return BatchCandidates;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ public DefaultsEngine(IHostEnvironment env, Arguments args)
_currentLearnerIndex = 0;
}

public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
var candidates = new List<PipelinePattern>();

Expand All @@ -53,7 +54,8 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>

do
{ // Make sure transforms set is valid. Repeat until passes verifier.
pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask), learner, "", Env);
pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask, columnPurpose),
learner, "", Env);
valid = PipelineVerifier(pipeline, transformsBitMask);
count++;
} while (!valid && count <= 1000);
Expand All @@ -69,15 +71,16 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>
return candidates.ToArray();
}

private TransformInference.SuggestedTransform[] SampleTransforms(out long transformsBitMask)
private TransformInference.SuggestedTransform[] SampleTransforms(out long transformsBitMask,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
// For now, return all transforms.
var sampledTransforms = AvailableTransforms.ToList();
transformsBitMask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

// Add final features concat transform.
sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms));
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, columnPurpose));

return sampledTransforms.ToArray();
}
Expand Down
34 changes: 20 additions & 14 deletions src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ private void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLear
}

private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
var sampledTransforms =
new List<TransformInference.SuggestedTransform>(
Expand Down Expand Up @@ -187,7 +188,7 @@ private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference
// cause an error in verification, since it isn't included in the original
// dependency mapping (i.e., its level isn't in the dictionary).
sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms));
DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, columnPurpose));
transformsBitMask = mask;

return sampledTransforms.ToArray();
Expand All @@ -202,7 +203,8 @@ private RecipeInference.SuggestedRecipe.SuggestedLearner[] GetTopLearners(IEnume
.Select(t=>AvailableLearners[t.Index]).ToArray();
}

public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
var prevCandidates = history.ToArray();

Expand All @@ -220,10 +222,10 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>
// number of candidates, using second stage logic.
UpdateLearners(GetTopLearners(prevCandidates));
_currentStage++;
return GetNextCandidates(prevCandidates, numCandidates);
return GetNextCandidates(prevCandidates, numCandidates, columnPurpose);
}
else
return GetInitialPipelines(prevCandidates, remainingNum);
return GetInitialPipelines(prevCandidates, remainingNum, columnPurpose);
case (int)Stages.Second:
// Second stage: Using top k learners, try random transform configurations.
var candidates = new List<PipelinePattern>();
Expand All @@ -233,7 +235,7 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>

// Get second stage candidates.
if (numSecondStageCandidates > 0)
candidates.AddRange(NextCandidates(prevCandidates, numSecondStageCandidates, true, true));
candidates.AddRange(NextCandidates(prevCandidates, numSecondStageCandidates, true, true, columnPurpose));

// Update stage when no more second stage trials to sample.
if (_remainingSecondStageTrials < 1)
Expand All @@ -242,22 +244,25 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern>
// If the number of requested candidates is smaller than remaining second stage candidates,
// draw candidates from remaining pool.
if (numThirdStageCandidates > 0)
candidates.AddRange(NextCandidates(prevCandidates, numThirdStageCandidates));
candidates.AddRange(NextCandidates(prevCandidates, numThirdStageCandidates, false, false, columnPurpose));

return candidates.ToArray();
default:
// Sample transforms according to weights and use hyperparameter optimization method.
// Third stage samples hyperparameters uniform randomly in KDO, fourth and above do not.
return NextCandidates(prevCandidates, numCandidates);
return NextCandidates(prevCandidates, numCandidates, false, false, columnPurpose);
}
}

private PipelinePattern[] GetInitialPipelines(IEnumerable<PipelinePattern> history, int numCandidates) =>
_secondaryEngines[_randomInit ? nameof(UniformRandomEngine) : nameof(DefaultsEngine)]
.GetNextCandidates(history, numCandidates);
private PipelinePattern[] GetInitialPipelines(IEnumerable<PipelinePattern> history, int numCandidates, Dictionary<string, ColumnPurpose> columnPurpose)
{
var engine = _secondaryEngines[_randomInit ? nameof(UniformRandomEngine) : nameof(DefaultsEngine)];
return engine.GetNextCandidates(history, numCandidates, columnPurpose);
}

private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
bool defaultHyperParams = false, bool uniformRandomTransforms = false)
bool defaultHyperParams = false, bool uniformRandomTransforms = false,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
const int maxNumberAttempts = 10;
double[] learnerWeights = LearnerHistoryToWeights(history, IsMaximizingMetric);
Expand Down Expand Up @@ -294,8 +299,9 @@ private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandi
do
{ // Make sure transforms set is valid and have not seen pipeline before.
// Repeat until passes or runs out of chances.
pipeline = new PipelinePattern(SampleTransforms(learner, history,
out var transformsBitMask, uniformRandomTransforms), learner, "", Env);
pipeline = new PipelinePattern(
SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms, columnPurpose),
learner, "", Env);
hashKey = GetHashKey(transformsBitMask, learner);
valid = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
count++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ public UniformRandomEngine(IHostEnvironment env)
: base(env, env.Register("UniformRandomEngine(AutoML)"))
{}

public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates)
public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates,
Dictionary<string, ColumnPurpose> columnPurpose = null)
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

columnPurpose [](start = 46, length = 13)

Any chance you can move them to the base class PipelineOptimizerBase so you don't have to pass them around all the time? #Resolved

Copy link
Author

@ghost ghost Jul 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the engines, I have created a field in PipelineOptimizerBase. This reduces some clutter.


In reply to: 199593664 [](ancestors = 199593664)

{
return GetRandomPipelines(numberOfCandidates);
return GetRandomPipelines(numberOfCandidates, columnPurpose);
}

private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
private PipelinePattern[] GetRandomPipelines(int numOfPipelines, Dictionary<string, ColumnPurpose> columnPurpose = null)
{
Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));
Expand Down Expand Up @@ -66,7 +67,7 @@ private PipelinePattern[] GetRandomPipelines(int numOfPipelines)

// Always include features concat transform
selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms));
DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, columnPurpose));

// Compute hash key for checking if we've already seen this pipeline.
// However, if we keep missing, don't want to get stuck in infinite loop.
Expand Down
10 changes: 6 additions & 4 deletions src/Microsoft.ML.PipelineInference/AutoMlUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ public static long TransformsToBitmask(TransformInference.SuggestedTransform[] t
/// (In other words, if there would be nothing for that concatenate transform to do.)
/// </summary>
private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env,
IDataView dataSample, int[] excludedColumnIndices, int level, int atomicIdOffset)
IDataView dataSample, int[] excludedColumnIndices, int level, int atomicIdOffset,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
var finalArgs = new TransformInference.Arguments
{
Expand All @@ -266,7 +267,7 @@ private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHo
ExcludedColumnIndices = excludedColumnIndices
};

var featuresConcatTransforms = TransformInference.InferConcatNumericFeatures(env, dataSample, finalArgs);
var featuresConcatTransforms = TransformInference.InferConcatNumericFeatures(env, dataSample, finalArgs, columnPurpose);

for (int i = 0; i < featuresConcatTransforms.Length; i++)
{
Expand All @@ -282,7 +283,8 @@ private static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHo
/// </summary>
public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env, IDataView data,
AutoInference.DependencyMap dependencyMapping, TransformInference.SuggestedTransform[] selectedTransforms,
TransformInference.SuggestedTransform[] allTransforms)
TransformInference.SuggestedTransform[] allTransforms,
Dictionary<string, ColumnPurpose> columnPurpose = null)
{
int level = 1;
int atomicGroupLimit = 0;
Expand All @@ -292,7 +294,7 @@ public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHos
atomicGroupLimit = allTransforms.Max(t => t.AtomicGroupId) + 1;
}
var excludedColumnIndices = GetExcludedColumnIndices(selectedTransforms, data, dependencyMapping);
return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit);
return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit, columnPurpose);
}

public static IDataView ApplyTransformSet(IHostEnvironment env, IDataView data, TransformInference.SuggestedTransform[] transforms)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ namespace Microsoft.ML.Runtime.PipelineInference
/// </summary>
public interface IPipelineOptimizer
{
PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates);
PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates,
Dictionary<string, ColumnPurpose> columnPurpose = null);

void SetSpace(TransformInference.SuggestedTransform[] availableTransforms,
RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
Expand Down Expand Up @@ -60,7 +61,8 @@ protected PipelineOptimizerBase(IHostEnvironment env, IHost host)
ProbUtils = new SweeperProbabilityUtils(host);
}

public abstract PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates);
public abstract PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numberOfCandidates,
Dictionary<string, ColumnPurpose> columnPurpose = null);

public virtual void SetSpace(TransformInference.SuggestedTransform[] availableTransforms,
RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
Expand Down
Loading