diff --git a/Directory.Build.targets b/Directory.Build.targets
index 1ab549e60a..5e6446add9 100644
--- a/Directory.Build.targets
+++ b/Directory.Build.targets
@@ -5,5 +5,33 @@
Text="The tools directory [$(ToolsDir)] does not exist. Please run build in the root of the repo to ensure the tools are installed before attempting to build an individual project." />
+
+
+
+ lib
+ .dll
+ .so
+ .dylib
+
+
+
+
+ $(NativeOutputPath)$(LibPrefix)%(NativeAssemblyReference.Identity)$(LibExtension)
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/release-notes/0.3/release-0.3.md b/docs/release-notes/0.3/release-0.3.md
new file mode 100644
index 0000000000..6b88d37f58
--- /dev/null
+++ b/docs/release-notes/0.3/release-0.3.md
@@ -0,0 +1,114 @@
+# ML.NET 0.3 Release Notes
+
+Today we are releasing ML.NET 0.3. This release focuses on adding components
+to ML.NET from the internal codebase (such as Factorization Machines,
+LightGBM, Ensembles, and LightLDA), enabling export to the ONNX model format,
+and bug fixes.
+
+### Installation
+
+ML.NET supports Windows, macOS, and Linux. See [supported OS versions of .NET
+Core
+2.0](https://github.com/dotnet/core/blob/master/release-notes/2.0/2.0-supported-os.md)
+for more details.
+
+You can install the ML.NET NuGet package from the CLI using:
+```
+dotnet add package Microsoft.ML
+```
+
+From package manager:
+```
+Install-Package Microsoft.ML
+```
+
+### Release Notes
+
+Below are some of the highlights from this release.
+
+* Added Field-Aware Factorization Machines (FFM) as a learner for binary
+ classification (#383)
+
+ * FFM is useful for various large sparse datasets, especially in areas
+ such as recommendations and click prediction. It has been used to win
+ various click prediction competitions such as the [Criteo Display
+ Advertising Challenge on
+ Kaggle](https://www.kaggle.com/c/criteo-display-ad-challenge). You can
+ learn more about the winning solution
+ [here](https://www.csie.ntu.edu.tw/~r01922136/kaggle-2014-criteo.pdf).
+ * FFM is a streaming learner so it does not require the entire dataset to
+ fit in memory.
+ * You can learn more about FFM
+ [here](http://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf) and some of the
+ speedup approaches that are used in ML.NET
+ [here](https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf).
+
+* Added [LightGBM](https://github.com/Microsoft/LightGBM) as a learner for
+ binary classification, multiclass classification, and regression (#392)
+
+ * LightGBM is a tree-based gradient boosting machine. It is under the
+ umbrella of the [DMTK](https://github.com/microsoft/dmtk) project at
+ Microsoft.
+ * The LightGBM repository shows various [comparison
+ experiments](https://github.com/Microsoft/LightGBM/blob/6488f319f243f7ff679a8e388a33e758c5802303/docs/Experiments.rst#comparison-experiment)
+ that show good accuracy and speed, so it is a great learner to try out.
+ It has also been used in winning solutions in various [ML
+ challenges](https://github.com/Microsoft/LightGBM/blob/a6e878e2fc6e7f545921cbe337cc511fbd1f500d/examples/README.md).
+ * This addition wraps LightGBM and exposes it in ML.NET.
+ * Note that LightGBM can also be used for ranking, but the ranking
+ evaluator is not yet exposed in ML.NET.
+
+* Added Ensemble learners for binary classification, multiclass
+ classification, and regression (#379)
+
+ * [Ensemble learners](https://en.wikipedia.org/wiki/Ensemble_learning)
+ enable using multiple learners in one model. As an example, the Ensemble
+ learner could train both `FastTree` and `AveragedPerceptron` and average
+ their predictions to get the final prediction.
+ * Combining multiple models of similar statistical performance may lead to
+ better performance than each model separately.
+
+* Added LightLDA transform for topic modeling (#377)
+
+ * LightLDA is an implementation of [Latent Dirichlet
+ Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation)
+ which infers topical structure from text data.
+ * The implementation of LightLDA in ML.NET is based on [this
+ paper](https://arxiv.org/abs/1412.1576). There is a distributed
+ implementation of LightLDA
+ [here](https://github.com/Microsoft/lightlda).
+
+* Added One-Versus-All (OVA) learner for multiclass classification (#363)
+
+ * [OVA](https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest)
+ (sometimes known as One-Versus-Rest) is an approach to using binary
+ classifiers in multiclass classification problems.
+ * While some binary classification learners in ML.NET natively support
+ multiclass classification (e.g. Logistic Regression), there are others
+ that do not (e.g. Averaged Perceptron). OVA enables using the latter
+ group for multiclass classification as well.
+
+* Enabled export of ML.NET models to the [ONNX](https://onnx.ai/) format
+ (#248)
+
+ * ONNX is a common format for representing deep learning models (also
+ supporting certain other types of models) which enables developers to
+ move models between different ML toolkits.
+ * ONNX models can be used in [Windows
+ ML](https://docs.microsoft.com/en-us/windows/uwp/machine-learning/overview)
+ which enables evaluating models on Windows 10 devices and taking
+ advantage of capabilities like hardware acceleration.
+ * Currently, only a subset of ML.NET components can be used in a model
+ that is converted to ONNX.
+
+Additional issues closed in this milestone can be found
+[here](https://github.com/dotnet/machinelearning/milestone/2?closed=1).
+
+### Acknowledgements
+
+Shoutout to [pkulikov](https://github.com/pkulikov),
+[veikkoeeva](https://github.com/veikkoeeva),
+[ross-p-smith](https://github.com/ross-p-smith),
+[jwood803](https://github.com/jwood803),
+[Nepomuceno](https://github.com/Nepomuceno), and the ML.NET team for their
+contributions as part of this release!
diff --git a/src/Microsoft.ML.Console/Console.cs b/src/Microsoft.ML.Console/Console.cs
index 12e6254cce..152d65951a 100644
--- a/src/Microsoft.ML.Console/Console.cs
+++ b/src/Microsoft.ML.Console/Console.cs
@@ -8,4 +8,4 @@ public static class Console
{
public static int Main(string[] args) => Maml.Main(args);
}
-}
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj b/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
index 25c51de69f..1256bf75ba 100644
--- a/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
+++ b/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
@@ -3,17 +3,34 @@
true
CORECLR
- netcoreapp2.0
- Exe
- MML
- Microsoft.ML.Runtime.Tools.Console.Console
+ netcoreapp2.0
+ Exe
+ MML
+ Microsoft.ML.Runtime.Tools.Console.Console
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs
index 99cfec0dd9..2c4f877c1b 100644
--- a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs
+++ b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs
@@ -527,6 +527,11 @@ public sealed class EntryPointAttribute : Attribute
/// Short name of the Entry Point
///
public string ShortName { get; set; }
+
+ ///
+ /// Remarks on the Entry Point, for more extensive XML documentation on the C# API
+ ///
+ public string Remarks { get; set; }
}
///
diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
index 498a75c9e5..af45202937 100644
--- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
+++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs
@@ -44,6 +44,7 @@ public sealed class EntryPointInfo
public readonly string Description;
public readonly string ShortName;
public readonly string FriendlyName;
+ public readonly string Remarks;
public readonly MethodInfo Method;
public readonly Type InputType;
public readonly Type OutputType;
@@ -63,6 +64,7 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method,
Method = method;
ShortName = attribute.ShortName;
FriendlyName = attribute.UserName;
+ Remarks = attribute.Remarks;
ObsoleteAttribute = obsoleteAttribute;
// There are supposed to be 2 parameters, env and input for non-macro nodes.
diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
index 4b2655ee5b..86edac082a 100644
--- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
+++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
@@ -653,10 +653,10 @@ public static void ReconcileKeyValuesWithNoNames(IHostEnvironment env, IDataView
ValueMapper mapper =
(ref uint src, ref uint dst) =>
{
- if (src == 0 || src > keyCount)
+ if (src > keyCount)
dst = 0;
else
- dst = src + 1;
+ dst = src;
};
views[i] = LambdaColumnMapper.Create(env, "ReconcileKeyValues", views[i], columnName, columnName,
views[i].Schema.GetColumnType(index), keyType, mapper);
@@ -866,7 +866,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string
}
else if (dvNumber == 0 && dv.Schema.HasKeyNames(i, type.KeyCount))
firstDvKeyWithNamesColumns.Add(name);
- else if (type.KeyCount > 0 && name != labelColName)
+ else if (type.KeyCount > 0 && name != labelColName && !dv.Schema.HasKeyNames(i, type.KeyCount))
{
// For any other key column (such as GroupId) we do not reconcile the key values, we only convert to U4.
if (!firstDvKeyNoNamesColumns.ContainsKey(name))
@@ -901,7 +901,7 @@ private static IDataView AppendPerInstanceDataViews(IHostEnvironment env, string
Func keyToValue =
(idv, i) =>
{
- foreach (var keyCol in firstDvVectorKeyColumns.Prepend(labelColName))
+ foreach (var keyCol in firstDvVectorKeyColumns.Concat(firstDvKeyWithNamesColumns).Prepend(labelColName))
{
if (keyCol == labelColName && labelColKeyValuesType == null)
continue;
diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs
index 654735c4b6..9f00fb70e0 100644
--- a/src/Microsoft.ML.FastTree/FastTree.cs
+++ b/src/Microsoft.ML.FastTree/FastTree.cs
@@ -82,6 +82,31 @@ public abstract class FastTreeTrainerBase :
protected string InnerArgs => CmdParser.GetSettings(Host, Args, new TArgs());
+ internal const string Remarks = @"
+FastTrees is an efficient implementation of the MART gradient boosting algorithm.
+Gradient boosting is a machine learning technique for regression problems.
+It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
+So this prediction model is actually an ensemble of weaker prediction models. In regression problems, boosting builds a series of of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
+
+
+MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
+A decision (or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
+At each leaf node, a value is returned. In the interior nodes, the decision is based on the test 'x <= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
+The functions that can be produced by a regression tree are all the piece-wise constant functions.
+
+
+The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
+The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
+
+
+- In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
+- In case of a regression problem, the output is the predicted value of the function.
+- In case of a ranking problem, the instances are ordered by the output value of the ensemble.
+
+Wikipedia: Gradient boosting (Gradient tree boosting).
+Greedy function approximation: A gradient boosting machine..
+";
+
public override bool NeedNormalization => false;
public override bool WantCaching => false;
diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
index 43409dadd3..edbdd47a03 100644
--- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
@@ -338,7 +338,11 @@ public void AdjustTreeOutputs(IChannel ch, RegressionTree tree,
public static partial class FastTree
{
- [TlcModule.EntryPoint(Name = "Trainers.FastTreeBinaryClassifier", Desc = FastTreeBinaryClassificationTrainer.Summary, UserName = FastTreeBinaryClassificationTrainer.UserNameValue, ShortName = FastTreeBinaryClassificationTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastTreeBinaryClassifier",
+ Desc = FastTreeBinaryClassificationTrainer.Summary,
+ Remarks = FastTreeBinaryClassificationTrainer.Remarks,
+ UserName = FastTreeBinaryClassificationTrainer.UserNameValue,
+ ShortName = FastTreeBinaryClassificationTrainer.ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastTreeBinaryClassificationTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.FastTree/FastTreeRanking.cs b/src/Microsoft.ML.FastTree/FastTreeRanking.cs
index 2263d2541e..a689408748 100644
--- a/src/Microsoft.ML.FastTree/FastTreeRanking.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeRanking.cs
@@ -1096,7 +1096,11 @@ public static FastTreeRankingPredictor Create(IHostEnvironment env, ModelLoadCon
public static partial class FastTree
{
- [TlcModule.EntryPoint(Name = "Trainers.FastTreeRanker", Desc = FastTreeRankingTrainer.Summary, UserName = FastTreeRankingTrainer.UserNameValue, ShortName = FastTreeRankingTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastTreeRanker",
+ Desc = FastTreeRankingTrainer.Summary,
+ Remarks = FastTreeRankingTrainer.Remarks,
+ UserName = FastTreeRankingTrainer.UserNameValue,
+ ShortName = FastTreeRankingTrainer.ShortName)]
public static CommonOutputs.RankingOutput TrainRanking(IHostEnvironment env, FastTreeRankingTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.FastTree/FastTreeRegression.cs b/src/Microsoft.ML.FastTree/FastTreeRegression.cs
index ae7f4cfdbd..40ee906b5b 100644
--- a/src/Microsoft.ML.FastTree/FastTreeRegression.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeRegression.cs
@@ -448,7 +448,11 @@ public static FastTreeRegressionPredictor Create(IHostEnvironment env, ModelLoad
public static partial class FastTree
{
- [TlcModule.EntryPoint(Name = "Trainers.FastTreeRegressor", Desc = FastTreeRegressionTrainer.Summary, UserName = FastTreeRegressionTrainer.UserNameValue, ShortName = FastTreeRegressionTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastTreeRegressor",
+ Desc = FastTreeRegressionTrainer.Summary,
+ Remarks = FastTreeRegressionTrainer.Remarks,
+ UserName = FastTreeRegressionTrainer.UserNameValue,
+ ShortName = FastTreeRegressionTrainer.ShortName)]
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, FastTreeRegressionTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs
index 19a026df20..00c29afd94 100644
--- a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs
@@ -36,8 +36,11 @@ public sealed partial class FastTreeTweedieTrainer : BoostingFastTreeTrainerBase
{
public const string LoadNameValue = "FastTreeTweedieRegression";
public const string UserNameValue = "FastTree (Boosted Trees) Tweedie Regression";
- public const string Summary = "Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner " +
- "is a generalization of Poisson, compound Poisson, and gamma regression.";
+ public const string Summary = "Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner is a generalization of Poisson, compound Poisson, and gamma regression.";
+ new public const string Remarks = @"
+Wikipedia: Gradient boosting (Gradient tree boosting)
+Greedy function approximation: A gradient boosting machine
+";
public const string ShortName = "fttweedie";
@@ -460,7 +463,10 @@ protected override void Map(ref VBuffer src, ref float dst)
public static partial class FastTree
{
- [TlcModule.EntryPoint(Name = "Trainers.FastTreeTweedieRegressor", Desc = FastTreeTweedieTrainer.Summary, UserName = FastTreeTweedieTrainer.UserNameValue, ShortName = FastTreeTweedieTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastTreeTweedieRegressor",
+ Desc = FastTreeTweedieTrainer.Summary,
+ UserName = FastTreeTweedieTrainer.UserNameValue,
+ ShortName = FastTreeTweedieTrainer.ShortName)]
public static CommonOutputs.RegressionOutput TrainTweedieRegression(IHostEnvironment env, FastTreeTweedieTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.FastTree/RandomForest.cs b/src/Microsoft.ML.FastTree/RandomForest.cs
index 88676754d5..2f670539b4 100644
--- a/src/Microsoft.ML.FastTree/RandomForest.cs
+++ b/src/Microsoft.ML.FastTree/RandomForest.cs
@@ -12,6 +12,28 @@ public abstract class RandomForestTrainerBase : FastTreeTrain
where TArgs : FastForestArgumentsBase, new()
where TPredictor : IPredictorProducing
{
+ new internal const string Remarks = @"
+Decision trees are non-parametric models that perform a sequence of simple tests on inputs.
+This decision procedure maps them to outputs found in the training dataset whose inputs were similar to the instance being processed.
+A decision is made at each node of the binary tree data structure based on a measure of similarity that maps each instance recursively through the branches of the tree until the appropriate leaf node is reached and the output decision returned.
+Decision trees have several advantages:
+
+- They are efficient in both computation and memory usage during training and prediction.
+- They can represent non-linear decision boundaries.
+- They perform integrated feature selection and classification.
+- They are resilient in the presence of noisy features.
+
+Fast forest is a random forest implementation.
+The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction.
+An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model.
+This decision forest classifier consists of an ensemble of decision trees.
+Generally, ensemble models provide better coverage and accuracy than single decision trees.
+Each tree in a decision forest outputs a Gaussian distribution.
+Wikipedia: Random forest
+Quantile regression forest
+From Stumps to Trees to Forests
+";
+
private readonly bool _quantileEnabled;
protected RandomForestTrainerBase(IHostEnvironment env, TArgs args, bool quantileEnabled = false)
diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs
index 54a05d5b11..e085996747 100644
--- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs
+++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs
@@ -208,7 +208,11 @@ protected override void GetGradientInOneQuery(int query, int threadIndex)
public static partial class FastForest
{
- [TlcModule.EntryPoint(Name = "Trainers.FastForestBinaryClassifier", Desc = FastForestClassification.Summary, UserName = FastForestClassification.UserNameValue, ShortName = FastForestClassification.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastForestBinaryClassifier",
+ Desc = FastForestClassification.Summary,
+ Remarks = FastForestClassification.Remarks,
+ UserName = FastForestClassification.UserNameValue,
+ ShortName = FastForestClassification.ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, FastForestClassification.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs
index 3fd97afb32..74bf8c2a1c 100644
--- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs
+++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs
@@ -280,7 +280,11 @@ public BasicImpl(Dataset trainData, Arguments args)
public static partial class FastForest
{
- [TlcModule.EntryPoint(Name = "Trainers.FastForestRegressor", Desc = FastForestRegression.Summary, UserName = FastForestRegression.LoadNameValue, ShortName = FastForestRegression.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FastForestRegressor",
+ Desc = FastForestRegression.Summary,
+ Remarks = FastForestRegression.Remarks,
+ UserName = FastForestRegression.LoadNameValue,
+ ShortName = FastForestRegression.ShortName)]
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, FastForestRegression.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs
index 1f09ec850f..3e47c595cd 100644
--- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs
+++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs
@@ -36,6 +36,14 @@ public class KMeansPlusPlusTrainer : TrainerBase
+K-means++ improves upon K-means by using the Yinyang K-Means method for choosing the initial cluster centers.
+YYK-Means accelerates K-Means up to an order of magnitude while producing exactly the same clustering results (modulo floating point precision issues).
+YYK-Means observes that there is a lot of redundancy across iterations in the KMeans algorithms and most points do not change their clusters during an iteration.
+It uses various bounding techniques to identify this redundancy and eliminate many distance computations and optimize centroid computations.
+K-means.
+K-means++
+";
public enum InitAlgorithm
{
@@ -225,7 +233,11 @@ private static int ComputeNumThreads(IHost host, int? argNumThreads)
return Math.Max(1, maxThreads);
}
- [TlcModule.EntryPoint(Name = "Trainers.KMeansPlusPlusClusterer", Desc = KMeansPlusPlusTrainer.Summary, UserName = UserNameValue, ShortName = ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.KMeansPlusPlusClusterer",
+ Desc = Summary,
+ Remarks = Remarks,
+ UserName = UserNameValue,
+ ShortName = ShortName)]
public static CommonOutputs.ClusteringOutput TrainKMeans(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
index e6c5ed360a..0612135ce3 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
@@ -32,7 +32,7 @@ public interface ISupportBoosterParameterFactory : IComponentFactory res);
+ void UpdateParameters(Dictionary res);
}
///
@@ -54,7 +54,7 @@ protected BoosterParameter(TArgs args)
///
/// Update the parameters by specific Booster, will update parameters into "res" directly.
///
- public virtual void UpdateParameters(Dictionary res)
+ public virtual void UpdateParameters(Dictionary res)
{
FieldInfo[] fields = Args.GetType().GetFields();
foreach (var field in fields)
@@ -163,7 +163,7 @@ public TreeBooster(Arguments args)
Contracts.CheckUserArg(Args.ScalePosWeight > 0 && Args.ScalePosWeight <= 1, nameof(Args.ScalePosWeight), "must be in (0,1].");
}
- public override void UpdateParameters(Dictionary res)
+ public override void UpdateParameters(Dictionary res)
{
base.UpdateParameters(res);
res["boosting_type"] = Name;
@@ -207,7 +207,7 @@ public DartBooster(Arguments args)
Contracts.CheckUserArg(Args.SkipDrop >= 0 && Args.SkipDrop < 1, nameof(Args.SkipDrop), "must be in [0,1).");
}
- public override void UpdateParameters(Dictionary res)
+ public override void UpdateParameters(Dictionary res)
{
base.UpdateParameters(res);
res["boosting_type"] = Name;
@@ -244,7 +244,7 @@ public GossBooster(Arguments args)
Contracts.Check(Args.TopRate + Args.OtherRate <= 1, "Sum of topRate and otherRate cannot be larger than 1.");
}
- public override void UpdateParameters(Dictionary res)
+ public override void UpdateParameters(Dictionary res)
{
base.UpdateParameters(res);
res["boosting_type"] = Name;
@@ -355,11 +355,11 @@ public enum EvalMetricType
[Argument(ArgumentType.Multiple, HelpText = "Parallel LightGBM Learning Algorithm", ShortName = "parag")]
public ISupportParallel ParallelTrainer = new SingleTrainerFactory();
- internal Dictionary ToDictionary(IHost host)
+ internal Dictionary ToDictionary(IHost host)
{
Contracts.CheckValue(host, nameof(host));
Contracts.CheckUserArg(MaxBin > 0, nameof(MaxBin), "must be > 0.");
- Dictionary res = new Dictionary();
+ Dictionary res = new Dictionary();
var boosterParams = Booster.CreateComponent(host);
boosterParams.UpdateParameters(res);
diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
index 9cecb0da6b..0b71bfa70e 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
@@ -13,7 +13,7 @@
[assembly: LoadableClass(LightGbmBinaryTrainer.Summary, typeof(LightGbmBinaryTrainer), typeof(LightGbmArguments),
new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureTreeEnsembleTrainer) },
- "LightGBM Binary Classification", LightGbmBinaryTrainer.LoadNameValue, LightGbmBinaryTrainer.ShortName, DocName = "trainer/LightGBM.md")]
+ LightGbmBinaryTrainer.UserName, LightGbmBinaryTrainer.LoadNameValue, LightGbmBinaryTrainer.ShortName, DocName = "trainer/LightGBM.md")]
[assembly: LoadableClass(typeof(IPredictorProducing), typeof(LightGbmBinaryPredictor), null, typeof(SignatureLoadModel),
"LightGBM Binary Executor",
@@ -27,6 +27,7 @@ public sealed class LightGbmBinaryPredictor : FastTreePredictionWrapper
{
public const string LoaderSignature = "LightGBMBinaryExec";
public const string RegistrationName = "LightGBMBinaryPredictor";
+
private static VersionInfo GetVersionInfo()
{
// REVIEW: can we decouple the version from FastTree predictor version ?
@@ -82,9 +83,10 @@ public static IPredictorProducing Create(IHostEnvironment env, ModelLoadC
public sealed class LightGbmBinaryTrainer : LightGbmTrainerBase>
{
- public const string Summary = "LightGBM Binary Classifier";
- public const string LoadNameValue = "LightGBMBinary";
- public const string ShortName = "LightGBM";
+ internal const string UserName = "LightGBM Binary Classifier";
+ internal const string LoadNameValue = "LightGBMBinary";
+ internal const string ShortName = "LightGBM";
+ internal const string Summary = "Train a LightGBM binary classification model.";
public LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args)
: base(env, args, PredictionKind.BinaryClassification, "LGBBINCL")
@@ -122,14 +124,15 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role
}
///
- /// A component to train an LightGBM model.
+ /// A component to train a LightGBM model.
///
public static partial class LightGbm
{
[TlcModule.EntryPoint(
Name = "Trainers.LightGbmBinaryClassifier",
- Desc = "Train an LightGBM binary class model",
- UserName = LightGbmBinaryTrainer.Summary,
+ Desc = LightGbmBinaryTrainer.Summary,
+ Remarks = LightGbmBinaryTrainer.Remarks,
+ UserName = LightGbmBinaryTrainer.UserName,
ShortName = LightGbmBinaryTrainer.ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LightGbmArguments input)
{
diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
index 5acd90e83d..479be65bec 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.
using System;
+using System.Globalization;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
@@ -130,9 +131,9 @@ protected override void ConvertNaNLabels(IChannel ch, RoleMappedData data, float
protected override void GetDefaultParameters(IChannel ch, int numRow, bool hasCategorical, int totalCats, bool hiddenMsg=false)
{
base.GetDefaultParameters(ch, numRow, hasCategorical, totalCats, true);
- int numLeaves = int.Parse(Options["num_leaves"]);
+ int numLeaves = (int)Options["num_leaves"];
int minDataPerLeaf = Args.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass);
- Options["min_data_per_leaf"] = minDataPerLeaf.ToString();
+ Options["min_data_per_leaf"] = minDataPerLeaf;
if (!hiddenMsg)
{
if (!Args.LearningRate.HasValue)
@@ -149,7 +150,7 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role
Host.AssertValue(ch);
ch.Assert(PredictionKind == PredictionKind.MultiClassClassification);
ch.Assert(_numClass > 1);
- Options["num_class"] = _numClass.ToString();
+ Options["num_class"] = _numClass;
bool useSoftmax = false;
if (Args.UseSoftmax.HasValue)
@@ -174,13 +175,14 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role
}
///
- /// A component to train an LightGBM model.
+ /// A component to train a LightGBM model.
///
public static partial class LightGbm
{
[TlcModule.EntryPoint(
Name = "Trainers.LightGbmClassifier",
- Desc = "Train an LightGBM multi class model",
+ Desc = "Train a LightGBM multi class model.",
+ Remarks = LightGbmMulticlassTrainer.Remarks,
UserName = LightGbmMulticlassTrainer.Summary,
ShortName = LightGbmMulticlassTrainer.ShortName)]
public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, LightGbmArguments input)
diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
index b2b82fbd24..2ed436b4eb 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs
@@ -10,7 +10,7 @@
using Microsoft.ML.Runtime.LightGBM;
using Microsoft.ML.Runtime.Model;
-[assembly: LoadableClass(LightGbmRankingTrainer.Summary, typeof(LightGbmRankingTrainer), typeof(LightGbmArguments),
+[assembly: LoadableClass(LightGbmRankingTrainer.UserName, typeof(LightGbmRankingTrainer), typeof(LightGbmArguments),
new[] { typeof(SignatureRankerTrainer), typeof(SignatureTrainer), typeof(SignatureTreeEnsembleTrainer) },
"LightGBM Ranking", LightGbmRankingTrainer.LoadNameValue, LightGbmRankingTrainer.ShortName, DocName = "trainer/LightGBM.md")]
@@ -73,7 +73,7 @@ public static LightGbmRankingPredictor Create(IHostEnvironment env, ModelLoadCon
public sealed class LightGbmRankingTrainer : LightGbmTrainerBase
{
- public const string Summary = "LightGBM Ranking";
+ public const string UserName = "LightGBM Ranking";
public const string LoadNameValue = "LightGBMRanking";
public const string ShortName = "LightGBMRank";
@@ -123,11 +123,15 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role
}
///
- /// A component to train an LightGBM model.
+ /// A component to train a LightGBM model.
///
public static partial class LightGbm
{
- [TlcModule.EntryPoint(Name = "Trainers.LightGbmRanker", Desc = "Train an LightGBM ranking model", UserName = LightGbmRankingTrainer.Summary, ShortName = LightGbmRankingTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.LightGbmRanker",
+ Remarks = LightGbmMulticlassTrainer.Remarks,
+ Desc = "Train a LightGBM ranking model.",
+ UserName = LightGbmRankingTrainer.UserName,
+ ShortName = LightGbmRankingTrainer.ShortName)]
public static CommonOutputs.RankingOutput TrainRanking(IHostEnvironment env, LightGbmArguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
index 5024009f98..36c82aa79a 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
@@ -116,11 +116,15 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role
}
///
- /// A component to train an LightGBM model.
+ /// A component to train a LightGBM model.
///
public static partial class LightGbm
{
- [TlcModule.EntryPoint(Name = "Trainers.LightGbmRegressor", Desc = LightGbmRegressorTrainer.Summary, UserName = LightGbmRegressorTrainer.UserNameValue, ShortName = LightGbmRegressorTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.LightGbmRegressor",
+ Desc = LightGbmRegressorTrainer.Summary,
+ Remarks = LightGbmRegressorTrainer.Remarks,
+ UserName = LightGbmRegressorTrainer.UserNameValue,
+ ShortName = LightGbmRegressorTrainer.ShortName)]
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, LightGbmArguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
index aff3befa0d..a93fa2ad60 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs
@@ -3,14 +3,10 @@
// See the LICENSE file in the project root for more information.
using System;
-using System.Linq;
-using System.Threading;
-using System.Threading.Tasks;
using System.Collections.Generic;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Training;
-using Microsoft.ML.Runtime.FastTree.Internal;
namespace Microsoft.ML.Runtime.LightGBM
{
@@ -49,13 +45,22 @@ private sealed class CategoricalMetaData
protected readonly IHost Host;
protected readonly LightGbmArguments Args;
- protected readonly Dictionary Options;
+
+ ///
+ /// Stores arguments as objects so that they can be converted to invariant-culture strings at the end,
+ /// which keeps the code culture agnostic. When retrieving a value from this dictionary as a string,
+ /// convert it with string.Format(CultureInfo.InvariantCulture, "{0}", Options[key]).
+ ///
+ protected readonly Dictionary Options;
protected readonly IParallel ParallelTraining;
// Store _featureCount and _trainedEnsemble to construct predictor.
protected int FeatureCount;
protected FastTree.Internal.Ensemble TrainedEnsemble;
+ internal const string Remarks = @"Light GBM is an open source implementation of boosted trees.
+GitHub: LightGBM";
+
#endregion
protected LightGbmTrainerBase(IHostEnvironment env, LightGbmArguments args, PredictionKind predictionKind, string name)
@@ -159,9 +164,9 @@ protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCat
double learningRate = Args.LearningRate ?? DefaultLearningRate(numRow, hasCategarical, totalCats);
int numLeaves = Args.NumLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats);
int minDataPerLeaf = Args.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1);
- Options["learning_rate"] = learningRate.ToString();
- Options["num_leaves"] = numLeaves.ToString();
- Options["min_data_per_leaf"] = minDataPerLeaf.ToString();
+ Options["learning_rate"] = learningRate;
+ Options["num_leaves"] = numLeaves;
+ Options["min_data_per_leaf"] = minDataPerLeaf;
if (!hiddenMsg)
{
if (!Args.LearningRate.HasValue)
@@ -192,7 +197,7 @@ private static List GetCategoricalBoundires(int[] categoricalFeatures, int
{
if (j < categoricalFeatures.Length && curFidx == categoricalFeatures[j])
{
- if (curFidx > catBoundaries.Last())
+ if (curFidx > catBoundaries[catBoundaries.Count - 1])
catBoundaries.Add(curFidx);
if (categoricalFeatures[j + 1] - categoricalFeatures[j] >= 0)
{
@@ -219,7 +224,7 @@ private static List GetCategoricalBoundires(int[] categoricalFeatures, int
private static List ConstructCategoricalFeatureMetaData(int[] categoricalFeatures, int rawNumCol, ref CategoricalMetaData catMetaData)
{
List catBoundaries = GetCategoricalBoundires(categoricalFeatures, rawNumCol);
- catMetaData.NumCol = catBoundaries.Count() - 1;
+ catMetaData.NumCol = catBoundaries.Count - 1;
catMetaData.CategoricalBoudaries = catBoundaries.ToArray();
catMetaData.IsCategoricalFeature = new bool[catMetaData.NumCol];
catMetaData.OnehotIndices = new int[rawNumCol];
@@ -279,7 +284,7 @@ private CategoricalMetaData GetCategoricalMetaData(IChannel ch, RoleMappedData t
{
var catIndices = ConstructCategoricalFeatureMetaData(categoricalFeatures, rawNumCol, ref catMetaData);
// Set categorical features
- Options["categorical_feature"] = String.Join(",", catIndices);
+ Options["categorical_feature"] = string.Join(",", catIndices);
}
return catMetaData;
}
@@ -527,13 +532,13 @@ private void GetFeatureValueSparse(IChannel ch, FloatLabelCursor cursor,
++nhot;
var prob = rand.NextSingle();
if (prob < 1.0f / nhot)
- values[values.Count() - 1] = fv;
+ values[values.Count - 1] = fv;
}
lastIdx = newColIdx;
}
indices = featureIndices.ToArray();
featureValues = values.ToArray();
- cnt = featureIndices.Count();
+ cnt = featureIndices.Count;
}
else
{
diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs
index 116fd1c4a9..c9f3128434 100644
--- a/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs
+++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs
@@ -20,7 +20,7 @@ internal sealed class Booster : IDisposable
public IntPtr Handle { get; private set; }
public int BestIteration { get; set; }
- public Booster(Dictionary parameters, Dataset trainset, Dataset validset = null)
+ public Booster(Dictionary parameters, Dataset trainset, Dataset validset = null)
{
var param = LightGbmInterfaceUtils.JoinParameters(parameters);
var handle = IntPtr.Zero;
diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs
index 6d5e13bbeb..eec00d9bd1 100644
--- a/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs
+++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs
@@ -3,8 +3,9 @@
// See the LICENSE file in the project root for more information.
using System;
-using System.Runtime.InteropServices;
using System.Collections.Generic;
+using System.Globalization;
+using System.Runtime.InteropServices;
namespace Microsoft.ML.Runtime.LightGBM
{
@@ -199,13 +200,13 @@ public static void Check(int res)
///
/// Join the parameters to key=value format.
///
- public static string JoinParameters(Dictionary parameters)
+ public static string JoinParameters(Dictionary parameters)
{
if (parameters == null)
return "";
List res = new List();
foreach (var keyVal in parameters)
- res.Add(keyVal.Key + "=" + keyVal.Value);
+ res.Add(keyVal.Key + "=" + string.Format(CultureInfo.InvariantCulture, "{0}", keyVal.Value));
return string.Join(" ", res);
}
diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmTraining.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmTraining.cs
index 9699581a13..8b2036fb11 100644
--- a/src/Microsoft.ML.LightGBM/WrappedLightGbmTraining.cs
+++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmTraining.cs
@@ -16,7 +16,7 @@ internal static class WrappedLightGbmTraining
/// Train and return a booster.
///
public static Booster Train(IChannel ch, IProgressChannel pch,
- Dictionary parameters, Dataset dtrain, Dataset dvalid = null, int numIteration = 100,
+ Dictionary parameters, Dataset dtrain, Dataset dvalid = null, int numIteration = 100,
bool verboseEval = true, int earlyStoppingRound = 0)
{
// create Booster.
@@ -33,12 +33,9 @@ public static Booster Train(IChannel ch, IProgressChannel pch,
double bestScore = double.MaxValue;
double factorToSmallerBetter = 1.0;
- if (earlyStoppingRound > 0 && (parameters["metric"] == "auc"
- || parameters["metric"] == "ndcg"
- || parameters["metric"] == "map"))
- {
+ var metric = (string)parameters["metric"];
+ if (earlyStoppingRound > 0 && (metric == "auc" || metric == "ndcg" || metric == "map"))
factorToSmallerBetter = -1.0;
- }
const int evalFreq = 50;
diff --git a/src/Microsoft.ML.PCA/PcaTrainer.cs b/src/Microsoft.ML.PCA/PcaTrainer.cs
index 0945b04041..6a6efd1ea6 100644
--- a/src/Microsoft.ML.PCA/PcaTrainer.cs
+++ b/src/Microsoft.ML.PCA/PcaTrainer.cs
@@ -284,7 +284,11 @@ private static void PostProcess(VBuffer[] y, Float[] sigma, Float[] z, in
}
}
- [TlcModule.EntryPoint(Name = "Trainers.PcaAnomalyDetector", Desc = "Train an PCA Anomaly model.", UserName = UserNameValue, ShortName = ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.PcaAnomalyDetector",
+ Desc = "Train an PCA Anomaly model.",
+ Remarks = PcaPredictor.Remarks,
+ UserName = UserNameValue,
+ ShortName = ShortName)]
public static CommonOutputs.AnomalyDetectionOutput TrainPcaAnomaly(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
@@ -312,6 +316,14 @@ public sealed class PcaPredictor : PredictorBase,
{
public const string LoaderSignature = "pcaAnomExec";
public const string RegistrationName = "PCAPredictor";
+ internal const string Remarks = @"
+Principle Component Analysis (PCA) is a dimensionality-reduction transform which computes the projection of the feature vector to onto a low-rank subspace.
+Its training is done using the technique described in the paper: Combining Structured and Unstructured Randomness in Large Scale PCA,
+and the paper Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions
+Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices
+A randomized algorithm for principal component analysis
+Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions
+";
private static VersionInfo GetVersionInfo()
{
diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs
index 87d72471d9..08190acb74 100644
--- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs
+++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs
@@ -39,6 +39,15 @@ public sealed class FieldAwareFactorizationMachineTrainer : TrainerBase
+Field Aware Factorization Machines use, in addition to the input variables, factorized parameters to model the interaction between pairs of variables.
+The algorithm is particularly useful for high dimensional datasets which can be very sparse (e.g. click-prediction for advertising systems).
+An advantage of FFM over SVMs is that the training data does not need to be stored in memory, and the coefficients can be optimized directly.
+Field Aware Factorization Machines
+Field-aware Factorization Machines for CTR Prediction
+Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
+An Improved Stochastic Gradient Method for Training Large-scale Field-aware Factorization Machine.
+";
public sealed class Arguments : LearnerInputBaseWithLabel
{
@@ -404,7 +413,11 @@ public override FieldAwareFactorizationMachinePredictor CreatePredictor()
return _pred;
}
- [TlcModule.EntryPoint(Name = "Trainers.FieldAwareFactorizationMachineBinaryClassifier", Desc = FieldAwareFactorizationMachineTrainer.Summary, UserName = FieldAwareFactorizationMachineTrainer.UserName, ShortName = FieldAwareFactorizationMachineTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.FieldAwareFactorizationMachineBinaryClassifier",
+ Desc = Summary,
+ Remarks = Remarks,
+ UserName = UserName,
+ ShortName = ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs
index 56a3663054..93a7cc2b32 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs
@@ -222,6 +222,26 @@ internal virtual void Check(IHostEnvironment env)
}
}
+ internal const string Remarks = @"
+This classifier is a trainer based on the Stochastic DualCoordinate Ascent(SDCA) method, a state-of-the-art optimization technique for convex objective functions.
+The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation
+that supports multi-threading.
+
+Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
+Several choices of loss functions are also provided.
+The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
+
+
+Note that SDCA is a stochastic and streaming optimization algorithm.
+The results depends on the order of the training data. For reproducible results, it is recommended that one sets to
+False and to 1.
+Elastic net regularization can be specified by the and parameters. Note that the has an effect on the rate of convergence.
+In general, the larger the , the faster SDCA converges.
+
+Scaling Up Stochastic Dual Coordinate Ascent.
+Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization.
+";
+
// The order of these matter, since they are used as indices into arrays.
protected enum MetricKind
{
@@ -1775,7 +1795,11 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm
///
public static partial class Sdca
{
- [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentBinaryClassifier", Desc = "Train an SDCA binary model.", UserName = LinearClassificationTrainer.UserNameValue, ShortName = LinearClassificationTrainer.LoadNameValue)]
+ [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentBinaryClassifier",
+ Desc = "Train an SDCA binary model.",
+ Remarks = LinearClassificationTrainer.Remarks,
+ UserName = LinearClassificationTrainer.UserNameValue,
+ ShortName = LinearClassificationTrainer.LoadNameValue)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LinearClassificationTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
index 89f4866228..95982047aa 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs
@@ -94,6 +94,35 @@ public abstract class ArgumentsBase : LearnerInputBaseWithWeight
public bool EnforceNonNegativity = false;
}
+ internal const string Remarks = @"
+If the dependent variable has more than two possible values (blood type given diagnostic test results), then the logistic regression is multinomial.
+
+The optimization technique used for LogisticRegressionBinaryClassifier is the limited memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS).
+Both the L-BFGS and regular BFGS algorithms use quasi-Newtonian methods to estimate the computationally intensive Hessian matrix in the equation used by Newton's method to calculate steps.
+But the L-BFGS approximation uses only a limited amount of memory to compute the next step direction,
+so that it is especially suited for problems with a large number of variables.
+The MemorySize parameter specifies the number of past positions and gradients to store for use in the computation of the next step.
+
+
+This learner can use elastic net regularization: a linear combination of L1 (lasso) and L2 (ridge) regularizations.
+Regularization is a method that can render an ill-posed problem more tractable by imposing constraints that provide information to supplement the data and that prevents overfitting by penalizing models with extreme coefficient values.
+This can improve the generalization of the model learned by selecting the optimal complexity in the bias-variance tradeoff.
+Regularization works by adding the penalty that is associated with coefficient values to the error of the hypothesis.
+An accurate model with extreme coefficient values would be penalized more, but a less accurate model with more conservative values would be penalized less. L1 and L2 regularization have different effects and uses that are complementary in certain respects.
+
+- L1Weight: can be applied to sparse models, when working with high-dimensional data.
+It pulls small weights associated features that are relatively unimportant towards 0.
+- L2Weight: is preferable for data that is not sparse. It pulls large weights towards zero.
+
+Adding the ridge penalty to the regularization overcomes some of lasso's limitations. It can improve its predictive accuracy, for example, when the number of predictors is greater than the sample size. If x = l1_weight and y = l2_weight, ax + by = c defines the linear span of the regularization terms.
+The default values of x and y are both 1.
+An agressive regularization can harm predictive capacity by excluding important variables out of the model. So choosing the optimal values for the regularization parameters is important for the performance of the logistic regression model.
+Scalable Training of L1-Regularized Log-Linear Models.
+Test Run - L1 and L2 Regularization for Machine Learning.
+Wikipedia: L-BFGS.
+Wikipedia: Logistic regression.
+";
+
protected int NumFeatures;
protected VBuffer CurrentWeights;
protected long NumGoodRows;
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
index 5abc062bf7..0e85cd6712 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs
@@ -386,7 +386,11 @@ public override ParameterMixingCalibratedPredictor CreatePredictor()
new PlattCalibrator(Host, -1, 0));
}
- [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionBinaryClassifier", Desc = "Train a logistic regression binary model", UserName = UserNameValue, ShortName = ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionBinaryClassifier",
+ Desc = Summary,
+ Remarks = Remarks,
+ UserName = UserNameValue,
+ ShortName = ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
index 51decafea5..f2fad63794 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
@@ -961,7 +961,11 @@ public IRow GetStatsIRowOrNull(RoleMappedSchema schema)
///
public partial class LogisticRegression
{
- [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionClassifier", Desc = "Train a logistic regression multi class model", UserName = MulticlassLogisticRegression.UserNameValue, ShortName = MulticlassLogisticRegression.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.LogisticRegressionClassifier",
+ Desc = Summary,
+ Remarks = MulticlassLogisticRegression.Remarks,
+ UserName = MulticlassLogisticRegression.UserNameValue,
+ ShortName = MulticlassLogisticRegression.ShortName)]
public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, MulticlassLogisticRegression.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs
index a06b54fc26..239392b085 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs
@@ -32,6 +32,12 @@ public sealed class MultiClassNaiveBayesTrainer : TrainerBase
+Naive Bayes is a probabilistic classifier that can be used for multiclass problems.
+Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated based on the sample count for each feature combination groups.
+However, Naive Bayes Classifier is feasible only if the number of features and the values each feature can take is relatively small.
+It also assumes that the features are strictly independent.
+";
public sealed class Arguments : LearnerInputBaseWithLabel
{
@@ -124,7 +130,9 @@ public override MultiClassNaiveBayesPredictor CreatePredictor()
return _predictor;
}
- [TlcModule.EntryPoint(Name = "Trainers.NaiveBayesClassifier", Desc = "Train a MultiClassNaiveBayesTrainer.", UserName = UserName, ShortName = ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.NaiveBayesClassifier",
+ Desc = "Train a MultiClassNaiveBayesTrainer.",
+ UserName = UserName, ShortName = ShortName)]
public static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/OlsLinearRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/OlsLinearRegression.cs
index 7ea557159e..db271ff858 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/OlsLinearRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/OlsLinearRegression.cs
@@ -51,6 +51,11 @@ public sealed class Arguments : LearnerInputBaseWithWeight
public const string ShortName = "ols";
internal const string Summary = "The ordinary least square regression fits the target function as a linear function of the numerical features "
+ "that minimizes the square loss function.";
+ internal const string Remarks = @"
+Ordinary least squares (OLS) is a parameterized regression method.
+It assumes that the conditional mean of the dependent variable follows a linear function of the dependent variables.
+By minimizing the squares of the difference between observed values and the predictions, the parameters of the regressor can be estimated.
+";
private VBuffer _weights;
private Float _bias;
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index 1861821d1c..138b3f0485 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -36,7 +36,26 @@ public sealed class AveragedPerceptronTrainer :
public const string LoadNameValue = "AveragedPerceptron";
internal const string UserNameValue = "Averaged Perceptron";
internal const string ShortName = "ap";
- internal const string Summary = "Perceptron is a binary classification algorithm that makes its predictions based on a linear function.";
+ internal const string Summary = "Averaged Perceptron Binary Classifier.";
+ internal const string Remarks = @"
+Perceptron is a classification algorithm that makes its predictions based on a linear function.
+I.e., for an instance with feature values f0, f1,..., f_D-1, , the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm.
+
+Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
+The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
+multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
+
+
+In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
+together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
+The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors.
+
+Wikipedia entry for Perceptron
+Large Margin Classification Using the Perceptron Algorithm
+";
public class Arguments : AveragedLinearArguments
{
@@ -91,7 +110,11 @@ public override LinearBinaryPredictor CreatePredictor()
return new LinearBinaryPredictor(Host, ref weights, bias);
}
- [TlcModule.EntryPoint(Name = "Trainers.AveragedPerceptronBinaryClassifier", Desc = "Train a Average perceptron.", UserName = UserNameValue, ShortName = ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.AveragedPerceptronBinaryClassifier",
+ Desc = Summary,
+ Remarks = Remarks,
+ UserName = UserNameValue,
+ ShortName = ShortName)]
public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
index 6910267759..1af080a76b 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
@@ -32,8 +32,13 @@ public sealed class OnlineGradientDescentTrainer : AveragedLinearTrainer
+Stochastic gradient descent uses a simple yet efficient iterative technique to fit model coefficients using error gradients for convex loss functions.
+The OnlineGradientDescentRegressor implements the standard (non-batch) SGD, with a choice of loss functions,
+and an option to update the weight vector using the average of the vectors seen over time (averaged argument is set to True by default).
+";
public sealed class Arguments : AveragedLinearArguments
{
@@ -89,7 +94,11 @@ public override TPredictor CreatePredictor()
return new LinearRegressionPredictor(Host, ref weights, bias);
}
- [TlcModule.EntryPoint(Name = "Trainers.OnlineGradientDescentRegressor", Desc = "Train a Online gradient descent perceptron.", UserName = UserNameValue, ShortName = OnlineGradientDescentTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.OnlineGradientDescentRegressor",
+ Desc = "Train a Online gradient descent perceptron.",
+ Remarks = Remarks,
+ UserName = UserNameValue,
+ ShortName = ShortName)]
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs
index a3ef06cf4e..c5ad4b4495 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs
@@ -32,6 +32,11 @@ public sealed class PoissonRegression : LbfgsTrainerBase
+Poisson regression is a parameterized regression method.
+It assumes that the log of the conditional mean of the dependent variable follows a linear function of the dependent variables.
+Assuming that the dependent variable follows a Poisson distribution, the parameters of the regressor can be estimated by maximizing the likelihood of the obtained observations.
+";
public sealed class Arguments : ArgumentsBase
{
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
index f8d7db7998..f0a250e4b8 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
@@ -386,7 +386,11 @@ protected override Float GetInstanceWeight(FloatLabelCursor cursor)
///
public static partial class Sdca
{
- [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentClassifier", Desc = "Train an SDCA multi class model", UserName = SdcaMultiClassTrainer.UserNameValue, ShortName = SdcaMultiClassTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentClassifier",
+ Desc = SdcaMultiClassTrainer.Summary,
+ Remarks = SdcaMultiClassTrainer.Remarks,
+ UserName = SdcaMultiClassTrainer.UserNameValue,
+ ShortName = SdcaMultiClassTrainer.ShortName)]
public static CommonOutputs.MulticlassClassificationOutput TrainMultiClass(IHostEnvironment env, SdcaMultiClassTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
index e8f5aeb04d..55a021ebb7 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
@@ -131,7 +131,11 @@ protected override Float TuneDefaultL2(IChannel ch, int maxIterations, long rowC
///
public static partial class Sdca
{
- [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentRegressor", Desc = "Train an SDCA regression model", UserName = SdcaRegressionTrainer.UserNameValue, ShortName = SdcaRegressionTrainer.ShortName)]
+ [TlcModule.EntryPoint(Name = "Trainers.StochasticDualCoordinateAscentRegressor",
+ Desc = SdcaRegressionTrainer.Summary,
+ Remarks = SdcaRegressionTrainer.Remarks,
+ UserName = SdcaRegressionTrainer.UserNameValue,
+ ShortName = SdcaRegressionTrainer.ShortName)]
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, SdcaRegressionTrainer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
index 1267f634cd..588e0a86f1 100644
--- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
+++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
@@ -27,8 +27,21 @@
namespace Microsoft.ML.Runtime.TextAnalytics
{
///
- /// The latent Dirichlet allocation (LDA) transform.
- /// http://arxiv.org/abs/1412.1576
+ /// LightLDA transform: Big Topic Models on Modest Compute Clusters.
+ /// LightLDA is an implementation of Latent Dirichlet Allocation (LDA).
+ /// Previous implementations of LDA such as SparseLDA or AliasLDA make it possible to reach massive data and model scales,
+ /// for example models with tens of billions of parameters to be inferred from billions of documents.
+ /// However, this requires a cluster of thousands of machines, with all the ensuing costs to set up and maintain.
+ /// LightLDA solves this problem in a more cost-effective manner by providing an implementation
+ /// that is efficient enough for modest clusters with at most tens of machines.
+ /// For more details, please see the original LightLDA paper:
+ /// http://arxiv.org/abs/1412.1576
+ /// http://www.www2015.it/documents/proceedings/proceedings/p1351.pdf
+ /// and open source implementation:
+ /// https://github.com/Microsoft/LightLDA
+ ///
+ /// See
+ /// for an example on how to use LdaTransform.
///
public sealed class LdaTransform : OneToOneTransformBase
{
diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs
index 12402b3993..6819f81543 100644
--- a/src/Microsoft.ML/CSharpApi.cs
+++ b/src/Microsoft.ML/CSharpApi.cs
@@ -4081,8 +4081,27 @@ namespace Trainers
{
///
- /// Train a Average perceptron.
+ /// Averaged Perceptron Binary Classifier.
///
+ ///
+ /// Perceptron is a classification algorithm that makes its predictions based on a linear function.
+ /// I.e., for an instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm.
+ ///
+ /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
+ /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+ /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+ /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
+ /// multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+ /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
+ ///
+ ///
+ /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
+ /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
+ /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) for the different weight vectors.
+ ///
+ /// Wikipedia entry for Perceptron
+ /// Large Margin Classification Using the Perceptron Algorithm
+ ///
public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -4589,6 +4608,27 @@ public enum Bundle : byte
///
/// Uses a random forest learner to perform binary classification.
///
+ ///
+ /// Decision trees are non-parametric models that perform a sequence of simple tests on inputs.
+ /// This decision procedure maps them to outputs found in the training dataset whose inputs were similar to the instance being processed.
+ /// A decision is made at each node of the binary tree data structure based on a measure of similarity that maps each instance recursively through the branches of the tree until the appropriate leaf node is reached and the output decision returned.
+ /// Decision trees have several advantages:
+ ///
+ /// - They are efficient in both computation and memory usage during training and prediction.
+ /// - They can represent non-linear decision boundaries.
+ /// - They perform integrated feature selection and classification.
+ /// - They are resilient in the presence of noisy features.
+ ///
+ /// Fast forest is a random forest implementation.
+ /// The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction.
+ /// An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model.
+ /// This decision forest classifier consists of an ensemble of decision trees.
+ /// Generally, ensemble models provide better coverage and accuracy than single decision trees.
+ /// Each tree in a decision forest outputs a Gaussian distribution.
+ /// Wikipedia: Random forest
+ /// Quantile regression forest
+ /// From Stumps to Trees to Forests
+ ///
public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -4882,6 +4922,27 @@ namespace Trainers
///
/// Trains a random forest to fit target values using least-squares.
///
+ ///
+ /// Decision trees are non-parametric models that perform a sequence of simple tests on inputs.
+ /// This decision procedure maps them to outputs found in the training dataset whose inputs were similar to the instance being processed.
+ /// A decision is made at each node of the binary tree data structure based on a measure of similarity that maps each instance recursively through the branches of the tree until the appropriate leaf node is reached and the output decision returned.
+ /// Decision trees have several advantages:
+ ///
+ /// - They are efficient in both computation and memory usage during training and prediction.
+ /// - They can represent non-linear decision boundaries.
+ /// - They perform integrated feature selection and classification.
+ /// - They are resilient in the presence of noisy features.
+ ///
+ /// Fast forest is a random forest implementation.
+ /// The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction.
+ /// An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model.
+ /// This decision forest classifier consists of an ensemble of decision trees.
+ /// Generally, ensemble models provide better coverage and accuracy than single decision trees.
+ /// Each tree in a decision forest outputs a Gaussian distribution.
+ /// Wikipedia: Random forest
+ /// Quantile regression forest
+ /// From Stumps to Trees to Forests
+ ///
public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -5171,6 +5232,30 @@ public enum BoostedTreeArgsOptimizationAlgorithmType
///
/// Uses a logit-boost boosted tree learner to perform binary classification.
///
+ ///
+ /// FastTrees is an efficient implementation of the MART gradient boosting algorithm.
+ /// Gradient boosting is a machine learning technique for regression problems.
+ /// It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
+ /// So this prediction model is actually an ensemble of weaker prediction models. In regression problems, boosting builds a series of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
+ ///
+ ///
+ /// MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
+ /// A decision (or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
+ /// At each leaf node, a value is returned. In the interior nodes, the decision is based on the test 'x <= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
+ /// The functions that can be produced by a regression tree are all the piece-wise constant functions.
+ ///
+ ///
+ /// The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
+ /// The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
+ ///
+ ///
+ /// - In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
+ /// - In case of a regression problem, the output is the predicted value of the function.
+ /// - In case of a ranking problem, the instances are ordered by the output value of the ensemble.
+ ///
+ /// Wikipedia: Gradient boosting (Gradient tree boosting).
+ /// Greedy function approximation: A gradient boosting machine.
+ ///
public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -5562,6 +5647,30 @@ namespace Trainers
///
/// Trains gradient boosted decision trees to the LambdaRank quasi-gradient.
///
+ ///
+ /// FastTrees is an efficient implementation of the MART gradient boosting algorithm.
+ /// Gradient boosting is a machine learning technique for regression problems.
+ /// It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
+ /// So this prediction model is actually an ensemble of weaker prediction models. In regression problems, boosting builds a series of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
+ ///
+ ///
+ /// MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
+ /// A decision (or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
+ /// At each leaf node, a value is returned. In the interior nodes, the decision is based on the test 'x <= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
+ /// The functions that can be produced by a regression tree are all the piece-wise constant functions.
+ ///
+ ///
+ /// The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
+ /// The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
+ ///
+ ///
+ /// - In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
+ /// - In case of a regression problem, the output is the predicted value of the function.
+ /// - In case of a ranking problem, the instances are ordered by the output value of the ensemble.
+ ///
+ /// Wikipedia: Gradient boosting (Gradient tree boosting).
+ /// Greedy function approximation: A gradient boosting machine.
+ ///
public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -5988,6 +6097,30 @@ namespace Trainers
///
/// Trains gradient boosted decision trees to fit target values using least-squares.
///
+ ///
+ /// FastTrees is an efficient implementation of the MART gradient boosting algorithm.
+ /// Gradient boosting is a machine learning technique for regression problems.
+ /// It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
+ /// So this prediction model is actually an ensemble of weaker prediction models. In regression problems, boosting builds a series of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
+ ///
+ ///
+ /// MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
+ /// A decision (or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
+ /// At each leaf node, a value is returned. In the interior nodes, the decision is based on the test 'x <= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
+ /// The functions that can be produced by a regression tree are all the piece-wise constant functions.
+ ///
+ ///
+ /// The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
+ /// The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
+ ///
+ ///
+ /// - In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
+ /// - In case of a regression problem, the output is the predicted value of the function.
+ /// - In case of a ranking problem, the instances are ordered by the output value of the ensemble.
+ ///
+ /// Wikipedia: Gradient boosting (Gradient tree boosting).
+ /// Greedy function approximation: A gradient boosting machine.
+ ///
public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -6765,6 +6898,15 @@ namespace Trainers
///
/// Train a field-aware factorization machine for binary classification
///
+ ///
+ /// Field Aware Factorization Machines use, in addition to the input variables, factorized parameters to model the interaction between pairs of variables.
+ /// The algorithm is particularly useful for high dimensional datasets which can be very sparse (e.g. click-prediction for advertising systems).
+ /// An advantage of FFM over SVMs is that the training data does not need to be stored in memory, and the coefficients can be optimized directly.
+ /// Field Aware Factorization Machines
+ /// Field-aware Factorization Machines for CTR Prediction
+ /// Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
+ /// An Improved Stochastic Gradient Method for Training Large-scale Field-aware Factorization Machine.
+ ///
public sealed partial class FieldAwareFactorizationMachineBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -7194,6 +7336,14 @@ public enum KMeansPlusPlusTrainerInitAlgorithm
///
/// K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better method for choosing the initial cluster centers.
///
+ ///
+ /// K-means++ improves upon K-means by using the Yinyang K-Means method for choosing the initial cluster centers.
+ /// YYK-Means accelerates K-Means up to an order of magnitude while producing exactly the same clustering results (modulo floating point precision issues).
+ /// YYK-Means observes that there is a lot of redundancy across iterations in the KMeans algorithms and most points do not change their clusters during an iteration.
+ /// It uses various bounding techniques to identify this redundancy and eliminate many distance computations and optimize centroid computations.
+ /// K-means.
+ /// K-means++
+ ///
public sealed partial class KMeansPlusPlusClusterer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -7310,8 +7460,10 @@ public enum LightGbmArgumentsEvalMetricType
///
- /// Train an LightGBM binary class model
+ /// Train a LightGBM binary classification model.
///
+ /// Light GBM is an open source implementation of boosted trees.
+ /// GitHub: LightGBM
public sealed partial class LightGbmBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -7515,8 +7667,10 @@ namespace Trainers
{
///
- /// Train an LightGBM multi class model
+ /// Train a LightGBM multi class model.
///
+ /// Light GBM is an open source implementation of boosted trees.
+ /// GitHub: LightGBM
public sealed partial class LightGbmClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -7720,8 +7874,10 @@ namespace Trainers
{
///
- /// Train an LightGBM ranking model
+ /// Train a LightGBM ranking model.
///
+ /// Light GBM is an open source implementation of boosted trees.
+ /// GitHub: LightGBM
public sealed partial class LightGbmRanker : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -7927,6 +8083,8 @@ namespace Trainers
///
/// LightGBM Regression
///
+ /// Light GBM is an open source implementation of boosted trees.
+ /// GitHub: LightGBM
public sealed partial class LightGbmRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithGroupId, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -8265,8 +8423,36 @@ namespace Trainers
{
///
- /// Train a logistic regression binary model
+ /// Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function.
///
+ ///
+ /// If the dependent variable has more than two possible values (blood type given diagnostic test results), then the logistic regression is multinomial.
+ ///
+ /// The optimization technique used for LogisticRegressionBinaryClassifier is the limited memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS).
+ /// Both the L-BFGS and regular BFGS algorithms use quasi-Newtonian methods to estimate the computationally intensive Hessian matrix in the equation used by Newton's method to calculate steps.
+ /// But the L-BFGS approximation uses only a limited amount of memory to compute the next step direction,
+ /// so that it is especially suited for problems with a large number of variables.
+ /// The MemorySize parameter specifies the number of past positions and gradients to store for use in the computation of the next step.
+ ///
+ ///
+ /// This learner can use elastic net regularization: a linear combination of L1 (lasso) and L2 (ridge) regularizations.
+ /// Regularization is a method that can render an ill-posed problem more tractable by imposing constraints that provide information to supplement the data and that prevents overfitting by penalizing models with extreme coefficient values.
+ /// This can improve the generalization of the model learned by selecting the optimal complexity in the bias-variance tradeoff.
+ /// Regularization works by adding the penalty that is associated with coefficient values to the error of the hypothesis.
+ /// An accurate model with extreme coefficient values would be penalized more, but a less accurate model with more conservative values would be penalized less. L1 and L2 regularization have different effects and uses that are complementary in certain respects.
+ ///
+ /// - L1Weight: can be applied to sparse models, when working with high-dimensional data.
+ /// It pulls small weights associated with features that are relatively unimportant towards 0.
+ /// - L2Weight: is preferable for data that is not sparse. It pulls large weights towards zero.
+ ///
+ /// Adding the ridge penalty to the regularization overcomes some of lasso's limitations. It can improve its predictive accuracy, for example, when the number of predictors is greater than the sample size. If x = l1_weight and y = l2_weight, ax + by = c defines the linear span of the regularization terms.
+ /// The default values of x and y are both 1.
+ /// An aggressive regularization can harm predictive capacity by excluding important variables from the model. So choosing the optimal values for the regularization parameters is important for the performance of the logistic regression model.
+ /// Scalable Training of L1-Regularized Log-Linear Models.
+ /// Test Run - L1 and L2 Regularization for Machine Learning.
+ /// Wikipedia: L-BFGS.
+ /// Wikipedia: Logistic regression.
+ ///
public sealed partial class LogisticRegressionBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -8415,8 +8601,36 @@ namespace Trainers
{
///
- /// Train a logistic regression multi class model
+ /// Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function.
///
+ ///
+ /// If the dependent variable has more than two possible values (blood type given diagnostic test results), then the logistic regression is multinomial.
+ ///
+ /// The optimization technique used for LogisticRegressionBinaryClassifier is the limited memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS).
+ /// Both the L-BFGS and regular BFGS algorithms use quasi-Newtonian methods to estimate the computationally intensive Hessian matrix in the equation used by Newton's method to calculate steps.
+ /// But the L-BFGS approximation uses only a limited amount of memory to compute the next step direction,
+ /// so that it is especially suited for problems with a large number of variables.
+ /// The MemorySize parameter specifies the number of past positions and gradients to store for use in the computation of the next step.
+ ///
+ ///
+ /// This learner can use elastic net regularization: a linear combination of L1 (lasso) and L2 (ridge) regularizations.
+ /// Regularization is a method that can render an ill-posed problem more tractable by imposing constraints that provide information to supplement the data and that prevents overfitting by penalizing models with extreme coefficient values.
+ /// This can improve the generalization of the model learned by selecting the optimal complexity in the bias-variance tradeoff.
+ /// Regularization works by adding the penalty that is associated with coefficient values to the error of the hypothesis.
+ /// An accurate model with extreme coefficient values would be penalized more, but a less accurate model with more conservative values would be penalized less. L1 and L2 regularization have different effects and uses that are complementary in certain respects.
+ ///
+ /// - L1Weight: can be applied to sparse models, when working with high-dimensional data.
+ /// It pulls small weights associated with features that are relatively unimportant towards 0.
+ /// - L2Weight: is preferable for data that is not sparse. It pulls large weights towards zero.
+ ///
+ /// Adding the ridge penalty to the regularization overcomes some of lasso's limitations. It can improve its predictive accuracy, for example, when the number of predictors is greater than the sample size. If x = l1_weight and y = l2_weight, ax + by = c defines the linear span of the regularization terms.
+ /// The default values of x and y are both 1.
+ /// An aggressive regularization can harm predictive capacity by excluding important variables from the model. So choosing the optimal values for the regularization parameters is important for the performance of the logistic regression model.
+ /// Scalable Training of L1-Regularized Log-Linear Models.
+ /// Test Run - L1 and L2 Regularization for Machine Learning.
+ /// Wikipedia: L-BFGS.
+ /// Wikipedia: Logistic regression.
+ ///
public sealed partial class LogisticRegressionClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -8640,6 +8854,11 @@ namespace Trainers
///
/// Train a Online gradient descent perceptron.
///
+ ///
+ /// Stochastic gradient descent uses a simple yet efficient iterative technique to fit model coefficients using error gradients for convex loss functions.
+ /// The OnlineGradientDescentRegressor implements the standard (non-batch) SGD, with a choice of loss functions,
+ /// and an option to update the weight vector using the average of the vectors seen over time (averaged argument is set to True by default).
+ ///
public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -8795,6 +9014,14 @@ namespace Trainers
///
/// Train an PCA Anomaly model.
///
+ ///
+ /// Principal Component Analysis (PCA) is a dimensionality-reduction transform which computes the projection of the feature vector onto a low-rank subspace.
+ /// Its training is done using the technique described in the paper: Combining Structured and Unstructured Randomness in Large Scale PCA,
+ /// and the paper Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions
+ /// Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices
+ /// A randomized algorithm for principal component analysis
+ /// Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions
+ ///
public sealed partial class PcaAnomalyDetector : Microsoft.ML.Runtime.EntryPoints.CommonInputs.IUnsupervisedTrainerWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -9036,6 +9263,25 @@ namespace Trainers
///
/// Train an SDCA binary model.
///
+ ///
+ /// This classifier is a trainer based on the Stochastic Dual Coordinate Ascent (SDCA) method, a state-of-the-art optimization technique for convex objective functions.
+ /// The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation
+ /// that supports multi-threading.
+ ///
+ /// Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
+ /// Several choices of loss functions are also provided.
+ /// The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
+ ///
+ ///
+ /// Note that SDCA is a stochastic and streaming optimization algorithm.
+ /// The results depend on the order of the training data. For reproducible results, it is recommended that one sets to
+ /// False and to 1.
+ /// Elastic net regularization can be specified by the and parameters. Note that the has an effect on the rate of convergence.
+ /// In general, the larger the , the faster SDCA converges.
+ ///
+ /// Scaling Up Stochastic Dual Coordinate Ascent.
+ /// Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization.
+ ///
public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -9175,8 +9421,27 @@ namespace Trainers
{
///
- /// Train an SDCA multi class model
+ /// The SDCA linear multi-class classification trainer.
///
+ ///
+ /// This classifier is a trainer based on the Stochastic Dual Coordinate Ascent (SDCA) method, a state-of-the-art optimization technique for convex objective functions.
+ /// The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation
+ /// that supports multi-threading.
+ ///
+ /// Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
+ /// Several choices of loss functions are also provided.
+ /// The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
+ ///
+ ///
+ /// Note that SDCA is a stochastic and streaming optimization algorithm.
+ /// The results depend on the order of the training data. For reproducible results, it is recommended that one sets to
+ /// False and to 1.
+ /// Elastic net regularization can be specified by the and parameters. Note that the has an effect on the rate of convergence.
+ /// In general, the larger the , the faster SDCA converges.
+ ///
+ /// Scaling Up Stochastic Dual Coordinate Ascent.
+ /// Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization.
+ ///
public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
@@ -9300,8 +9565,27 @@ namespace Trainers
{
///
- /// Train an SDCA regression model
+ /// The SDCA linear regression trainer.
///
+ ///
+ /// This classifier is a trainer based on the Stochastic Dual Coordinate Ascent (SDCA) method, a state-of-the-art optimization technique for convex objective functions.
+ /// The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation
+ /// that supports multi-threading.
+ ///
+ /// Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
+ /// Several choices of loss functions are also provided.
+ /// The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
+ ///
+ ///
+ /// Note that SDCA is a stochastic and streaming optimization algorithm.
+ /// The results depend on the order of the training data. For reproducible results, it is recommended that one sets to
+ /// False and to 1.
+ /// Elastic net regularization can be specified by the and parameters. Note that the has an effect on the rate of convergence.
+ /// In general, the larger the , the faster SDCA converges.
+ ///
+ /// Scaling Up Stochastic Dual Coordinate Ascent.
+ /// Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization.
+ ///
public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{
diff --git a/src/Microsoft.ML/Models/OneVersusAll.cs b/src/Microsoft.ML/Models/OneVersusAll.cs
index 2f0a265dad..b4317cc1fe 100644
--- a/src/Microsoft.ML/Models/OneVersusAll.cs
+++ b/src/Microsoft.ML/Models/OneVersusAll.cs
@@ -13,10 +13,26 @@ namespace Microsoft.ML.Models
public sealed partial class OneVersusAll
{
///
- /// Create OneVersusAll multiclass trainer.
+ /// One-versus-all, OvA, learner (also known as One-vs.-rest, "OvR") is a multi-class learner
+ /// with the strategy to fit one binary classifier per class in the dataset.
+ /// It trains the provided binary classifier for each class against the other classes, where the current
+ /// class is treated as the positive label and examples in other classes are treated as the negative class.
+ /// See wikipedia page.
///
+ ///
+ /// To use it, add it to the pipeline like any regular learner:
+ ///
+ /// pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));
+ ///
+ ///
+ /// The base trainer must be a binary classifier. To check the available binary classifiers, type BinaryClassifiers,
+ /// and look at the available binary learners as suggested by IntelliSense.
+ ///
/// Underlying binary trainer
- /// "Use probabilities (vs. raw outputs) to identify top-score category
+ /// Use probabilities (vs. raw outputs) to identify top-score category.
+ /// By specifying it to false, you can tell One-versus-all to not use the probabilities but instead
+ /// the raw uncalibrated scores from each predictor. This is generally not recommended, since these quantities
+ /// are not meant to be comparable from one predictor to another, unlike calibrated probabilities.
public static ILearningPipelineItem With(ITrainerInputWithLabel trainer, bool useProbabilities = true)
{
return new OvaPipelineItem(trainer, useProbabilities);
diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs
index db7c1d490d..29884fe620 100644
--- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs
+++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs
@@ -382,7 +382,7 @@ private void GenerateInput(IndentingTextWriter writer, ModuleCatalog.EntryPointI
GenerateEnums(writer, entryPointInfo.InputType, _defaultNamespace + entryPointMetadata.Namespace);
writer.WriteLine();
GenerateClasses(writer, entryPointInfo.InputType, catalog, _defaultNamespace + entryPointMetadata.Namespace);
- CSharpGeneratorUtils.GenerateSummary(writer, entryPointInfo.Description);
+ CSharpGeneratorUtils.GenerateSummary(writer, entryPointInfo.Description, entryPointInfo.Remarks);
if (entryPointInfo.ObsoleteAttribute != null)
writer.WriteLine($"[Obsolete(\"{entryPointInfo.ObsoleteAttribute.Message}\")]");
diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs
index cca73a21f9..1cab5cc35c 100644
--- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs
+++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs
@@ -349,7 +349,7 @@ public static string GetComponentName(ModuleCatalog.ComponentInfo component)
return $"{Capitalize(component.Name)}{component.Kind}";
}
- public static void GenerateSummary(IndentingTextWriter writer, string summary)
+ public static void GenerateSummary(IndentingTextWriter writer, string summary, string remarks = null)
{
if (string.IsNullOrEmpty(summary))
return;
@@ -357,6 +357,10 @@ public static void GenerateSummary(IndentingTextWriter writer, string summary)
foreach (var line in summary.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
writer.WriteLine($"/// {line}");
writer.WriteLine("/// ");
+
+ if(!string.IsNullOrEmpty(remarks))
+ foreach (var line in remarks.Split(new[] { Environment.NewLine }, StringSplitOptions.None))
+ writer.WriteLine($"/// {line}");
}
public static void GenerateHeader(IndentingTextWriter writer)
diff --git a/src/Microsoft.ML/Trainers/LightGBM.cs b/src/Microsoft.ML/Trainers/LightGBM.cs
new file mode 100644
index 0000000000..60c7a45177
--- /dev/null
+++ b/src/Microsoft.ML/Trainers/LightGBM.cs
@@ -0,0 +1,58 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace Microsoft.ML.Trainers
+{
+ /// <summary>
+ /// This API requires the Microsoft.ML.LightGBM NuGet package.
+ /// </summary>
+ /// <example>
+ /// <code>
+ /// pipeline.Add(new LightGbmBinaryClassifier() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 })
+ /// </code>
+ /// </example>
+ public sealed partial class LightGbmBinaryClassifier
+ {
+
+ }
+
+ /// <summary>
+ /// This API requires the Microsoft.ML.LightGBM NuGet package.
+ /// </summary>
+ /// <example>
+ /// <code>
+ /// pipeline.Add(new LightGbmClassifier() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 })
+ /// </code>
+ /// </example>
+ public sealed partial class LightGbmClassifier
+ {
+
+ }
+
+ /// <summary>
+ /// This API requires the Microsoft.ML.LightGBM NuGet package.
+ /// </summary>
+ /// <example>
+ /// <code>
+ /// pipeline.Add(new LightGbmRanker() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 })
+ /// </code>
+ /// </example>
+ public sealed partial class LightGbmRanker
+ {
+
+ }
+
+ /// <summary>
+ /// This API requires the Microsoft.ML.LightGBM NuGet package.
+ /// </summary>
+ /// <example>
+ /// <code>
+ /// pipeline.Add(new LightGbmRegressor() { NumLeaves = 5, NumBoostRound = 5, MinDataPerLeaf = 2 })
+ /// </code>
+ /// </example>
+ public sealed partial class LightGbmRegressor
+ {
+
+ }
+}
diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
index ce4f2b09fa..6b65317c62 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
+++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
@@ -36,7 +36,7 @@ Models.Summarizer Summarize a linear regression predictor. Microsoft.ML.Runtime.
Models.SweepResultExtractor Extracts the sweep result. Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro ExtractSweepResult Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+ResultInput Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+Output
Models.TrainTestBinaryEvaluator Train test for binary classification Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro TrainTestBinary Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro+Output]
Models.TrainTestEvaluator General train test for any supported evaluator Microsoft.ML.Runtime.EntryPoints.TrainTestMacro TrainTest Microsoft.ML.Runtime.EntryPoints.TrainTestMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.TrainTestMacro+Output]
-Trainers.AveragedPerceptronBinaryClassifier Train a Average perceptron. Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
+Trainers.AveragedPerceptronBinaryClassifier Averaged Perceptron Binary Classifier. Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
Trainers.EnsembleBinaryClassifier Train binary ensemble. Microsoft.ML.Ensemble.EntryPoints.Ensemble CreateBinaryEnsemble Microsoft.ML.Runtime.Ensemble.EnsembleTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
Trainers.EnsembleClassification Train multiclass ensemble. Microsoft.ML.Ensemble.EntryPoints.Ensemble CreateMultiClassEnsemble Microsoft.ML.Runtime.Ensemble.MulticlassDataPartitionEnsembleTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
Trainers.EnsembleRegression Train regression ensemble. Microsoft.ML.Ensemble.EntryPoints.Ensemble CreateRegressionEnsemble Microsoft.ML.Runtime.Ensemble.RegressionEnsembleTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
@@ -50,16 +50,20 @@ Trainers.FieldAwareFactorizationMachineBinaryClassifier Train a field-aware fact
Trainers.GeneralizedAdditiveModelBinaryClassifier Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Runtime.FastTree.Gam TrainBinary Microsoft.ML.Runtime.FastTree.BinaryClassificationGamTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
Trainers.GeneralizedAdditiveModelRegressor Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Runtime.FastTree.Gam TrainRegression Microsoft.ML.Runtime.FastTree.RegressionGamTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
Trainers.KMeansPlusPlusClusterer K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better method for choosing the initial cluster centers. Microsoft.ML.Runtime.KMeans.KMeansPlusPlusTrainer TrainKMeans Microsoft.ML.Runtime.KMeans.KMeansPlusPlusTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+ClusteringOutput
+Trainers.LightGbmBinaryClassifier Train a LightGBM binary classification model. Microsoft.ML.Runtime.LightGBM.LightGbm TrainBinary Microsoft.ML.Runtime.LightGBM.LightGbmArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
+Trainers.LightGbmClassifier Train a LightGBM multi class model. Microsoft.ML.Runtime.LightGBM.LightGbm TrainMultiClass Microsoft.ML.Runtime.LightGBM.LightGbmArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
+Trainers.LightGbmRanker Train a LightGBM ranking model. Microsoft.ML.Runtime.LightGBM.LightGbm TrainRanking Microsoft.ML.Runtime.LightGBM.LightGbmArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RankingOutput
+Trainers.LightGbmRegressor LightGBM Regression Microsoft.ML.Runtime.LightGBM.LightGbm TrainRegression Microsoft.ML.Runtime.LightGBM.LightGbmArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
Trainers.LinearSvmBinaryClassifier Train a linear SVM. Microsoft.ML.Runtime.Learners.LinearSvm TrainLinearSvm Microsoft.ML.Runtime.Learners.LinearSvm+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
-Trainers.LogisticRegressionBinaryClassifier Train a logistic regression binary model Microsoft.ML.Runtime.Learners.LogisticRegression TrainBinary Microsoft.ML.Runtime.Learners.LogisticRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
-Trainers.LogisticRegressionClassifier Train a logistic regression multi class model Microsoft.ML.Runtime.Learners.LogisticRegression TrainMultiClass Microsoft.ML.Runtime.Learners.MulticlassLogisticRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
+Trainers.LogisticRegressionBinaryClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Runtime.Learners.LogisticRegression TrainBinary Microsoft.ML.Runtime.Learners.LogisticRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
+Trainers.LogisticRegressionClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Runtime.Learners.LogisticRegression TrainMultiClass Microsoft.ML.Runtime.Learners.MulticlassLogisticRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. Microsoft.ML.Runtime.Learners.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Runtime.Learners.MultiClassNaiveBayesTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
Trainers.OnlineGradientDescentRegressor Train a Online gradient descent perceptron. Microsoft.ML.Runtime.Learners.OnlineGradientDescentTrainer TrainRegression Microsoft.ML.Runtime.Learners.OnlineGradientDescentTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
Trainers.PcaAnomalyDetector Train an PCA Anomaly model. Microsoft.ML.Runtime.PCA.RandomizedPcaTrainer TrainPcaAnomaly Microsoft.ML.Runtime.PCA.RandomizedPcaTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+AnomalyDetectionOutput
Trainers.PoissonRegressor Train an Poisson regression model. Microsoft.ML.Runtime.Learners.PoissonRegression TrainRegression Microsoft.ML.Runtime.Learners.PoissonRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
Trainers.StochasticDualCoordinateAscentBinaryClassifier Train an SDCA binary model. Microsoft.ML.Runtime.Learners.Sdca TrainBinary Microsoft.ML.Runtime.Learners.LinearClassificationTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
-Trainers.StochasticDualCoordinateAscentClassifier Train an SDCA multi class model Microsoft.ML.Runtime.Learners.Sdca TrainMultiClass Microsoft.ML.Runtime.Learners.SdcaMultiClassTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
-Trainers.StochasticDualCoordinateAscentRegressor Train an SDCA regression model Microsoft.ML.Runtime.Learners.Sdca TrainRegression Microsoft.ML.Runtime.Learners.SdcaRegressionTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
+Trainers.StochasticDualCoordinateAscentClassifier The SDCA linear multi-class classification trainer. Microsoft.ML.Runtime.Learners.Sdca TrainMultiClass Microsoft.ML.Runtime.Learners.SdcaMultiClassTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
+Trainers.StochasticDualCoordinateAscentRegressor The SDCA linear regression trainer. Microsoft.ML.Runtime.Learners.Sdca TrainRegression Microsoft.ML.Runtime.Learners.SdcaRegressionTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
Trainers.StochasticGradientDescentBinaryClassifier Train an Hogwild SGD binary model. Microsoft.ML.Runtime.Learners.StochasticGradientDescentClassificationTrainer TrainBinary Microsoft.ML.Runtime.Learners.StochasticGradientDescentClassificationTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput
Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Runtime.Data.BootstrapSample GetSample Microsoft.ML.Runtime.Data.BootstrapSampleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
Transforms.BinaryPredictionScoreColumnsRenamer For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. Microsoft.ML.Runtime.EntryPoints.ScoreModel RenameBinaryPredictionScoreColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+RenameBinaryPredictionScoreColumnsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 0acb5971b0..b4c4229cdf 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -3719,7 +3719,7 @@
},
{
"Name": "Trainers.AveragedPerceptronBinaryClassifier",
- "Desc": "Train a Average perceptron.",
+ "Desc": "Averaged Perceptron Binary Classifier.",
"FriendlyName": "Averaged Perceptron",
"ShortName": "ap",
"Inputs": [
@@ -10674,11 +10674,34 @@
]
},
{
- "Name": "Trainers.LinearSvmBinaryClassifier",
- "Desc": "Train a linear SVM.",
- "FriendlyName": "SVM (Pegasos-Linear)",
- "ShortName": "svm",
+ "Name": "Trainers.LightGbmBinaryClassifier",
+ "Desc": "Train a LightGBM binary classification model.",
+ "FriendlyName": "LightGBM Binary Classifier",
+ "ShortName": "LightGBM",
"Inputs": [
+ {
+ "Name": "NumBoostRound",
+ "Type": "Int",
+ "Desc": "Number of iterations.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 100,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 20,
+ 50,
+ 100,
+ 150,
+ 200
+ ]
+ }
+ },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -10690,6 +10713,64 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "LearningRate",
+ "Type": "Float",
+ "Desc": "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].",
+ "Aliases": [
+ "lr"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.025,
+ "Max": 0.4,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "NumLeaves",
+ "Type": "Int",
+ "Desc": "Maximum leaves for trees.",
+ "Aliases": [
+ "nl"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 2,
+ "Max": 128,
+ "StepSize": 4.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "MinDataPerLeaf",
+ "Type": "Int",
+ "Desc": "Minimum number of instances needed in a child.",
+ "Aliases": [
+ "mil"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20,
+ 50
+ ]
+ }
+ },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -10702,6 +10783,20 @@
"IsNullable": false,
"Default": "Features"
},
+ {
+ "Name": "Booster",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "BoosterParameterFunction"
+ },
+ "Desc": "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "gbdt"
+ }
+ },
{
"Name": "LabelColumn",
"Type": "String",
@@ -10714,6 +10809,30 @@
"IsNullable": false,
"Default": "Label"
},
+ {
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "Column to use for example weight",
+ "Aliases": [
+ "weight"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": "Weight"
+ },
+ {
+ "Name": "GroupIdColumn",
+ "Type": "String",
+ "Desc": "Column to use for example groupId",
+ "Aliases": [
+ "groupId"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "GroupId"
+ },
{
"Name": "NormalizeFeatures",
"Type": {
@@ -10755,175 +10874,270 @@
"Default": "Auto"
},
{
- "Name": "Lambda",
- "Type": "Float",
- "Desc": "Regularizer constant",
+ "Name": "MaxBin",
+ "Type": "Int",
+ "Desc": "Max number of bucket bin for features.",
"Aliases": [
- "lambda"
+ "mb"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.001,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 1E-05,
- "Max": 0.1,
- "StepSize": 10.0,
- "IsLogScale": true
- }
+ "Default": 255
},
{
- "Name": "PerformProjection",
+ "Name": "VerboseEval",
"Type": "Bool",
- "Desc": "Perform projection to unit-ball? Typically used with batch size > 1.",
+ "Desc": "Verbose",
"Aliases": [
- "project"
+ "v"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- false,
- true
- ]
- }
+ "Default": false
},
{
- "Name": "NumIterations",
+ "Name": "Silent",
+ "Type": "Bool",
+ "Desc": "Printing running messages.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "NThread",
"Type": "Int",
- "Desc": "Number of iterations",
+ "Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
- "iter"
+ "nt"
],
"Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 1,
- "SweepRange": {
- "RangeType": "Long",
- "Min": 1,
- "Max": 100,
- "StepSize": 10.0,
- "IsLogScale": true
- }
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "InitWtsDiameter",
- "Type": "Float",
- "Desc": "Init weights diameter",
+ "Name": "EvalMetric",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultMetric",
+ "Rmse",
+ "Mae",
+ "Logloss",
+ "Error",
+ "Merror",
+ "Mlogloss",
+ "Auc",
+ "Ndcg",
+ "Map"
+ ]
+ },
+ "Desc": "Evaluation metrics.",
"Aliases": [
- "initwts"
+ "em"
],
"Required": false,
- "SortOrder": 140.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 5
- }
+ "Default": "DefaultMetric"
},
{
- "Name": "NoBias",
+ "Name": "UseSoftmax",
"Type": "Bool",
- "Desc": "No bias",
+ "Desc": "Use softmax loss for the multi classification.",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
- "Default": false,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ true,
+ false
]
}
},
{
- "Name": "Calibrator",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "CalibratorTrainer"
- },
- "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
+ "Name": "EarlyStoppingRound",
+ "Type": "Int",
+ "Desc": "Rounds of early stopping, 0 will disable it.",
+ "Aliases": [
+ "es"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": {
- "Name": "PlattCalibrator"
- }
+ "Default": 0
},
{
- "Name": "MaxCalibrationExamples",
- "Type": "Int",
- "Desc": "The maximum number of examples to use when training the calibrator",
+ "Name": "CustomGains",
+ "Type": "String",
+ "Desc": "Comma seperated list of gains associated to each relevance label.",
+ "Aliases": [
+ "gains"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000
+ "Default": "0,3,7,15,31,63,127,255,511,1023,2047,4095"
},
{
- "Name": "InitialWeights",
- "Type": "String",
- "Desc": "Initial Weights and bias, comma-separated",
- "Aliases": [
- "initweights"
- ],
+ "Name": "BatchSize",
+ "Type": "Int",
+ "Desc": "Number of entries in a batch when loading data.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": null
+ "Default": 1048576
},
{
- "Name": "Shuffle",
+ "Name": "UseCat",
"Type": "Bool",
- "Desc": "Whether to shuffle for each training iteration",
+ "Desc": "Enable categorical split or not.",
"Aliases": [
- "shuf"
+ "cat"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
- "Default": true,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ true,
+ false
]
}
},
{
- "Name": "StreamingCacheSize",
- "Type": "Int",
- "Desc": "Size of cache when trained in Scope",
- "Aliases": [
- "cache"
- ],
+ "Name": "UseMissing",
+ "Type": "Bool",
+ "Desc": "Enable missing value auto infer or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000
+ "Default": false,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ true,
+ false
+ ]
+ }
},
{
- "Name": "BatchSize",
+ "Name": "MinDataPerGroup",
"Type": "Int",
- "Desc": "Batch size",
+ "Desc": "Min number of instances per categorical group.",
"Aliases": [
- "batch"
+ "mdpg"
],
"Required": false,
- "SortOrder": 190.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1
- }
- ],
+ "Default": 100,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 50,
+ 100,
+ 200
+ ]
+ }
+ },
+ {
+ "Name": "MaxCatThreshold",
+ "Type": "Int",
+ "Desc": "Max number of categorical thresholds.",
+ "Aliases": [
+ "maxcat"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 32,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 8,
+ 16,
+ 32,
+ 64
+ ]
+ }
+ },
+ {
+ "Name": "CatSmooth",
+ "Type": "Float",
+ "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20
+ ]
+ }
+ },
+ {
+ "Name": "CatL2",
+ "Type": "Float",
+ "Desc": "L2 Regularization for categorical split.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.1,
+ 0.5,
+ 1,
+ 5,
+ 10
+ ]
+ }
+ },
+ {
+ "Name": "ParallelTrainer",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "ParallelLightGBM"
+ },
+ "Desc": "Parallel LightGBM Learning Algorithm",
+ "Aliases": [
+ "parag"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "Single"
+ }
+ }
+ ],
"Outputs": [
{
"Name": "PredictorModel",
@@ -10932,6 +11146,8 @@
}
],
"InputKind": [
+ "ITrainerInputWithGroupId",
+ "ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
@@ -10941,11 +11157,34 @@
]
},
{
- "Name": "Trainers.LogisticRegressionBinaryClassifier",
- "Desc": "Train a logistic regression binary model",
- "FriendlyName": "Logistic Regression",
- "ShortName": "lr",
+ "Name": "Trainers.LightGbmClassifier",
+ "Desc": "Train a LightGBM multi class model.",
+ "FriendlyName": "LightGBM Multi Class Classifier",
+ "ShortName": "LightGBMMC",
"Inputs": [
+ {
+ "Name": "NumBoostRound",
+ "Type": "Int",
+ "Desc": "Number of iterations.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 100,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 20,
+ 50,
+ 100,
+ 150,
+ 200
+ ]
+ }
+ },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -10957,6 +11196,64 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "LearningRate",
+ "Type": "Float",
+ "Desc": "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].",
+ "Aliases": [
+ "lr"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.025,
+ "Max": 0.4,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "NumLeaves",
+ "Type": "Int",
+ "Desc": "Maximum leaves for trees.",
+ "Aliases": [
+ "nl"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 2,
+ "Max": 128,
+ "StepSize": 4.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "MinDataPerLeaf",
+ "Type": "Int",
+ "Desc": "Minimum number of instances needed in a child.",
+ "Aliases": [
+ "mil"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20,
+ 50
+ ]
+ }
+ },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -10969,6 +11266,20 @@
"IsNullable": false,
"Default": "Features"
},
+ {
+ "Name": "Booster",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "BoosterParameterFunction"
+ },
+ "Desc": "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "gbdt"
+ }
+ },
{
"Name": "LabelColumn",
"Type": "String",
@@ -10993,6 +11304,18 @@
"IsNullable": false,
"Default": "Weight"
},
+ {
+ "Name": "GroupIdColumn",
+ "Type": "String",
+ "Desc": "Column to use for example groupId",
+ "Aliases": [
+ "groupId"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "GroupId"
+ },
{
"Name": "NormalizeFeatures",
"Type": {
@@ -11034,205 +11357,268 @@
"Default": "Auto"
},
{
- "Name": "ShowTrainingStats",
- "Type": "Bool",
- "Desc": "Show statistics of training examples.",
+ "Name": "MaxBin",
+ "Type": "Int",
+ "Desc": "Max number of bucket bin for features.",
"Aliases": [
- "stat"
+ "mb"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 255
},
{
- "Name": "L2Weight",
- "Type": "Float",
- "Desc": "L2 regularization weight",
+ "Name": "VerboseEval",
+ "Type": "Bool",
+ "Desc": "Verbose",
"Aliases": [
- "l2"
+ "v"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 4
- }
+ "Default": false
},
{
- "Name": "L1Weight",
- "Type": "Float",
- "Desc": "L1 regularization weight",
+ "Name": "Silent",
+ "Type": "Bool",
+ "Desc": "Printing running messages.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "NThread",
+ "Type": "Int",
+ "Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
- "l1"
+ "nt"
],
"Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 1.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 4
- }
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "OptTol",
- "Type": "Float",
- "Desc": "Tolerance parameter for optimization convergence. Lower = slower, more accurate",
+ "Name": "EvalMetric",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultMetric",
+ "Rmse",
+ "Mae",
+ "Logloss",
+ "Error",
+ "Merror",
+ "Mlogloss",
+ "Auc",
+ "Ndcg",
+ "Map"
+ ]
+ },
+ "Desc": "Evaluation metrics.",
"Aliases": [
- "ot"
+ "em"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1E-07,
+ "Default": "DefaultMetric"
+ },
+ {
+ "Name": "UseSoftmax",
+ "Type": "Bool",
+ "Desc": "Use softmax loss for the multi classification.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.0001,
- 1E-07
+ true,
+ false
]
}
},
{
- "Name": "MemorySize",
+ "Name": "EarlyStoppingRound",
"Type": "Int",
- "Desc": "Memory size for L-BFGS. Lower=faster, less accurate",
+ "Desc": "Rounds of early stopping, 0 will disable it.",
"Aliases": [
- "m"
+ "es"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 20,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 5,
- 20,
- 50
- ]
- }
+ "Default": 0
},
{
- "Name": "EnforceNonNegativity",
- "Type": "Bool",
- "Desc": "Enforce non-negative weights",
+ "Name": "CustomGains",
+ "Type": "String",
+ "Desc": "Comma seperated list of gains associated to each relevance label.",
"Aliases": [
- "nn"
+ "gains"
],
"Required": false,
- "SortOrder": 90.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": "0,3,7,15,31,63,127,255,511,1023,2047,4095"
},
{
- "Name": "InitWtsDiameter",
- "Type": "Float",
- "Desc": "Init weights diameter",
- "Aliases": [
- "initwts"
- ],
+ "Name": "BatchSize",
+ "Type": "Int",
+ "Desc": "Number of entries in a batch when loading data.",
"Required": false,
- "SortOrder": 140.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 5
- }
+ "Default": 1048576
},
{
- "Name": "MaxIterations",
- "Type": "Int",
- "Desc": "Maximum iterations.",
+ "Name": "UseCat",
+ "Type": "Bool",
+ "Desc": "Enable categorical split or not.",
"Aliases": [
- "maxiter"
+ "cat"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
- "Default": 2147483647,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
- "RangeType": "Long",
- "Min": 1,
- "Max": 2147483647
+ "RangeType": "Discrete",
+ "Values": [
+ true,
+ false
+ ]
}
},
{
- "Name": "SgdInitializationTolerance",
- "Type": "Float",
- "Desc": "Run SGD to initialize LR weights, converging to this tolerance",
- "Aliases": [
- "sgd"
- ],
+ "Name": "UseMissing",
+ "Type": "Bool",
+ "Desc": "Enable missing value auto infer or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0
+ "Default": false,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ true,
+ false
+ ]
+ }
},
{
- "Name": "Quiet",
- "Type": "Bool",
- "Desc": "If set to true, produce no output during training.",
+ "Name": "MinDataPerGroup",
+ "Type": "Int",
+ "Desc": "Min number of instances per categorical group.",
"Aliases": [
- "q"
+ "mdpg"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 100,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 50,
+ 100,
+ 200
+ ]
+ }
},
{
- "Name": "UseThreads",
- "Type": "Bool",
- "Desc": "Whether or not to use threads. Default is true",
+ "Name": "MaxCatThreshold",
+ "Type": "Int",
+ "Desc": "Max number of categorical thresholds.",
"Aliases": [
- "t"
+ "maxcat"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true
+ "Default": 32,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 8,
+ 16,
+ 32,
+ 64
+ ]
+ }
},
{
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "Number of threads",
- "Aliases": [
- "nt"
- ],
+ "Name": "CatSmooth",
+ "Type": "Float",
+ "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
+ "IsNullable": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20
+ ]
+ }
},
{
- "Name": "DenseOptimizer",
- "Type": "Bool",
- "Desc": "Force densification of the internal optimization vectors",
- "Aliases": [
- "do"
- ],
+ "Name": "CatL2",
+ "Type": "Float",
+ "Desc": "L2 Regularization for categorical split.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ 0.1,
+ 0.5,
+ 1,
+ 5,
+ 10
]
}
+ },
+ {
+ "Name": "ParallelTrainer",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "ParallelLightGBM"
+ },
+ "Desc": "Parallel LightGBM Learning Algorithm",
+ "Aliases": [
+ "parag"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "Single"
+ }
}
],
"Outputs": [
@@ -11243,21 +11629,45 @@
}
],
"InputKind": [
+ "ITrainerInputWithGroupId",
"ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
"OutputKind": [
- "IBinaryClassificationOutput",
+ "IMulticlassClassificationOutput",
"ITrainerOutput"
]
},
{
- "Name": "Trainers.LogisticRegressionClassifier",
- "Desc": "Train a logistic regression multi class model",
- "FriendlyName": "Multi-class Logistic Regression",
- "ShortName": "mlr",
+ "Name": "Trainers.LightGbmRanker",
+ "Desc": "Train a LightGBM ranking model.",
+ "FriendlyName": "LightGBM Ranking",
+ "ShortName": "LightGBMRank",
"Inputs": [
+ {
+ "Name": "NumBoostRound",
+ "Type": "Int",
+ "Desc": "Number of iterations.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 100,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 20,
+ 50,
+ 100,
+ 150,
+ 200
+ ]
+ }
+ },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -11269,6 +11679,64 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "LearningRate",
+ "Type": "Float",
+ "Desc": "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].",
+ "Aliases": [
+ "lr"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.025,
+ "Max": 0.4,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "NumLeaves",
+ "Type": "Int",
+ "Desc": "Maximum leaves for trees.",
+ "Aliases": [
+ "nl"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 2,
+ "Max": 128,
+ "StepSize": 4.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "MinDataPerLeaf",
+ "Type": "Int",
+ "Desc": "Minimum number of instances needed in a child.",
+ "Aliases": [
+ "mil"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20,
+ 50
+ ]
+ }
+ },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -11281,6 +11749,20 @@
"IsNullable": false,
"Default": "Features"
},
+ {
+ "Name": "Booster",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "BoosterParameterFunction"
+ },
+ "Desc": "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "gbdt"
+ }
+ },
{
"Name": "LabelColumn",
"Type": "String",
@@ -11306,16 +11788,28 @@
"Default": "Weight"
},
{
- "Name": "NormalizeFeatures",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "No",
- "Warn",
- "Auto",
- "Yes"
- ]
- },
+ "Name": "GroupIdColumn",
+ "Type": "String",
+ "Desc": "Column to use for example groupId",
+ "Aliases": [
+ "groupId"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "GroupId"
+ },
+ {
+ "Name": "NormalizeFeatures",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
+ ]
+ },
"Desc": "Normalize option for the feature column",
"Aliases": [
"norm"
@@ -11346,205 +11840,268 @@
"Default": "Auto"
},
{
- "Name": "ShowTrainingStats",
- "Type": "Bool",
- "Desc": "Show statistics of training examples.",
+ "Name": "MaxBin",
+ "Type": "Int",
+ "Desc": "Max number of bucket bin for features.",
"Aliases": [
- "stat"
+ "mb"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 255
},
{
- "Name": "L2Weight",
- "Type": "Float",
- "Desc": "L2 regularization weight",
+ "Name": "VerboseEval",
+ "Type": "Bool",
+ "Desc": "Verbose",
"Aliases": [
- "l2"
+ "v"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 4
- }
+ "Default": false
},
{
- "Name": "L1Weight",
- "Type": "Float",
- "Desc": "L1 regularization weight",
- "Aliases": [
- "l1"
- ],
+ "Name": "Silent",
+ "Type": "Bool",
+ "Desc": "Printing running messages.",
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 4
- }
+ "Default": true
},
{
- "Name": "OptTol",
- "Type": "Float",
- "Desc": "Tolerance parameter for optimization convergence. Lower = slower, more accurate",
+ "Name": "NThread",
+ "Type": "Int",
+ "Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
- "ot"
+ "nt"
],
"Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 1E-07,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 0.0001,
- 1E-07
- ]
- }
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "MemorySize",
- "Type": "Int",
- "Desc": "Memory size for L-BFGS. Lower=faster, less accurate",
+ "Name": "EvalMetric",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultMetric",
+ "Rmse",
+ "Mae",
+ "Logloss",
+ "Error",
+ "Merror",
+ "Mlogloss",
+ "Auc",
+ "Ndcg",
+ "Map"
+ ]
+ },
+ "Desc": "Evaluation metrics.",
"Aliases": [
- "m"
+ "em"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 20,
+ "Default": "DefaultMetric"
+ },
+ {
+ "Name": "UseSoftmax",
+ "Type": "Bool",
+ "Desc": "Use softmax loss for the multi classification.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 5,
- 20,
- 50
+ true,
+ false
]
}
},
{
- "Name": "EnforceNonNegativity",
- "Type": "Bool",
- "Desc": "Enforce non-negative weights",
+ "Name": "EarlyStoppingRound",
+ "Type": "Int",
+ "Desc": "Rounds of early stopping, 0 will disable it.",
"Aliases": [
- "nn"
+ "es"
],
"Required": false,
- "SortOrder": 90.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 0
},
{
- "Name": "InitWtsDiameter",
- "Type": "Float",
- "Desc": "Init weights diameter",
+ "Name": "CustomGains",
+ "Type": "String",
+ "Desc": "Comma-separated list of gains associated to each relevance label.",
"Aliases": [
- "initwts"
+ "gains"
],
"Required": false,
- "SortOrder": 140.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
- "SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 5
- }
+ "Default": "0,3,7,15,31,63,127,255,511,1023,2047,4095"
},
{
- "Name": "MaxIterations",
+ "Name": "BatchSize",
"Type": "Int",
- "Desc": "Maximum iterations.",
- "Aliases": [
- "maxiter"
- ],
+ "Desc": "Number of entries in a batch when loading data.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 2147483647,
- "SweepRange": {
- "RangeType": "Long",
- "Min": 1,
- "Max": 2147483647
- }
+ "Default": 1048576
},
{
- "Name": "SgdInitializationTolerance",
- "Type": "Float",
- "Desc": "Run SGD to initialize LR weights, converging to this tolerance",
+ "Name": "UseCat",
+ "Type": "Bool",
+ "Desc": "Enable categorical split or not.",
"Aliases": [
- "sgd"
+ "cat"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.0
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ true,
+ false
+ ]
+ }
},
{
- "Name": "Quiet",
+ "Name": "UseMissing",
"Type": "Bool",
- "Desc": "If set to true, produce no output during training.",
- "Aliases": [
- "q"
- ],
+ "Desc": "Enable missing value auto infer or not.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": false,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ true,
+ false
+ ]
+ }
},
{
- "Name": "UseThreads",
- "Type": "Bool",
- "Desc": "Whether or not to use threads. Default is true",
+ "Name": "MinDataPerGroup",
+ "Type": "Int",
+ "Desc": "Min number of instances per categorical group.",
"Aliases": [
- "t"
+ "mdpg"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true
+ "Default": 100,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 50,
+ 100,
+ 200
+ ]
+ }
},
{
- "Name": "NumThreads",
+ "Name": "MaxCatThreshold",
"Type": "Int",
- "Desc": "Number of threads",
+ "Desc": "Max number of categorical thresholds.",
"Aliases": [
- "nt"
+ "maxcat"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
+ "IsNullable": false,
+ "Default": 32,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 8,
+ 16,
+ 32,
+ 64
+ ]
+ }
},
{
- "Name": "DenseOptimizer",
- "Type": "Bool",
- "Desc": "Force densification of the internal optimization vectors",
- "Aliases": [
- "do"
- ],
+ "Name": "CatSmooth",
+ "Type": "Float",
+ "Desc": "Laplace smoothing term in categorical feature split. Avoids the bias of small categories.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ 1,
+ 10,
+ 20
+ ]
+ }
+ },
+ {
+ "Name": "CatL2",
+ "Type": "Float",
+ "Desc": "L2 Regularization for categorical split.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.1,
+ 0.5,
+ 1,
+ 5,
+ 10
]
}
+ },
+ {
+ "Name": "ParallelTrainer",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "ParallelLightGBM"
+ },
+ "Desc": "Parallel LightGBM Learning Algorithm",
+ "Aliases": [
+ "parag"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "Single"
+ }
}
],
"Outputs": [
@@ -11555,21 +12112,45 @@
}
],
"InputKind": [
+ "ITrainerInputWithGroupId",
"ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
"OutputKind": [
- "IMulticlassClassificationOutput",
+ "IRankingOutput",
"ITrainerOutput"
]
},
{
- "Name": "Trainers.NaiveBayesClassifier",
- "Desc": "Train a MultiClassNaiveBayesTrainer.",
- "FriendlyName": "Multiclass Naive Bayes",
- "ShortName": "MNB",
+ "Name": "Trainers.LightGbmRegressor",
+ "Desc": "LightGBM Regression",
+ "FriendlyName": "LightGBM Regressor",
+ "ShortName": "LightGBMR",
"Inputs": [
+ {
+ "Name": "NumBoostRound",
+ "Type": "Int",
+ "Desc": "Number of iterations.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 100,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 20,
+ 50,
+ 100,
+ 150,
+ 200
+ ]
+ }
+ },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -11581,6 +12162,64 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "LearningRate",
+ "Type": "Float",
+ "Desc": "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].",
+ "Aliases": [
+ "lr"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.025,
+ "Max": 0.4,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "NumLeaves",
+ "Type": "Int",
+ "Desc": "Maximum leaves for trees.",
+ "Aliases": [
+ "nl"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 2,
+ "Max": 128,
+ "StepSize": 4.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "MinDataPerLeaf",
+ "Type": "Int",
+ "Desc": "Minimum number of instances needed in a child.",
+ "Aliases": [
+ "mil"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20,
+ 50
+ ]
+ }
+ },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -11593,6 +12232,20 @@
"IsNullable": false,
"Default": "Features"
},
+ {
+ "Name": "Booster",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "BoosterParameterFunction"
+ },
+ "Desc": "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "gbdt"
+ }
+ },
{
"Name": "LabelColumn",
"Type": "String",
@@ -11605,6 +12258,30 @@
"IsNullable": false,
"Default": "Label"
},
+ {
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "Column to use for example weight",
+ "Aliases": [
+ "weight"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": "Weight"
+ },
+ {
+ "Name": "GroupIdColumn",
+ "Type": "String",
+ "Desc": "Column to use for example groupId",
+ "Aliases": [
+ "groupId"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "GroupId"
+ },
{
"Name": "NormalizeFeatures",
"Type": {
@@ -11644,330 +12321,270 @@
"SortOrder": 6.0,
"IsNullable": false,
"Default": "Auto"
- }
- ],
- "Outputs": [
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "The trained model"
- }
- ],
- "InputKind": [
- "ITrainerInputWithLabel",
- "ITrainerInput"
- ],
- "OutputKind": [
- "IMulticlassClassificationOutput",
- "ITrainerOutput"
- ]
- },
- {
- "Name": "Trainers.OnlineGradientDescentRegressor",
- "Desc": "Train a Online gradient descent perceptron.",
- "FriendlyName": "Stochastic Gradient Descent (Regression)",
- "ShortName": "ogd",
- "Inputs": [
+ },
{
- "Name": "TrainingData",
- "Type": "DataView",
- "Desc": "The data to be used for training",
+ "Name": "MaxBin",
+ "Type": "Int",
+ "Desc": "Max number of bucket bin for features.",
"Aliases": [
- "data"
+ "mb"
],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 255
},
{
- "Name": "FeatureColumn",
- "Type": "String",
- "Desc": "Column to use for features",
+ "Name": "VerboseEval",
+ "Type": "Bool",
+ "Desc": "Verbose",
"Aliases": [
- "feat"
+ "v"
],
"Required": false,
- "SortOrder": 2.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "Features"
+ "Default": false
},
{
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "Column to use for labels",
+ "Name": "Silent",
+ "Type": "Bool",
+ "Desc": "Printing running messages.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "NThread",
+ "Type": "Int",
+ "Desc": "Number of parallel threads used to run LightGBM.",
"Aliases": [
- "lab"
+ "nt"
],
"Required": false,
- "SortOrder": 3.0,
- "IsNullable": false,
- "Default": "Label"
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "NormalizeFeatures",
+ "Name": "EvalMetric",
"Type": {
"Kind": "Enum",
"Values": [
- "No",
- "Warn",
- "Auto",
- "Yes"
+ "DefaultMetric",
+ "Rmse",
+ "Mae",
+ "Logloss",
+ "Error",
+ "Merror",
+ "Mlogloss",
+ "Auc",
+ "Ndcg",
+ "Map"
]
},
- "Desc": "Normalize option for the feature column",
+ "Desc": "Evaluation metrics.",
"Aliases": [
- "norm"
+ "em"
],
"Required": false,
- "SortOrder": 5.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "Auto"
+ "Default": "DefaultMetric"
},
{
- "Name": "Caching",
- "Type": {
- "Kind": "Enum",
+ "Name": "UseSoftmax",
+ "Type": "Bool",
+ "Desc": "Use softmax loss for the multi classification.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
"Values": [
- "Auto",
- "Memory",
- "Disk",
- "None"
+ true,
+ false
]
- },
- "Desc": "Whether learner should cache input training data",
+ }
+ },
+ {
+ "Name": "EarlyStoppingRound",
+ "Type": "Int",
+ "Desc": "Rounds of early stopping, 0 will disable it.",
"Aliases": [
- "cache"
+ "es"
],
"Required": false,
- "SortOrder": 6.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "Auto"
+ "Default": 0
},
{
- "Name": "LossFunction",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "RegressionLossFunction"
- },
- "Desc": "Loss Function",
+ "Name": "CustomGains",
+ "Type": "String",
+ "Desc": "Comma-separated list of gains associated to each relevance label.",
"Aliases": [
- "loss"
+ "gains"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": {
- "Name": "SquaredLoss"
- }
+ "Default": "0,3,7,15,31,63,127,255,511,1023,2047,4095"
},
{
- "Name": "LearningRate",
- "Type": "Float",
- "Desc": "Learning rate",
+ "Name": "BatchSize",
+ "Type": "Int",
+ "Desc": "Number of entries in a batch when loading data.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1048576
+ },
+ {
+ "Name": "UseCat",
+ "Type": "Bool",
+ "Desc": "Enable categorical split or not.",
"Aliases": [
- "lr"
+ "cat"
],
"Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 0.1,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.01,
- 0.1,
- 0.5,
- 1.0
+ true,
+ false
]
}
},
{
- "Name": "DecreaseLearningRate",
+ "Name": "UseMissing",
"Type": "Bool",
- "Desc": "Decrease learning rate",
- "Aliases": [
- "decreaselr"
- ],
+ "Desc": "Enable missing value auto infer or not.",
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": true,
+ "Default": false,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ true,
+ false
]
}
},
{
- "Name": "L2RegularizerWeight",
- "Type": "Float",
- "Desc": "L2 Regularization Weight",
+ "Name": "MinDataPerGroup",
+ "Type": "Int",
+ "Desc": "Min number of instances per categorical group.",
"Aliases": [
- "reg"
+ "mdpg"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
+ "Default": 100,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
"SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 0.5
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 50,
+ 100,
+ 200
+ ]
}
},
{
- "Name": "NumIterations",
+ "Name": "MaxCatThreshold",
"Type": "Int",
- "Desc": "Number of iterations",
+ "Desc": "Max number of categorical thresholds.",
"Aliases": [
- "iter"
+ "maxcat"
],
"Required": false,
- "SortOrder": 50.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 1,
+ "Default": 32,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ },
"SweepRange": {
- "RangeType": "Long",
- "Min": 1,
- "Max": 100,
- "StepSize": 10.0,
- "IsLogScale": true
+ "RangeType": "Discrete",
+ "Values": [
+ 8,
+ 16,
+ 32,
+ 64
+ ]
}
},
{
- "Name": "InitWtsDiameter",
+ "Name": "CatSmooth",
"Type": "Float",
- "Desc": "Init weights diameter",
- "Aliases": [
- "initwts"
- ],
+ "Desc": "Laplace smoothing term in categorical feature split. Avoids the bias of small categories.",
"Required": false,
- "SortOrder": 140.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
"SweepRange": {
- "RangeType": "Float",
- "Min": 0.0,
- "Max": 1.0,
- "NumSteps": 5
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 20
+ ]
}
},
{
- "Name": "ResetWeightsAfterXExamples",
- "Type": "Int",
- "Desc": "Number of examples after which weights will be reset to the current average",
- "Aliases": [
- "numreset"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "DoLazyUpdates",
- "Type": "Bool",
- "Desc": "Instead of updating averaged weights on every example, only update when loss is nonzero",
- "Aliases": [
- "lazy"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
- },
- {
- "Name": "RecencyGain",
- "Type": "Float",
- "Desc": "Extra weight given to more recent updates",
- "Aliases": [
- "rg"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.0
- },
- {
- "Name": "RecencyGainMulti",
- "Type": "Bool",
- "Desc": "Whether Recency Gain is multiplicative (vs. additive)",
- "Aliases": [
- "rgm"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": false
- },
- {
- "Name": "Averaged",
- "Type": "Bool",
- "Desc": "Do averaging?",
- "Aliases": [
- "avg"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
- },
- {
- "Name": "AveragedTolerance",
+ "Name": "CatL2",
"Type": "Float",
- "Desc": "The inexactness tolerance for averaging",
- "Aliases": [
- "avgtol"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.01
- },
- {
- "Name": "InitialWeights",
- "Type": "String",
- "Desc": "Initial Weights and bias, comma-separated",
- "Aliases": [
- "initweights"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Shuffle",
- "Type": "Bool",
- "Desc": "Whether to shuffle for each training iteration",
- "Aliases": [
- "shuf"
- ],
+ "Desc": "L2 Regularization for categorical split.",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true,
+ "Default": 10.0,
+ "Range": {
+ "Min": 0.0
+ },
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- false,
- true
+ 0.1,
+ 0.5,
+ 1,
+ 5,
+ 10
]
}
},
{
- "Name": "StreamingCacheSize",
- "Type": "Int",
- "Desc": "Size of cache when trained in Scope",
+ "Name": "ParallelTrainer",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "ParallelLightGBM"
+ },
+ "Desc": "Parallel LightGBM Learning Algorithm",
"Aliases": [
- "cache"
+ "parag"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000
+ "Default": {
+ "Name": "Single"
+ }
}
],
"Outputs": [
@@ -11978,6 +12595,8 @@
}
],
"InputKind": [
+ "ITrainerInputWithGroupId",
+ "ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
@@ -11987,10 +12606,10 @@
]
},
{
- "Name": "Trainers.PcaAnomalyDetector",
- "Desc": "Train an PCA Anomaly model.",
- "FriendlyName": "PCA Anomaly Detector",
- "ShortName": "pcaAnom",
+ "Name": "Trainers.LinearSvmBinaryClassifier",
+ "Desc": "Train a linear SVM.",
+ "FriendlyName": "SVM (Pegasos-Linear)",
+ "ShortName": "svm",
"Inputs": [
{
"Name": "TrainingData",
@@ -12016,16 +12635,16 @@
"Default": "Features"
},
{
- "Name": "WeightColumn",
+ "Name": "LabelColumn",
"Type": "String",
- "Desc": "Column to use for example weight",
+ "Desc": "Column to use for labels",
"Aliases": [
- "weight"
+ "lab"
],
"Required": false,
- "SortOrder": 4.0,
+ "SortOrder": 3.0,
"IsNullable": false,
- "Default": "Weight"
+ "Default": "Label"
},
{
"Name": "NormalizeFeatures",
@@ -12068,54 +12687,35 @@
"Default": "Auto"
},
{
- "Name": "Rank",
- "Type": "Int",
- "Desc": "The number of components in the PCA",
+ "Name": "Lambda",
+ "Type": "Float",
+ "Desc": "Regularizer constant",
"Aliases": [
- "k"
+ "lambda"
],
"Required": false,
"SortOrder": 50.0,
"IsNullable": false,
- "Default": 20,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 10,
- 20,
- 40,
- 80
- ]
- }
- },
- {
- "Name": "Oversampling",
- "Type": "Int",
- "Desc": "Oversampling parameter for randomized PCA training",
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 20,
+ "Default": 0.001,
"SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 10,
- 20,
- 40
- ]
+ "RangeType": "Float",
+ "Min": 1E-05,
+ "Max": 0.1,
+ "StepSize": 10.0,
+ "IsLogScale": true
}
},
{
- "Name": "Center",
+ "Name": "PerformProjection",
"Type": "Bool",
- "Desc": "If enabled, data is centered to be zero mean",
+ "Desc": "Perform projection to unit-ball? Typically used with batch size > 1.",
"Aliases": [
- "center"
+ "project"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": true,
+ "Default": false,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
@@ -12125,16 +12725,135 @@
}
},
{
- "Name": "Seed",
+ "Name": "NumIterations",
"Type": "Int",
- "Desc": "The seed for random number generation",
+ "Desc": "Number of iterations",
"Aliases": [
- "seed"
+ "iter"
],
"Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 1,
+ "Max": 100,
+ "StepSize": 10.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "InitWtsDiameter",
+ "Type": "Float",
+ "Desc": "Init weights diameter",
+ "Aliases": [
+ "initwts"
+ ],
+ "Required": false,
+ "SortOrder": 140.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 5
+ }
+ },
+ {
+ "Name": "NoBias",
+ "Type": "Bool",
+ "Desc": "No bias",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
+ },
+ {
+ "Name": "Calibrator",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "CalibratorTrainer"
+ },
+ "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "PlattCalibrator"
+ }
+ },
+ {
+ "Name": "MaxCalibrationExamples",
+ "Type": "Int",
+ "Desc": "The maximum number of examples to use when training the calibrator",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000000
+ },
+ {
+ "Name": "InitialWeights",
+ "Type": "String",
+ "Desc": "Initial Weights and bias, comma-separated",
+ "Aliases": [
+ "initweights"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
"Default": null
+ },
+ {
+ "Name": "Shuffle",
+ "Type": "Bool",
+ "Desc": "Whether to shuffle for each training iteration",
+ "Aliases": [
+ "shuf"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
+ },
+ {
+ "Name": "StreamingCacheSize",
+ "Type": "Int",
+ "Desc": "Size of cache when trained in Scope",
+ "Aliases": [
+ "cache"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000000
+ },
+ {
+ "Name": "BatchSize",
+ "Type": "Int",
+ "Desc": "Batch size",
+ "Aliases": [
+ "batch"
+ ],
+ "Required": false,
+ "SortOrder": 190.0,
+ "IsNullable": false,
+ "Default": 1
}
],
"Outputs": [
@@ -12145,19 +12864,19 @@
}
],
"InputKind": [
- "IUnsupervisedTrainerWithWeight",
+ "ITrainerInputWithLabel",
"ITrainerInput"
],
"OutputKind": [
- "IAnomalyDetectionOutput",
+ "IBinaryClassificationOutput",
"ITrainerOutput"
]
},
{
- "Name": "Trainers.PoissonRegressor",
- "Desc": "Train an Poisson regression model.",
- "FriendlyName": "Poisson Regression",
- "ShortName": "PR",
+ "Name": "Trainers.LogisticRegressionBinaryClassifier",
+ "Desc": "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function.",
+ "FriendlyName": "Logistic Regression",
+ "ShortName": "lr",
"Inputs": [
{
"Name": "TrainingData",
@@ -12246,6 +12965,18 @@
"IsNullable": false,
"Default": "Auto"
},
+ {
+ "Name": "ShowTrainingStats",
+ "Type": "Bool",
+ "Desc": "Show statistics of training examples.",
+ "Aliases": [
+ "stat"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": false
+ },
{
"Name": "L2Weight",
"Type": "Float",
@@ -12449,40 +13180,16 @@
"ITrainerInput"
],
"OutputKind": [
- "IRegressionOutput",
+ "IBinaryClassificationOutput",
"ITrainerOutput"
]
},
{
- "Name": "Trainers.StochasticDualCoordinateAscentBinaryClassifier",
- "Desc": "Train an SDCA binary model.",
- "FriendlyName": "Fast Linear (SA-SDCA)",
- "ShortName": "SDCA",
+ "Name": "Trainers.LogisticRegressionClassifier",
+ "Desc": "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistic function.",
+ "FriendlyName": "Multi-class Logistic Regression",
+ "ShortName": "mlr",
"Inputs": [
- {
- "Name": "L2Const",
- "Type": "Float",
- "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
- "Aliases": [
- "l2"
- ],
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 1E-07,
- 1E-06,
- 1E-05,
- 0.0001,
- 0.001,
- 0.01
- ]
- }
- },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -12494,29 +13201,6 @@
"SortOrder": 1.0,
"IsNullable": false
},
- {
- "Name": "L1Threshold",
- "Type": "Float",
- "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
- "Aliases": [
- "l1"
- ],
- "Required": false,
- "SortOrder": 2.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 0.0,
- 0.25,
- 0.5,
- 0.75,
- 1.0
- ]
- }
- },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -12541,6 +13225,18 @@
"IsNullable": false,
"Default": "Label"
},
+ {
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "Column to use for example weight",
+ "Aliases": [
+ "weight"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": "Weight"
+ },
{
"Name": "NormalizeFeatures",
"Type": {
@@ -12582,138 +13278,181 @@
"Default": "Auto"
},
{
- "Name": "LossFunction",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "SDCAClassificationLossFunction"
- },
- "Desc": "Loss Function",
+ "Name": "ShowTrainingStats",
+ "Type": "Bool",
+ "Desc": "Show statistics of training examples.",
"Aliases": [
- "loss"
+ "stat"
],
"Required": false,
"SortOrder": 50.0,
"IsNullable": false,
- "Default": {
- "Name": "LogLoss"
- }
+ "Default": false
},
{
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
+ "Name": "L2Weight",
+ "Type": "Float",
+ "Desc": "L2 regularization weight",
"Aliases": [
- "nt",
- "t",
- "threads"
+ "l2"
],
"Required": false,
"SortOrder": 50.0,
- "IsNullable": true,
- "Default": null
+ "IsNullable": false,
+ "Default": 1.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 4
+ }
},
{
- "Name": "PositiveInstanceWeight",
+ "Name": "L1Weight",
"Type": "Float",
- "Desc": "Apply weight to the positive class, for imbalanced data",
+ "Desc": "L1 regularization weight",
"Aliases": [
- "piw"
+ "l1"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": 1.0
+ "Default": 1.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 4
+ }
},
{
- "Name": "Calibrator",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "CalibratorTrainer"
- },
- "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
+ "Name": "OptTol",
+ "Type": "Float",
+ "Desc": "Tolerance parameter for optimization convergence. Lower = slower, more accurate",
+ "Aliases": [
+ "ot"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": {
- "Name": "PlattCalibrator"
+ "Default": 1E-07,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0001,
+ 1E-07
+ ]
}
},
{
- "Name": "MaxCalibrationExamples",
+ "Name": "MemorySize",
"Type": "Int",
- "Desc": "The maximum number of examples to use when training the calibrator",
+ "Desc": "Memory size for L-BFGS. Lower=faster, less accurate",
+ "Aliases": [
+ "m"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": 1000000
+ "Default": 20,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 5,
+ 20,
+ 50
+ ]
+ }
},
{
- "Name": "ConvergenceTolerance",
+ "Name": "EnforceNonNegativity",
+ "Type": "Bool",
+ "Desc": "Enforce non-negative weights",
+ "Aliases": [
+ "nn"
+ ],
+ "Required": false,
+ "SortOrder": 90.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "InitWtsDiameter",
"Type": "Float",
- "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
+ "Desc": "Init weights diameter",
"Aliases": [
- "tol"
+ "initwts"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 140.0,
"IsNullable": false,
- "Default": 0.1,
+ "Default": 0.0,
"SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 0.001,
- 0.01,
- 0.1,
- 0.2
- ]
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 5
}
},
{
"Name": "MaxIterations",
"Type": "Int",
- "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
+ "Desc": "Maximum iterations.",
"Aliases": [
- "iter"
+ "maxiter"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
- "Default": null,
+ "IsNullable": false,
+ "Default": 2147483647,
"SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 10,
- 20,
- 100
- ]
+ "RangeType": "Long",
+ "Min": 1,
+ "Max": 2147483647
}
},
{
- "Name": "Shuffle",
+ "Name": "SgdInitializationTolerance",
+ "Type": "Float",
+ "Desc": "Run SGD to initialize LR weights, converging to this tolerance",
+ "Aliases": [
+ "sgd"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0
+ },
+ {
+ "Name": "Quiet",
"Type": "Bool",
- "Desc": "Shuffle data every epoch?",
+ "Desc": "If set to true, produce no output during training.",
"Aliases": [
- "shuf"
+ "q"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- false,
- true
- ]
- }
+ "Default": false
},
{
- "Name": "CheckFrequency",
+ "Name": "UseThreads",
+ "Type": "Bool",
+ "Desc": "Whether or not to use threads. Default is true",
+ "Aliases": [
+ "t"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "NumThreads",
"Type": "Int",
- "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
+ "Desc": "Number of threads",
"Aliases": [
- "checkFreq"
+ "nt"
],
"Required": false,
"SortOrder": 150.0,
@@ -12721,23 +13460,21 @@
"Default": null
},
{
- "Name": "BiasLearningRate",
- "Type": "Float",
- "Desc": "The learning rate for adjusting bias from being regularized.",
+ "Name": "DenseOptimizer",
+ "Type": "Bool",
+ "Desc": "Force densification of the internal optimization vectors",
"Aliases": [
- "blr"
+ "do"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0,
+ "Default": false,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.0,
- 0.01,
- 0.1,
- 1.0
+ false,
+ true
]
}
}
@@ -12750,44 +13487,21 @@
}
],
"InputKind": [
+ "ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
"OutputKind": [
- "IBinaryClassificationOutput",
+ "IMulticlassClassificationOutput",
"ITrainerOutput"
]
},
{
- "Name": "Trainers.StochasticDualCoordinateAscentClassifier",
- "Desc": "Train an SDCA multi class model",
- "FriendlyName": "Fast Linear Multi-class Classification (SA-SDCA)",
- "ShortName": "sasdcamc",
+ "Name": "Trainers.NaiveBayesClassifier",
+ "Desc": "Train a MultiClassNaiveBayesTrainer.",
+ "FriendlyName": "Multiclass Naive Bayes",
+ "ShortName": "MNB",
"Inputs": [
- {
- "Name": "L2Const",
- "Type": "Float",
- "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
- "Aliases": [
- "l2"
- ],
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 1E-07,
- 1E-06,
- 1E-05,
- 0.0001,
- 0.001,
- 0.01
- ]
- }
- },
{
"Name": "TrainingData",
"Type": "DataView",
@@ -12799,29 +13513,6 @@
"SortOrder": 1.0,
"IsNullable": false
},
- {
- "Name": "L1Threshold",
- "Type": "Float",
- "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
- "Aliases": [
- "l1"
- ],
- "Required": false,
- "SortOrder": 2.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 0.0,
- 0.25,
- 0.5,
- 0.75,
- 1.0
- ]
- }
- },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -12885,213 +13576,41 @@
"SortOrder": 6.0,
"IsNullable": false,
"Default": "Auto"
- },
+ }
+ ],
+ "Outputs": [
{
- "Name": "LossFunction",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "SDCAClassificationLossFunction"
- },
- "Desc": "Loss Function",
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
+ }
+ ],
+ "InputKind": [
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
+ ],
+ "OutputKind": [
+ "IMulticlassClassificationOutput",
+ "ITrainerOutput"
+ ]
+ },
+ {
+ "Name": "Trainers.OnlineGradientDescentRegressor",
+ "Desc": "Train a Online gradient descent perceptron.",
+ "FriendlyName": "Stochastic Gradient Descent (Regression)",
+ "ShortName": "ogd",
+ "Inputs": [
+ {
+ "Name": "TrainingData",
+ "Type": "DataView",
+ "Desc": "The data to be used for training",
"Aliases": [
- "loss"
- ],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": {
- "Name": "LogLoss"
- }
- },
- {
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
- "Aliases": [
- "nt",
- "t",
- "threads"
- ],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "ConvergenceTolerance",
- "Type": "Float",
- "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
- "Aliases": [
- "tol"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.1,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 0.001,
- 0.01,
- 0.1,
- 0.2
- ]
- }
- },
- {
- "Name": "MaxIterations",
- "Type": "Int",
- "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
- "Aliases": [
- "iter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 10,
- 20,
- 100
- ]
- }
- },
- {
- "Name": "Shuffle",
- "Type": "Bool",
- "Desc": "Shuffle data every epoch?",
- "Aliases": [
- "shuf"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- false,
- true
- ]
- }
- },
- {
- "Name": "CheckFrequency",
- "Type": "Int",
- "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
- "Aliases": [
- "checkFreq"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "BiasLearningRate",
- "Type": "Float",
- "Desc": "The learning rate for adjusting bias from being regularized.",
- "Aliases": [
- "blr"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.0,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 0.0,
- 0.01,
- 0.1,
- 1.0
- ]
- }
- }
- ],
- "Outputs": [
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "The trained model"
- }
- ],
- "InputKind": [
- "ITrainerInputWithLabel",
- "ITrainerInput"
- ],
- "OutputKind": [
- "IMulticlassClassificationOutput",
- "ITrainerOutput"
- ]
- },
- {
- "Name": "Trainers.StochasticDualCoordinateAscentRegressor",
- "Desc": "Train an SDCA regression model",
- "FriendlyName": "Fast Linear Regression (SA-SDCA)",
- "ShortName": "sasdcar",
- "Inputs": [
- {
- "Name": "L2Const",
- "Type": "Float",
- "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
- "Aliases": [
- "l2"
- ],
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 1E-07,
- 1E-06,
- 1E-05,
- 0.0001,
- 0.001,
- 0.01
- ]
- }
- },
- {
- "Name": "TrainingData",
- "Type": "DataView",
- "Desc": "The data to be used for training",
- "Aliases": [
- "data"
+ "data"
],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
- {
- "Name": "L1Threshold",
- "Type": "Float",
- "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
- "Aliases": [
- "l1"
- ],
- "Required": false,
- "SortOrder": 2.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 0.0,
- 0.25,
- 0.5,
- 0.75,
- 1.0
- ]
- }
- },
{
"Name": "FeatureColumn",
"Type": "String",
@@ -13160,7 +13679,7 @@
"Name": "LossFunction",
"Type": {
"Kind": "Component",
- "ComponentKind": "SDCARegressionLossFunction"
+ "ComponentKind": "RegressionLossFunction"
},
"Desc": "Loss Function",
"Aliases": [
@@ -13174,70 +13693,35 @@
}
},
{
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
- "Aliases": [
- "nt",
- "t",
- "threads"
- ],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "ConvergenceTolerance",
+ "Name": "LearningRate",
"Type": "Float",
- "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
+ "Desc": "Learning rate",
"Aliases": [
- "tol"
+ "lr"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": 0.01,
+ "Default": 0.1,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.001,
0.01,
0.1,
- 0.2
- ]
- }
- },
- {
- "Name": "MaxIterations",
- "Type": "Int",
- "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
- "Aliases": [
- "iter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- "",
- 10,
- 20,
- 100
+ 0.5,
+ 1.0
]
}
},
{
- "Name": "Shuffle",
+ "Name": "DecreaseLearningRate",
"Type": "Bool",
- "Desc": "Shuffle data every epoch?",
+ "Desc": "Decrease learning rate",
"Aliases": [
- "shuf"
+ "decreaselr"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
"Default": true,
"SweepRange": {
@@ -13249,11 +13733,65 @@
}
},
{
- "Name": "CheckFrequency",
- "Type": "Int",
- "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
+ "Name": "L2RegularizerWeight",
+ "Type": "Float",
+ "Desc": "L2 Regularization Weight",
"Aliases": [
- "checkFreq"
+ "reg"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 0.5
+ }
+ },
+ {
+ "Name": "NumIterations",
+ "Type": "Int",
+ "Desc": "Number of iterations",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 1,
+ "Max": 100,
+ "StepSize": 10.0,
+ "IsLogScale": true
+ }
+ },
+ {
+ "Name": "InitWtsDiameter",
+ "Type": "Float",
+ "Desc": "Init weights diameter",
+ "Aliases": [
+ "initwts"
+ ],
+ "Required": false,
+ "SortOrder": 140.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 5
+ }
+ },
+ {
+ "Name": "ResetWeightsAfterXExamples",
+ "Type": "Int",
+ "Desc": "Number of examples after which weights will be reset to the current average",
+ "Aliases": [
+ "numreset"
],
"Required": false,
"SortOrder": 150.0,
@@ -13261,25 +13799,107 @@
"Default": null
},
{
- "Name": "BiasLearningRate",
+ "Name": "DoLazyUpdates",
+ "Type": "Bool",
+ "Desc": "Instead of updating averaged weights on every example, only update when loss is nonzero",
+ "Aliases": [
+ "lazy"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "RecencyGain",
"Type": "Float",
- "Desc": "The learning rate for adjusting bias from being regularized.",
+ "Desc": "Extra weight given to more recent updates",
"Aliases": [
- "blr"
+ "rg"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1.0,
+ "Default": 0.0
+ },
+ {
+ "Name": "RecencyGainMulti",
+ "Type": "Bool",
+ "Desc": "Whether Recency Gain is multiplicative (vs. additive)",
+ "Aliases": [
+ "rgm"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "Averaged",
+ "Type": "Bool",
+ "Desc": "Do averaging?",
+ "Aliases": [
+ "avg"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "AveragedTolerance",
+ "Type": "Float",
+ "Desc": "The inexactness tolerance for averaging",
+ "Aliases": [
+ "avgtol"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.01
+ },
+ {
+ "Name": "InitialWeights",
+ "Type": "String",
+ "Desc": "Initial Weights and bias, comma-separated",
+ "Aliases": [
+ "initweights"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Shuffle",
+ "Type": "Bool",
+ "Desc": "Whether to shuffle for each training iteration",
+ "Aliases": [
+ "shuf"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.0,
- 0.01,
- 0.1,
- 1.0
+ false,
+ true
]
}
+ },
+ {
+ "Name": "StreamingCacheSize",
+ "Type": "Int",
+ "Desc": "Size of cache when trained in Scope",
+ "Aliases": [
+ "cache"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000000
}
],
"Outputs": [
@@ -13299,10 +13919,10 @@
]
},
{
- "Name": "Trainers.StochasticGradientDescentBinaryClassifier",
- "Desc": "Train an Hogwild SGD binary model.",
- "FriendlyName": "Hogwild SGD (binary)",
- "ShortName": "HogwildSGD",
+ "Name": "Trainers.PcaAnomalyDetector",
+ "Desc": "Train an PCA Anomaly model.",
+ "FriendlyName": "PCA Anomaly Detector",
+ "ShortName": "pcaAnom",
"Inputs": [
{
"Name": "TrainingData",
@@ -13327,18 +13947,6 @@
"IsNullable": false,
"Default": "Features"
},
- {
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "Column to use for labels",
- "Aliases": [
- "lab"
- ],
- "Required": false,
- "SortOrder": 3.0,
- "IsNullable": false,
- "Default": "Label"
- },
{
"Name": "WeightColumn",
"Type": "String",
@@ -13392,178 +14000,73 @@
"Default": "Auto"
},
{
- "Name": "LossFunction",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "ClassificationLossFunction"
- },
- "Desc": "Loss Function",
- "Aliases": [
- "loss"
- ],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": {
- "Name": "LogLoss"
- }
- },
- {
- "Name": "L2Const",
- "Type": "Float",
- "Desc": "L2 regularizer constant",
+ "Name": "Rank",
+ "Type": "Int",
+ "Desc": "The number of components in the PCA",
"Aliases": [
- "l2"
+ "k"
],
"Required": false,
"SortOrder": 50.0,
"IsNullable": false,
- "Default": 1E-06,
+ "Default": 20,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 1E-07,
- 5E-07,
- 1E-06,
- 5E-06,
- 1E-05
+ 10,
+ 20,
+ 40,
+ 80
]
}
},
{
- "Name": "NumThreads",
+ "Name": "Oversampling",
"Type": "Int",
- "Desc": "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.",
- "Aliases": [
- "nt",
- "t",
- "threads"
- ],
+ "Desc": "Oversampling parameter for randomized PCA training",
"Required": false,
"SortOrder": 50.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "ConvergenceTolerance",
- "Type": "Float",
- "Desc": "Exponential moving averaged improvement tolerance for convergence",
- "Aliases": [
- "tol"
- ],
- "Required": false,
- "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0.0001,
+ "Default": 20,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 0.01,
- 0.001,
- 0.0001,
- 1E-05
+ 10,
+ 20,
+ 40
]
}
},
{
- "Name": "MaxIterations",
- "Type": "Int",
- "Desc": "Maximum number of iterations; set to 1 to simulate online learning.",
+ "Name": "Center",
+ "Type": "Bool",
+ "Desc": "If enabled, data is centered to be zero mean",
"Aliases": [
- "iter"
+ "center"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 20,
+ "Default": true,
"SweepRange": {
"RangeType": "Discrete",
"Values": [
- 1,
- 5,
- 10,
- 20
+ false,
+ true
]
}
},
{
- "Name": "InitLearningRate",
- "Type": "Float",
- "Desc": "Initial learning rate (only used by SGD)",
- "Aliases": [
- "ilr",
- "lr"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.01
- },
- {
- "Name": "Shuffle",
- "Type": "Bool",
- "Desc": "Shuffle data every epoch?",
- "Aliases": [
- "shuf"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- false,
- true
- ]
- }
- },
- {
- "Name": "PositiveInstanceWeight",
- "Type": "Float",
- "Desc": "Apply weight to the positive class, for imbalanced data",
- "Aliases": [
- "piw"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 1.0
- },
- {
- "Name": "CheckFrequency",
+ "Name": "Seed",
"Type": "Int",
- "Desc": "Convergence check frequency (in terms of number of iterations). Default equals number of threads",
+ "Desc": "The seed for random number generation",
"Aliases": [
- "checkFreq"
+ "seed"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
- },
- {
- "Name": "Calibrator",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "CalibratorTrainer"
- },
- "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": {
- "Name": "PlattCalibrator"
- }
- },
- {
- "Name": "MaxCalibrationExamples",
- "Type": "Int",
- "Desc": "The maximum number of examples to use when training the calibrator",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 1000000
}
],
"Outputs": [
@@ -13574,247 +14077,259 @@
}
],
"InputKind": [
- "ITrainerInputWithWeight",
- "ITrainerInputWithLabel",
+ "IUnsupervisedTrainerWithWeight",
"ITrainerInput"
],
"OutputKind": [
- "IBinaryClassificationOutput",
+ "IAnomalyDetectionOutput",
"ITrainerOutput"
]
},
{
- "Name": "Transforms.ApproximateBootstrapSampler",
- "Desc": "Approximate bootstrap sampling.",
- "FriendlyName": "Bootstrap Sample Transform",
- "ShortName": "BootstrapSample",
+ "Name": "Trainers.PoissonRegressor",
+ "Desc": "Train an Poisson regression model.",
+ "FriendlyName": "Poisson Regression",
+ "ShortName": "PR",
"Inputs": [
{
- "Name": "Data",
+ "Name": "TrainingData",
"Type": "DataView",
- "Desc": "Input dataset",
+ "Desc": "The data to be used for training",
+ "Aliases": [
+ "data"
+ ],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Complement",
- "Type": "Bool",
- "Desc": "Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.",
+ "Name": "FeatureColumn",
+ "Type": "String",
+ "Desc": "Column to use for features",
"Aliases": [
- "comp"
+ "feat"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 2.0,
"IsNullable": false,
- "Default": false
+ "Default": "Features"
},
{
- "Name": "Seed",
- "Type": "UInt",
- "Desc": "The random seed. If unspecified random state will be instead derived from the environment.",
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
"Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Label"
},
{
- "Name": "ShuffleInput",
- "Type": "Bool",
- "Desc": "Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.",
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "Column to use for example weight",
"Aliases": [
- "si"
+ "weight"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 4.0,
"IsNullable": false,
- "Default": true
+ "Default": "Weight"
},
{
- "Name": "PoolSize",
- "Type": "Int",
- "Desc": "When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.",
+ "Name": "NormalizeFeatures",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
+ ]
+ },
+ "Desc": "Normalize option for the feature column",
"Aliases": [
- "pool"
+ "norm"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 5.0,
"IsNullable": false,
- "Default": 1000
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Default": "Auto"
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.BinaryPredictionScoreColumnsRenamer",
- "Desc": "For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class.",
- "FriendlyName": "Rename Binary Prediction Score Columns",
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "Caching",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Auto",
+ "Memory",
+ "Disk",
+ "None"
+ ]
+ },
+ "Desc": "Whether learner should cache input training data",
+ "Aliases": [
+ "cache"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": "Auto"
},
{
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "The predictor model used in scoring",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
+ "Name": "L2Weight",
+ "Type": "Float",
+ "Desc": "L2 regularization weight",
+ "Aliases": [
+ "l2"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 4
+ }
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "L1Weight",
+ "Type": "Float",
+ "Desc": "L1 regularization weight",
+ "Aliases": [
+ "l1"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 4
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.BinNormalizer",
- "Desc": "The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins.",
- "FriendlyName": "Binning Normalizer",
- "ShortName": "Bin",
- "Inputs": [
+ "Name": "OptTol",
+ "Type": "Float",
+ "Desc": "Tolerance parameter for optimization convergence. Lower = slower, more accurate",
+ "Aliases": [
+ "ot"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1E-07,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0001,
+ 1E-07
+ ]
+ }
+ },
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "NumBins",
- "Type": "Int",
- "Desc": "Max number of bins, power of 2 recommended",
- "Aliases": [
- "bins"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "FixZero",
- "Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
- "Aliases": [
- "zero"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
- "Aliases": [
- "maxtrain"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Name": "MemorySize",
+ "Type": "Int",
+ "Desc": "Memory size for L-BFGS. Lower=faster, less accurate",
"Aliases": [
- "col"
+ "m"
],
"Required": false,
- "SortOrder": 1.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": null
+ "Default": 20,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 5,
+ 20,
+ 50
+ ]
+ }
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "EnforceNonNegativity",
+ "Type": "Bool",
+ "Desc": "Enforce non-negative weights",
+ "Aliases": [
+ "nn"
+ ],
+ "Required": false,
+ "SortOrder": 90.0,
+ "IsNullable": false,
+ "Default": false
},
{
- "Name": "NumBins",
+ "Name": "InitWtsDiameter",
+ "Type": "Float",
+ "Desc": "Init weights diameter",
+ "Aliases": [
+ "initwts"
+ ],
+ "Required": false,
+ "SortOrder": 140.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Float",
+ "Min": 0.0,
+ "Max": 1.0,
+ "NumSteps": 5
+ }
+ },
+ {
+ "Name": "MaxIterations",
"Type": "Int",
- "Desc": "Max number of bins, power of 2 recommended",
+ "Desc": "Maximum iterations.",
"Aliases": [
- "bins"
+ "maxiter"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1024
+ "Default": 2147483647,
+ "SweepRange": {
+ "RangeType": "Long",
+ "Min": 1,
+ "Max": 2147483647
+ }
},
{
- "Name": "FixZero",
+ "Name": "SgdInitializationTolerance",
+ "Type": "Float",
+ "Desc": "Run SGD to initialize LR weights, converging to this tolerance",
+ "Aliases": [
+ "sgd"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0
+ },
+ {
+ "Name": "Quiet",
"Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Desc": "If set to true, produce no output during training.",
"Aliases": [
- "zero"
+ "q"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "UseThreads",
+ "Type": "Bool",
+ "Desc": "Whether or not to use threads. Default is true",
+ "Aliases": [
+ "t"
],
"Required": false,
"SortOrder": 150.0,
@@ -13822,1471 +14337,1189 @@
"Default": true
},
{
- "Name": "MaxTrainingExamples",
+ "Name": "NumThreads",
"Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
+ "Desc": "Number of threads",
"Aliases": [
- "maxtrain"
+ "nt"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "DenseOptimizer",
+ "Type": "Bool",
+ "Desc": "Force densification of the internal optimization vectors",
+ "Aliases": [
+ "do"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000000
+ "Default": false,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
}
],
"Outputs": [
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
}
],
"InputKind": [
- "ITransformInput"
+ "ITrainerInputWithWeight",
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
],
"OutputKind": [
- "ITransformOutput"
+ "IRegressionOutput",
+ "ITrainerOutput"
]
},
{
- "Name": "Transforms.CategoricalHashOneHotVectorizer",
- "Desc": "Encodes the categorical variable with hash-based encoding",
- "FriendlyName": "Categorical Hash Transform",
- "ShortName": null,
+ "Name": "Trainers.StochasticDualCoordinateAscentBinaryClassifier",
+ "Desc": "Train an SDCA binary model.",
+ "FriendlyName": "Fast Linear (SA-SDCA)",
+ "ShortName": "SDCA",
"Inputs": [
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "OutputKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Bag",
- "Ind",
- "Key",
- "Bin"
- ]
- },
- "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)",
- "Aliases": [
- "kind"
- ],
- "Required": false,
- "SortOrder": 102.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "HashBits",
- "Type": "Int",
- "Desc": "The number of bits to hash into. Must be between 1 and 30, inclusive.",
- "Aliases": [
- "bits"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Seed",
- "Type": "UInt",
- "Desc": "Hashing seed",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Ordered",
- "Type": "Bool",
- "Desc": "Whether the position of each term should be included in the hash",
- "Aliases": [
- "ord"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "InvertHash",
- "Type": "Int",
- "Desc": "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
- "Aliases": [
- "ih"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:hashBits:src)",
+ "Name": "L2Const",
+ "Type": "Float",
+ "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
"Aliases": [
- "col"
+ "l2"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
- "IsNullable": false
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 1E-07,
+ 1E-06,
+ 1E-05,
+ 0.0001,
+ 0.001,
+ 0.01
+ ]
+ }
},
{
- "Name": "Data",
+ "Name": "TrainingData",
"Type": "DataView",
- "Desc": "Input dataset",
+ "Desc": "The data to be used for training",
+ "Aliases": [
+ "data"
+ ],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "HashBits",
- "Type": "Int",
- "Desc": "Number of bits to hash into. Must be between 1 and 30, inclusive.",
+ "Name": "L1Threshold",
+ "Type": "Float",
+ "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
"Aliases": [
- "bits"
+ "l1"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 0.0,
+ 0.25,
+ 0.5,
+ 0.75,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "FeatureColumn",
+ "Type": "String",
+ "Desc": "Column to use for features",
+ "Aliases": [
+ "feat"
],
"Required": false,
"SortOrder": 2.0,
"IsNullable": false,
- "Default": 16
+ "Default": "Features"
},
{
- "Name": "OutputKind",
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Label"
+ },
+ {
+ "Name": "NormalizeFeatures",
"Type": {
"Kind": "Enum",
"Values": [
- "Bag",
- "Ind",
- "Key",
- "Bin"
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
]
},
- "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)",
+ "Desc": "Normalize option for the feature column",
"Aliases": [
- "kind"
+ "norm"
],
"Required": false,
- "SortOrder": 102.0,
+ "SortOrder": 5.0,
"IsNullable": false,
- "Default": "Bag"
+ "Default": "Auto"
},
{
- "Name": "Seed",
- "Type": "UInt",
- "Desc": "Hashing seed",
+ "Name": "Caching",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Auto",
+ "Memory",
+ "Disk",
+ "None"
+ ]
+ },
+ "Desc": "Whether learner should cache input training data",
+ "Aliases": [
+ "cache"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 6.0,
"IsNullable": false,
- "Default": 314489979
+ "Default": "Auto"
},
{
- "Name": "Ordered",
- "Type": "Bool",
- "Desc": "Whether the position of each term should be included in the hash",
+ "Name": "LossFunction",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "SDCAClassificationLossFunction"
+ },
+ "Desc": "Loss Function",
"Aliases": [
- "ord"
+ "loss"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": false,
- "Default": true
+ "Default": {
+ "Name": "LogLoss"
+ }
},
{
- "Name": "InvertHash",
+ "Name": "NumThreads",
"Type": "Int",
- "Desc": "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
+ "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
"Aliases": [
- "ih"
+ "nt",
+ "t",
+ "threads"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "PositiveInstanceWeight",
+ "Type": "Float",
+ "Desc": "Apply weight to the positive class, for imbalanced data",
+ "Aliases": [
+ "piw"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 0
- }
- ],
- "Outputs": [
+ "Default": 1.0
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "Calibrator",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "CalibratorTrainer"
+ },
+ "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "PlattCalibrator"
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.CategoricalOneHotVectorizer",
- "Desc": "Encodes the categorical variable with one-hot encoding based on term dictionary",
- "FriendlyName": "Categorical Transform",
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "OutputKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Bag",
- "Ind",
- "Key",
- "Bin"
- ]
- },
- "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector",
- "Aliases": [
- "kind"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "MaxNumTerms",
- "Type": "Int",
- "Desc": "Maximum number of terms to keep when auto-training",
- "Aliases": [
- "max"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Occurrence",
- "Value"
- ]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "TextKeyValues",
- "Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
- "Aliases": [
- "textkv"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "MaxNumTerms",
+ "Name": "MaxCalibrationExamples",
"Type": "Int",
- "Desc": "Maximum number of terms to keep per column when auto-training",
- "Aliases": [
- "max"
- ],
+ "Desc": "The maximum number of examples to use when training the calibrator",
"Required": false,
- "SortOrder": 5.0,
+ "SortOrder": 150.0,
"IsNullable": false,
"Default": 1000000
},
{
- "Name": "OutputKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Bag",
- "Ind",
- "Key",
- "Bin"
- ]
- },
- "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)",
+ "Name": "ConvergenceTolerance",
+ "Type": "Float",
+ "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
"Aliases": [
- "kind"
+ "tol"
],
"Required": false,
- "SortOrder": 102.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "Ind"
+ "Default": 0.1,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.001,
+ 0.01,
+ 0.1,
+ 0.2
+ ]
+ }
},
{
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
+ "Name": "MaxIterations",
+ "Type": "Int",
+ "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
+ "Aliases": [
+ "iter"
+ ],
"Required": false,
- "SortOrder": 106.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
"Values": [
- "Occurrence",
- "Value"
+ "",
+ 10,
+ 20,
+ 100
]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 113.0,
- "IsNullable": false,
- "Default": "Occurrence"
+ }
},
{
- "Name": "TextKeyValues",
+ "Name": "Shuffle",
"Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Desc": "Shuffle data every epoch?",
"Aliases": [
- "textkv"
+ "shuf"
],
"Required": false,
- "SortOrder": 114.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": true
- }
- ],
- "Outputs": [
+ "Default": true,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "CheckFrequency",
+ "Type": "Int",
+ "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
+ "Aliases": [
+ "checkFreq"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
+ "Name": "BiasLearningRate",
+ "Type": "Float",
+ "Desc": "The learning rate for adjusting bias from being regularized.",
+ "Aliases": [
+ "blr"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.01,
+ 0.1,
+ 1.0
+ ]
+ }
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
+ }
+ ],
+ "InputKind": [
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
],
"OutputKind": [
- "ITransformOutput"
+ "IBinaryClassificationOutput",
+ "ITrainerOutput"
]
},
{
- "Name": "Transforms.CharacterTokenizer",
- "Desc": "Character-oriented tokenizer where text is considered a sequence of characters.",
- "FriendlyName": "Character Tokenizer Transform",
- "ShortName": "CharToken",
+ "Name": "Trainers.StochasticDualCoordinateAscentClassifier",
+ "Desc": "The SDCA linear multi-class classification trainer.",
+ "FriendlyName": "Fast Linear Multi-class Classification (SA-SDCA)",
+ "ShortName": "sasdcamc",
"Inputs": [
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Name": "L2Const",
+ "Type": "Float",
+ "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
"Aliases": [
- "col"
+ "l2"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
- "IsNullable": false
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 1E-07,
+ 1E-06,
+ 1E-05,
+ 0.0001,
+ 0.001,
+ 0.01
+ ]
+ }
},
{
- "Name": "Data",
+ "Name": "TrainingData",
"Type": "DataView",
- "Desc": "Input dataset",
+ "Desc": "The data to be used for training",
+ "Aliases": [
+ "data"
+ ],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "UseMarkerChars",
- "Type": "Bool",
- "Desc": "Whether to mark the beginning/end of each row/slot with start of text character (0x02)/end of text character (0x03)",
+ "Name": "L1Threshold",
+ "Type": "Float",
+ "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
"Aliases": [
- "mark"
+ "l1"
],
"Required": false,
"SortOrder": 2.0,
- "IsNullable": false,
- "Default": true
- }
- ],
- "Outputs": [
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 0.0,
+ 0.25,
+ 0.5,
+ 0.75,
+ 1.0
+ ]
+ }
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "FeatureColumn",
+ "Type": "String",
+ "Desc": "Column to use for features",
+ "Aliases": [
+ "feat"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": "Features"
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ColumnConcatenator",
- "Desc": "Concatenates two columns of the same item type.",
- "FriendlyName": "Concat Transform",
- "ShortName": "Concat",
- "Inputs": [
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Label"
+ },
{
- "Name": "Column",
+ "Name": "NormalizeFeatures",
"Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
+ "Kind": "Enum",
+ "Values": [
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
+ ]
},
- "Desc": "New column definition(s) (optional form: name:srcs)",
+ "Desc": "Normalize option for the feature column",
"Aliases": [
- "col"
+ "norm"
],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "Auto"
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
+ "Name": "Caching",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Auto",
+ "Memory",
+ "Disk",
+ "None"
+ ]
+ },
+ "Desc": "Whether learner should cache input training data",
+ "Aliases": [
+ "cache"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": "Auto"
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ColumnCopier",
- "Desc": "Duplicates columns from the dataset",
- "FriendlyName": "Copy Columns Transform",
- "ShortName": "Copy",
- "Inputs": [
- {
- "Name": "Column",
+ "Name": "LossFunction",
"Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
+ "Kind": "Component",
+ "ComponentKind": "SDCAClassificationLossFunction"
},
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Desc": "Loss Function",
"Aliases": [
- "col"
+ "loss"
],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "LogLoss"
+ }
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
+ "Name": "NumThreads",
+ "Type": "Int",
+ "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
+ "Aliases": [
+ "nt",
+ "t",
+ "threads"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": true,
+ "Default": null
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "ConvergenceTolerance",
+ "Type": "Float",
+ "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
+ "Aliases": [
+ "tol"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.1,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.001,
+ 0.01,
+ 0.1,
+ 0.2
+ ]
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ColumnDropper",
- "Desc": "Drops columns from the dataset",
- "FriendlyName": "Drop Columns Transform",
- "ShortName": "Drop",
- "Inputs": [
+ "Name": "MaxIterations",
+ "Type": "Int",
+ "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 10,
+ 20,
+ 100
+ ]
+ }
+ },
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Column name to drop",
+ "Name": "Shuffle",
+ "Type": "Bool",
+ "Desc": "Shuffle data every epoch?",
"Aliases": [
- "col"
+ "shuf"
],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "CheckFrequency",
+ "Type": "Int",
+ "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
+ "Aliases": [
+ "checkFreq"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "BiasLearningRate",
+ "Type": "Float",
+ "Desc": "The learning rate for adjusting bias from being regularized.",
+ "Aliases": [
+ "blr"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.01,
+ 0.1,
+ 1.0
+ ]
+ }
}
],
"Outputs": [
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
}
],
"InputKind": [
- "ITransformInput"
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
],
"OutputKind": [
- "ITransformOutput"
+ "IMulticlassClassificationOutput",
+ "ITrainerOutput"
]
},
{
- "Name": "Transforms.ColumnSelector",
- "Desc": "Selects a set of columns, dropping all others",
- "FriendlyName": "Select Columns",
- "ShortName": null,
+ "Name": "Trainers.StochasticDualCoordinateAscentRegressor",
+ "Desc": "The SDCA linear regression trainer.",
+ "FriendlyName": "Fast Linear Regression (SA-SDCA)",
+ "ShortName": "sasdcar",
"Inputs": [
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Column name to keep",
+ "Name": "L2Const",
+ "Type": "Float",
+ "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
"Aliases": [
- "col"
+ "l2"
],
"Required": false,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 1E-07,
+ 1E-06,
+ 1E-05,
+ 0.0001,
+ 0.001,
+ 0.01
+ ]
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ColumnTypeConverter",
- "Desc": "Converts a column to a different type, using standard conversions.",
- "FriendlyName": "Convert Transform",
- "ShortName": "Convert",
- "Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "ResultType",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "I1",
- "U1",
- "I2",
- "U2",
- "I4",
- "U4",
- "I8",
- "U8",
- "R4",
- "Num",
- "R8",
- "TX",
- "Text",
- "TXT",
- "BL",
- "Bool",
- "TimeSpan",
- "TS",
- "DT",
- "DateTime",
- "DZ",
- "DateTimeZone",
- "UG",
- "U16"
- ]
- },
- "Desc": "The result type",
- "Aliases": [
- "type"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Range",
- "Type": "String",
- "Desc": "For a key column, this defines the range of values",
- "Aliases": [
- "key"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:type:src)",
+ "Name": "TrainingData",
+ "Type": "DataView",
+ "Desc": "The data to be used for training",
"Aliases": [
- "col"
+ "data"
],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "ResultType",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "I1",
- "U1",
- "I2",
- "U2",
- "I4",
- "U4",
- "I8",
- "U8",
- "R4",
- "Num",
- "R8",
- "TX",
- "Text",
- "TXT",
- "BL",
- "Bool",
- "TimeSpan",
- "TS",
- "DT",
- "DateTime",
- "DZ",
- "DateTimeZone",
- "UG",
- "U16"
- ]
- },
- "Desc": "The result type",
+ "Name": "L1Threshold",
+ "Type": "Float",
+ "Desc": "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.",
"Aliases": [
- "type"
+ "l1"
],
"Required": false,
"SortOrder": 2.0,
"IsNullable": true,
- "Default": null
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 0.0,
+ 0.25,
+ 0.5,
+ 0.75,
+ 1.0
+ ]
+ }
},
{
- "Name": "Range",
+ "Name": "FeatureColumn",
"Type": "String",
- "Desc": "For a key column, this defines the range of values",
+ "Desc": "Column to use for features",
"Aliases": [
- "key"
+ "feat"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 2.0,
"IsNullable": false,
- "Default": null
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Default": "Features"
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.CombinerByContiguousGroupId",
- "Desc": "Groups values of a scalar column into a vector, by a contiguous group ID",
- "FriendlyName": "Group Transform",
- "ShortName": "Group",
- "Inputs": [
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Label"
+ },
{
- "Name": "GroupKey",
+ "Name": "NormalizeFeatures",
"Type": {
- "Kind": "Array",
- "ItemType": "String"
+ "Kind": "Enum",
+ "Values": [
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
+ ]
},
- "Desc": "Columns to group by",
+ "Desc": "Normalize option for the feature column",
"Aliases": [
- "g"
+ "norm"
],
"Required": false,
- "SortOrder": 1.0,
+ "SortOrder": 5.0,
"IsNullable": false,
- "Default": null
+ "Default": "Auto"
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "Caching",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Auto",
+ "Memory",
+ "Disk",
+ "None"
+ ]
+ },
+ "Desc": "Whether learner should cache input training data",
+ "Aliases": [
+ "cache"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": "Auto"
},
{
- "Name": "Column",
+ "Name": "LossFunction",
"Type": {
- "Kind": "Array",
- "ItemType": "String"
+ "Kind": "Component",
+ "ComponentKind": "SDCARegressionLossFunction"
},
- "Desc": "Columns to group together",
+ "Desc": "Loss Function",
"Aliases": [
- "col"
+ "loss"
],
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "SquaredLoss"
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ConditionalNormalizer",
- "Desc": "Normalize the columns only if needed",
- "FriendlyName": "Normalize If Needed",
- "ShortName": null,
- "Inputs": [
+ "Name": "NumThreads",
+ "Type": "Int",
+ "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
+ "Aliases": [
+ "nt",
+ "t",
+ "threads"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": true,
+ "Default": null
+ },
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "FixZero",
- "Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
- "Aliases": [
- "zero"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
- "Aliases": [
- "maxtrain"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Name": "ConvergenceTolerance",
+ "Type": "Float",
+ "Desc": "The tolerance for the ratio between duality gap and primal loss for convergence checking.",
"Aliases": [
- "col"
+ "tol"
],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.01,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.001,
+ 0.01,
+ 0.1,
+ 0.2
+ ]
+ }
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "MaxIterations",
+ "Type": "Int",
+ "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ "",
+ 10,
+ 20,
+ 100
+ ]
+ }
},
{
- "Name": "FixZero",
+ "Name": "Shuffle",
"Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Desc": "Shuffle data every epoch?",
"Aliases": [
- "zero"
+ "shuf"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true
+ "Default": true,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
},
{
- "Name": "MaxTrainingExamples",
+ "Name": "CheckFrequency",
"Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
+ "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
"Aliases": [
- "maxtrain"
+ "checkFreq"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "BiasLearningRate",
+ "Type": "Float",
+ "Desc": "The learning rate for adjusting bias from being regularized.",
+ "Aliases": [
+ "blr"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000000
+ "Default": 1.0,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.01,
+ 0.1,
+ 1.0
+ ]
+ }
}
],
"Outputs": [
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
}
],
"InputKind": [
- "ITransformInput"
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
+ ],
+ "OutputKind": [
+ "IRegressionOutput",
+ "ITrainerOutput"
]
},
{
- "Name": "Transforms.DataCache",
- "Desc": "Caches using the specified cache option.",
- "FriendlyName": "Cache Data",
- "ShortName": null,
+ "Name": "Trainers.StochasticGradientDescentBinaryClassifier",
+ "Desc": "Train an Hogwild SGD binary model.",
+ "FriendlyName": "Hogwild SGD (binary)",
+ "ShortName": "HogwildSGD",
"Inputs": [
{
- "Name": "Data",
+ "Name": "TrainingData",
"Type": "DataView",
- "Desc": "Input dataset",
+ "Desc": "The data to be used for training",
+ "Aliases": [
+ "data"
+ ],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Caching",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Memory",
- "Disk"
- ]
- },
- "Desc": "Caching strategy",
- "Required": true,
+ "Name": "FeatureColumn",
+ "Type": "String",
+ "Desc": "Column to use for features",
+ "Aliases": [
+ "feat"
+ ],
+ "Required": false,
"SortOrder": 2.0,
"IsNullable": false,
- "Default": "Memory"
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Dataset"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ]
- },
- {
- "Name": "Transforms.DatasetScorer",
- "Desc": "Score a dataset with a predictor model",
- "FriendlyName": null,
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "The dataset to be scored",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "The predictor model to apply to data",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
+ "Default": "Features"
},
{
- "Name": "Suffix",
+ "Name": "LabelColumn",
"Type": "String",
- "Desc": "Suffix to append to the score columns",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
"Required": false,
"SortOrder": 3.0,
"IsNullable": false,
- "Default": null
- }
- ],
- "Outputs": [
- {
- "Name": "ScoredData",
- "Type": "DataView",
- "Desc": "The scored dataset"
- },
- {
- "Name": "ScoringTransform",
- "Type": "TransformModel",
- "Desc": "The scoring transform"
- }
- ]
- },
- {
- "Name": "Transforms.DatasetTransformScorer",
- "Desc": "Score a dataset with a transform model",
- "FriendlyName": null,
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "The dataset to be scored",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "TransformModel",
- "Type": "TransformModel",
- "Desc": "The transform model to apply to data",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "ScoredData",
- "Type": "DataView",
- "Desc": "The scored dataset"
+ "Default": "Label"
},
{
- "Name": "ScoringTransform",
- "Type": "TransformModel",
- "Desc": "The scoring transform"
- }
- ]
- },
- {
- "Name": "Transforms.Dictionarizer",
- "Desc": "Converts input values (words, numbers, etc.) to index in a dictionary.",
- "FriendlyName": "Term Transform",
- "ShortName": "TermTransform",
- "Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "MaxNumTerms",
- "Type": "Int",
- "Desc": "Maximum number of terms to keep when auto-training",
- "Aliases": [
- "max"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Occurrence",
- "Value"
- ]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "TextKeyValues",
- "Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
- "Aliases": [
- "textkv"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "Column to use for example weight",
"Aliases": [
- "col"
+ "weight"
],
"Required": false,
- "SortOrder": 1.0,
+ "SortOrder": 4.0,
"IsNullable": false,
- "Default": null
- },
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Default": "Weight"
},
{
- "Name": "MaxNumTerms",
- "Type": "Int",
- "Desc": "Maximum number of terms to keep per column when auto-training",
+ "Name": "NormalizeFeatures",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "No",
+ "Warn",
+ "Auto",
+ "Yes"
+ ]
+ },
+ "Desc": "Normalize option for the feature column",
"Aliases": [
- "max"
+ "norm"
],
"Required": false,
"SortOrder": 5.0,
"IsNullable": false,
- "Default": 1000000
+ "Default": "Auto"
},
{
- "Name": "Term",
+ "Name": "Caching",
"Type": {
- "Kind": "Array",
- "ItemType": "String"
+ "Kind": "Enum",
+ "Values": [
+ "Auto",
+ "Memory",
+ "Disk",
+ "None"
+ ]
},
- "Desc": "List of terms",
+ "Desc": "Whether learner should cache input training data",
+ "Aliases": [
+ "cache"
+ ],
"Required": false,
- "SortOrder": 106.0,
+ "SortOrder": 6.0,
"IsNullable": false,
- "Default": null
+ "Default": "Auto"
},
{
- "Name": "Sort",
+ "Name": "LossFunction",
"Type": {
- "Kind": "Enum",
+ "Kind": "Component",
+ "ComponentKind": "ClassificationLossFunction"
+ },
+ "Desc": "Loss Function",
+ "Aliases": [
+ "loss"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "LogLoss"
+ }
+ },
+ {
+ "Name": "L2Const",
+ "Type": "Float",
+ "Desc": "L2 regularizer constant",
+ "Aliases": [
+ "l2"
+ ],
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 1E-06,
+ "SweepRange": {
+ "RangeType": "Discrete",
"Values": [
- "Occurrence",
- "Value"
+ 1E-07,
+ 5E-07,
+ 1E-06,
+ 5E-06,
+ 1E-05
]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ }
+ },
+ {
+ "Name": "NumThreads",
+ "Type": "Int",
+ "Desc": "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.",
+ "Aliases": [
+ "nt",
+ "t",
+ "threads"
+ ],
"Required": false,
- "SortOrder": 113.0,
+ "SortOrder": 50.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "ConvergenceTolerance",
+ "Type": "Float",
+ "Desc": "Exponential moving averaged improvement tolerance for convergence",
+ "Aliases": [
+ "tol"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "Occurrence"
+ "Default": 0.0001,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.01,
+ 0.001,
+ 0.0001,
+ 1E-05
+ ]
+ }
},
{
- "Name": "TextKeyValues",
+ "Name": "MaxIterations",
+ "Type": "Int",
+ "Desc": "Maximum number of iterations; set to 1 to simulate online learning.",
+ "Aliases": [
+ "iter"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 20,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 5,
+ 10,
+ 20
+ ]
+ }
+ },
+ {
+ "Name": "InitLearningRate",
+ "Type": "Float",
+ "Desc": "Initial learning rate (only used by SGD)",
+ "Aliases": [
+ "ilr",
+ "lr"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.01
+ },
+ {
+ "Name": "Shuffle",
"Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Desc": "Shuffle data every epoch?",
"Aliases": [
- "textkv"
+ "shuf"
],
"Required": false,
- "SortOrder": 114.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": true,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ false,
+ true
+ ]
+ }
+ },
+ {
+ "Name": "PositiveInstanceWeight",
+ "Type": "Float",
+ "Desc": "Apply weight to the positive class, for imbalanced data",
+ "Aliases": [
+ "piw"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0
+ },
+ {
+ "Name": "CheckFrequency",
+ "Type": "Int",
+ "Desc": "Convergence check frequency (in terms of number of iterations). Default equals number of threads",
+ "Aliases": [
+ "checkFreq"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Calibrator",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "CalibratorTrainer"
+ },
+ "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "PlattCalibrator"
+ }
+ },
+ {
+ "Name": "MaxCalibrationExamples",
+ "Type": "Int",
+ "Desc": "The maximum number of examples to use when training the calibrator",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000000
}
],
"Outputs": [
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The trained model"
}
],
"InputKind": [
- "ITransformInput"
+ "ITrainerInputWithWeight",
+ "ITrainerInputWithLabel",
+ "ITrainerInput"
],
"OutputKind": [
- "ITransformOutput"
+ "IBinaryClassificationOutput",
+ "ITrainerOutput"
]
},
{
- "Name": "Transforms.FeatureCombiner",
- "Desc": "Combines all the features into one feature column.",
- "FriendlyName": "Feature Combiner",
- "ShortName": "fc",
+ "Name": "Transforms.ApproximateBootstrapSampler",
+ "Desc": "Approximate bootstrap sampling.",
+ "FriendlyName": "Bootstrap Sample Transform",
+ "ShortName": "BootstrapSample",
"Inputs": [
{
"Name": "Data",
@@ -15297,16 +15530,49 @@
"IsNullable": false
},
{
- "Name": "Features",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Features",
+ "Name": "Complement",
+ "Type": "Bool",
+ "Desc": "Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.",
+ "Aliases": [
+ "comp"
+ ],
"Required": false,
- "SortOrder": 2.0,
+ "SortOrder": 150.0,
"IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "Seed",
+ "Type": "UInt",
+ "Desc": "The random seed. If unspecified, the random state will instead be derived from the environment.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
"Default": null
+ },
+ {
+ "Name": "ShuffleInput",
+ "Type": "Bool",
+ "Desc": "Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.",
+ "Aliases": [
+ "si"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "PoolSize",
+ "Type": "Int",
+ "Desc": "When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.",
+ "Aliases": [
+ "pool"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000
}
],
"Outputs": [
@@ -15329,37 +15595,11 @@
]
},
{
- "Name": "Transforms.FeatureSelectorByCount",
- "Desc": "Selects the slots for which the count of non-default values is greater than or equal to a threshold.",
- "FriendlyName": "Count Feature Selection Transform",
+ "Name": "Transforms.BinaryPredictionScoreColumnsRenamer",
+ "Desc": "For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class.",
+ "FriendlyName": "Rename Binary Prediction Score Columns",
"ShortName": null,
"Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Columns to use for feature selection",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "Count",
- "Type": "Int",
- "Desc": "If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved",
- "Aliases": [
- "c"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false,
- "Default": 1
- },
{
"Name": "Data",
"Type": "DataView",
@@ -15367,91 +15607,14 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.FeatureSelectorByMutualInformation",
- "Desc": "Selects the top k slots across all specified columns ordered by their mutual information with the label column.",
- "FriendlyName": "Mutual Information Feature Selection Transform",
- "ShortName": "MIFeatureSelection",
- "Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Columns to use for feature selection",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "SlotsInOutput",
- "Type": "Int",
- "Desc": "The maximum number of slots to preserve in output",
- "Aliases": [
- "topk",
- "numSlotsToKeep"
- ],
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": false,
- "Default": 1000
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The predictor model used in scoring",
"Required": true,
- "SortOrder": 1.0,
+ "SortOrder": 2.0,
"IsNullable": false
- },
- {
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "Column to use for labels",
- "Aliases": [
- "lab"
- ],
- "Required": false,
- "SortOrder": 4.0,
- "IsNullable": false,
- "Default": "Label"
- },
- {
- "Name": "NumBins",
- "Type": "Int",
- "Desc": "Max number of bins for R4/R8 columns, power of 2 recommended",
- "Aliases": [
- "bins"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 256
}
],
"Outputs": [
@@ -15474,10 +15637,10 @@
]
},
{
- "Name": "Transforms.GlobalContrastNormalizer",
- "Desc": "Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation.",
- "FriendlyName": "Global Contrast Normalization Transform",
- "ShortName": "Gcn",
+ "Name": "Transforms.BinNormalizer",
+ "Desc": "The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins.",
+ "FriendlyName": "Binning Normalizer",
+ "ShortName": "Bin",
"Inputs": [
{
"Name": "Column",
@@ -15487,27 +15650,36 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "UseStdDev",
- "Type": "Bool",
- "Desc": "Normalize by standard deviation rather than L2 norm",
+ "Name": "NumBins",
+ "Type": "Int",
+ "Desc": "Max number of bins, power of 2 recommended",
+ "Aliases": [
+ "bins"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "Scale",
- "Type": "Float",
- "Desc": "Scale features by this value",
+ "Name": "FixZero",
+ "Type": "Bool",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Aliases": [
+ "zero"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "SubMean",
- "Type": "Bool",
- "Desc": "Subtract mean from each value before normalizing",
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
@@ -15549,15 +15721,6 @@
"IsNullable": false,
"Default": null
},
- {
- "Name": "SubMean",
- "Type": "Bool",
- "Desc": "Subtract mean from each value before normalizing",
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": false,
- "Default": true
- },
{
"Name": "Data",
"Type": "DataView",
@@ -15567,25 +15730,40 @@
"IsNullable": false
},
{
- "Name": "UseStdDev",
+ "Name": "NumBins",
+ "Type": "Int",
+ "Desc": "Max number of bins, power of 2 recommended",
+ "Aliases": [
+ "bins"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1024
+ },
+ {
+ "Name": "FixZero",
"Type": "Bool",
- "Desc": "Normalize by standard deviation rather than L2 norm",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
"Aliases": [
- "useStd"
+ "zero"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": true
},
{
- "Name": "Scale",
- "Type": "Float",
- "Desc": "Scale features by this value",
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1.0
+ "Default": 1000000000
}
],
"Outputs": [
@@ -15608,10 +15786,10 @@
]
},
{
- "Name": "Transforms.HashConverter",
- "Desc": "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. This is a part of the Dracula transform.",
- "FriendlyName": "Hash Join Transform",
- "ShortName": "HashJoin",
+ "Name": "Transforms.CategoricalHashOneHotVectorizer",
+ "Desc": "Encodes the categorical variable with hash-based encoding",
+ "FriendlyName": "Categorical Hash Transform",
+ "ShortName": null,
"Inputs": [
{
"Name": "Column",
@@ -15621,27 +15799,29 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "Join",
- "Type": "Bool",
- "Desc": "Whether the values need to be combined for a single hash",
+ "Name": "OutputKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Bag",
+ "Ind",
+ "Key",
+ "Bin"
+ ]
+ },
+ "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Bin (binary encoded indicator vector)",
+ "Aliases": [
+ "kind"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 102.0,
"IsNullable": true,
"Default": null
},
- {
- "Name": "CustomSlotMap",
- "Type": "String",
- "Desc": "Which slots should be combined together. Example: 0,3,5;0,1;3;2,1,0. Overrides 'join'.",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
{
"Name": "HashBits",
"Type": "Int",
- "Desc": "Number of bits to hash into. Must be between 1 and 31, inclusive.",
+ "Desc": "The number of bits to hash into. Must be between 1 and 30, inclusive.",
"Aliases": [
"bits"
],
@@ -15671,6 +15851,18 @@
"IsNullable": true,
"Default": null
},
+ {
+ "Name": "InvertHash",
+ "Type": "Int",
+ "Desc": "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
+ "Aliases": [
+ "ih"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
{
"Name": "Name",
"Type": "String",
@@ -15698,7 +15890,7 @@
]
}
},
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Desc": "New column definition(s) (optional form: name:hashBits:src)",
"Aliases": [
"col"
],
@@ -15717,26 +15909,37 @@
{
"Name": "HashBits",
"Type": "Int",
- "Desc": "Number of bits to hash into. Must be between 1 and 31, inclusive.",
+ "Desc": "Number of bits to hash into. Must be between 1 and 30, inclusive.",
"Aliases": [
"bits"
],
"Required": false,
"SortOrder": 2.0,
"IsNullable": false,
- "Default": 31
- },
- {
- "Name": "Join",
- "Type": "Bool",
- "Desc": "Whether the values need to be combined for a single hash",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
+ "Default": 16
},
{
- "Name": "Seed",
+ "Name": "OutputKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Bag",
+ "Ind",
+ "Key",
+ "Bin"
+ ]
+ },
+ "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Bin (binary encoded indicator vector)",
+ "Aliases": [
+ "kind"
+ ],
+ "Required": false,
+ "SortOrder": 102.0,
+ "IsNullable": false,
+ "Default": "Bag"
+ },
+ {
+ "Name": "Seed",
"Type": "UInt",
"Desc": "Hashing seed",
"Required": false,
@@ -15755,6 +15958,18 @@
"SortOrder": 150.0,
"IsNullable": false,
"Default": true
+ },
+ {
+ "Name": "InvertHash",
+ "Type": "Int",
+ "Desc": "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
+ "Aliases": [
+ "ih"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0
}
],
"Outputs": [
@@ -15777,9 +15992,9 @@
]
},
{
- "Name": "Transforms.KeyToTextConverter",
- "Desc": "KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.",
- "FriendlyName": "Key To Value Transform",
+ "Name": "Transforms.CategoricalOneHotVectorizer",
+ "Desc": "Encodes the categorical variable with one-hot encoding based on term dictionary",
+ "FriendlyName": "Categorical Transform",
"ShortName": null,
"Inputs": [
{
@@ -15789,6 +16004,77 @@
"ItemType": {
"Kind": "Struct",
"Fields": [
+ {
+ "Name": "OutputKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Bag",
+ "Ind",
+ "Key",
+ "Bin"
+ ]
+ },
+ "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector",
+ "Aliases": [
+ "kind"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "TextKeyValues",
+ "Type": "Bool",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Aliases": [
+ "textkv"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
{
"Name": "Name",
"Type": "String",
@@ -15831,55 +16117,75 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
- }
- ],
- "Outputs": [
+ },
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep per column when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": 1000000
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.LabelColumnKeyBooleanConverter",
- "Desc": "Transforms the label to either key or bool (if needed) to make it suitable for classification.",
- "FriendlyName": "Prepare Classification Label",
- "ShortName": null,
- "Inputs": [
+ "Name": "OutputKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Bag",
+ "Ind",
+ "Key",
+ "Bin"
+ ]
+ },
+ "Desc": "Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Bin (binary encoded indicator vector)",
+ "Aliases": [
+ "kind"
+ ],
+ "Required": false,
+ "SortOrder": 102.0,
+ "IsNullable": false,
+ "Default": "Ind"
+ },
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 106.0,
+ "IsNullable": false,
+ "Default": null
},
{
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "The label column",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ "Required": false,
+ "SortOrder": 113.0,
+ "IsNullable": false,
+ "Default": "Occurrence"
},
{
"Name": "TextKeyValues",
"Type": "Bool",
- "Desc": "Convert the key values to text",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Aliases": [
+ "textkv"
+ ],
"Required": false,
- "SortOrder": 3.0,
+ "SortOrder": 114.0,
"IsNullable": false,
"Default": true
}
@@ -15904,10 +16210,10 @@
]
},
{
- "Name": "Transforms.LabelIndicator",
- "Desc": "Label remapper used by OVA",
- "FriendlyName": "LabelIndicator",
- "ShortName": "LabelIndictator",
+ "Name": "Transforms.CharacterTokenizer",
+ "Desc": "Character-oriented tokenizer where text is considered a sequence of characters.",
+ "FriendlyName": "Character Tokenizer Transform",
+ "ShortName": "CharToken",
"Inputs": [
{
"Name": "Column",
@@ -15916,18 +16222,6 @@
"ItemType": {
"Kind": "Struct",
"Fields": [
- {
- "Name": "ClassIndex",
- "Type": "Int",
- "Desc": "The positive example class for binary classification.",
- "Aliases": [
- "index"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
{
"Name": "Name",
"Type": "String",
@@ -15959,10 +16253,9 @@
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -15973,16 +16266,16 @@
"IsNullable": false
},
{
- "Name": "ClassIndex",
- "Type": "Int",
- "Desc": "Label of the positive class.",
+ "Name": "UseMarkerChars",
+ "Type": "Bool",
+ "Desc": "Whether to mark the beginning/end of each row/slot with start of text character (0x02)/end of text character (0x03)",
"Aliases": [
- "index"
+ "mark"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 2.0,
"IsNullable": false,
- "Default": 0
+ "Default": true
}
],
"Outputs": [
@@ -16005,25 +16298,62 @@
]
},
{
- "Name": "Transforms.LabelToFloatConverter",
- "Desc": "Transforms the label to float to make it suitable for regression.",
- "FriendlyName": "Prepare Regression Label",
- "ShortName": null,
+ "Name": "Transforms.ColumnConcatenator",
+ "Desc": "Concatenates two columns of the same item type.",
+ "FriendlyName": "Concat Transform",
+ "ShortName": "Concat",
"Inputs": [
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:srcs)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "The label column",
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
"Required": true,
- "SortOrder": 2.0,
+ "SortOrder": 1.0,
"IsNullable": false
}
],
@@ -16047,19 +16377,11 @@
]
},
{
- "Name": "Transforms.LightLda",
- "Desc": "The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation.",
- "FriendlyName": "Latent Dirichlet Allocation Transform",
- "ShortName": "LightLda",
+ "Name": "Transforms.ColumnCopier",
+ "Desc": "Duplicates columns from the dataset",
+ "FriendlyName": "Copy Columns Transform",
+ "ShortName": "Copy",
"Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
{
"Name": "Column",
"Type": {
@@ -16067,126 +16389,6 @@
"ItemType": {
"Kind": "Struct",
"Fields": [
- {
- "Name": "NumTopic",
- "Type": "Int",
- "Desc": "The number of topics in the LDA",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "AlphaSum",
- "Type": "Float",
- "Desc": "Dirichlet prior on document-topic vectors",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Beta",
- "Type": "Float",
- "Desc": "Dirichlet prior on vocab-topic vectors",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Mhstep",
- "Type": "Int",
- "Desc": "Number of Metropolis Hasting step",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "NumIterations",
- "Type": "Int",
- "Desc": "Number of iterations",
- "Aliases": [
- "iter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "LikelihoodInterval",
- "Type": "Int",
- "Desc": "Compute log likelihood over local dataset on this iteration interval",
- "Aliases": [
- "llInterval"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "The number of training threads",
- "Aliases": [
- "t"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "NumMaxDocToken",
- "Type": "Int",
- "Desc": "The threshold of maximum count of tokens per doc",
- "Aliases": [
- "maxNumToken"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "NumSummaryTermPerTopic",
- "Type": "Int",
- "Desc": "The number of words to summarize the topic",
- "Aliases": [
- "ns"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "NumBurninIterations",
- "Type": "Int",
- "Desc": "The number of burn-in iterations",
- "Aliases": [
- "burninIter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": 10
- },
- {
- "Name": "ResetRandomGenerator",
- "Type": "Bool",
- "Desc": "Reset the random number generator for each document",
- "Aliases": [
- "reset"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
{
"Name": "Name",
"Type": "String",
@@ -16214,199 +16416,118 @@
]
}
},
- "Desc": "New column definition(s) (optional form: name:srcs)",
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
"col"
],
"Required": true,
- "SortOrder": 49.0,
+ "SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "NumTopic",
- "Type": "Int",
- "Desc": "The number of topics in the LDA",
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 100,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 20,
- 40,
- 100,
- 200
- ]
- }
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
},
{
- "Name": "NumMaxDocToken",
- "Type": "Int",
- "Desc": "The threshold of maximum count of tokens per doc",
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.ColumnDropper",
+ "Desc": "Drops columns from the dataset",
+ "FriendlyName": "Drop Columns Transform",
+ "ShortName": "Drop",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Column name to drop",
"Aliases": [
- "maxNumToken"
+ "col"
],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": false,
- "Default": 512
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
},
{
- "Name": "NumThreads",
- "Type": "Int",
- "Desc": "The number of training threads. Default value depends on number of logical processors.",
- "Aliases": [
- "t"
- ],
- "Required": false,
- "SortOrder": 50.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "AlphaSum",
- "Type": "Float",
- "Desc": "Dirichlet prior on document-topic vectors",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 100.0,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 1,
- 10,
- 100,
- 200
- ]
- }
- },
- {
- "Name": "Beta",
- "Type": "Float",
- "Desc": "Dirichlet prior on vocab-topic vectors",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 0.01,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 0.01,
- 0.015,
- 0.07,
- 0.02
- ]
- }
- },
- {
- "Name": "Mhstep",
- "Type": "Int",
- "Desc": "Number of Metropolis Hasting step",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 4,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 2,
- 4,
- 8,
- 16
- ]
- }
- },
- {
- "Name": "NumIterations",
- "Type": "Int",
- "Desc": "Number of iterations",
- "Aliases": [
- "iter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 200,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 100,
- 200,
- 300,
- 400
- ]
- }
- },
- {
- "Name": "LikelihoodInterval",
- "Type": "Int",
- "Desc": "Compute log likelihood over local dataset on this iteration interval",
- "Aliases": [
- "llInterval"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 5
- },
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
{
- "Name": "NumSummaryTermPerTopic",
- "Type": "Int",
- "Desc": "The number of words to summarize the topic",
- "Aliases": [
- "ns"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 10
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
},
{
- "Name": "NumBurninIterations",
- "Type": "Int",
- "Desc": "The number of burn-in iterations",
- "Aliases": [
- "burninIter"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 10,
- "SweepRange": {
- "RangeType": "Discrete",
- "Values": [
- 10,
- 20,
- 30,
- 40
- ]
- }
- },
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.ColumnSelector",
+ "Desc": "Selects a set of columns, dropping all others",
+ "FriendlyName": "Select Columns",
+ "ShortName": null,
+ "Inputs": [
{
- "Name": "ResetRandomGenerator",
- "Type": "Bool",
- "Desc": "Reset the random number generator for each document",
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Column name to keep",
"Aliases": [
- "reset"
+ "col"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 1.0,
"IsNullable": false,
- "Default": false
+ "Default": null
},
{
- "Name": "OutputTopicWordSummary",
- "Type": "Bool",
- "Desc": "Whether to output the topic-word summary in text format",
- "Aliases": [
- "summary"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": false
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
}
],
"Outputs": [
@@ -16429,10 +16550,10 @@
]
},
{
- "Name": "Transforms.LogMeanVarianceNormalizer",
- "Desc": "Normalizes the data based on the computed mean and variance of the logarithm of the data.",
- "FriendlyName": "LogMeanVar Normalizer",
- "ShortName": "LogMeanVar",
+ "Name": "Transforms.ColumnTypeConverter",
+ "Desc": "Converts a column to a different type, using standard conversions.",
+ "FriendlyName": "Convert Transform",
+ "ShortName": "Convert",
"Inputs": [
{
"Name": "Column",
@@ -16442,17 +16563,57 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
+ "Name": "ResultType",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "I1",
+ "U1",
+ "I2",
+ "U2",
+ "I4",
+ "U4",
+ "I8",
+ "U8",
+ "R4",
+ "Num",
+ "R8",
+ "TX",
+ "Text",
+ "TXT",
+ "BL",
+ "Bool",
+ "TimeSpan",
+ "TS",
+ "DT",
+ "DateTime",
+ "DZ",
+ "DateTimeZone",
+ "UG",
+ "U16"
+ ]
+ },
+ "Desc": "The result type",
"Aliases": [
- "maxtrain"
+ "type"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
+ {
+ "Name": "Range",
+ "Type": "String",
+ "Desc": "For a key column, this defines the range of values",
+ "Aliases": [
+ "key"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
{
"Name": "Name",
"Type": "String",
@@ -16480,14 +16641,13 @@
]
}
},
- "Desc": "New column definition(s) (optional form: name:src)",
+ "Desc": "New column definition(s) (optional form: name:type:src)",
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -16498,28 +16658,56 @@
"IsNullable": false
},
{
- "Name": "UseCdf",
- "Type": "Bool",
- "Desc": "Whether to use CDF as the output",
- "Aliases": [
- "cdf"
+ "Name": "ResultType",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "I1",
+ "U1",
+ "I2",
+ "U2",
+ "I4",
+ "U4",
+ "I8",
+ "U8",
+ "R4",
+ "Num",
+ "R8",
+ "TX",
+ "Text",
+ "TXT",
+ "BL",
+ "Bool",
+ "TimeSpan",
+ "TS",
+ "DT",
+ "DateTime",
+ "DZ",
+ "DateTimeZone",
+ "UG",
+ "U16"
+ ]
+ },
+ "Desc": "The result type",
+ "Aliases": [
+ "type"
],
"Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null
},
{
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
+ "Name": "Range",
+ "Type": "String",
+ "Desc": "For a key column, this defines the range of values",
"Aliases": [
- "maxtrain"
+ "key"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1000000000
+ "Default": null
}
],
"Outputs": [
@@ -16542,101 +16730,25 @@
]
},
{
- "Name": "Transforms.LpNormalizer",
- "Desc": "Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm.",
- "FriendlyName": "Lp-Norm Normalizer",
- "ShortName": "lpnorm",
+ "Name": "Transforms.CombinerByContiguousGroupId",
+ "Desc": "Groups values of a scalar column into a vector, by a contiguous group ID",
+ "FriendlyName": "Group Transform",
+ "ShortName": "Group",
"Inputs": [
{
- "Name": "Column",
+ "Name": "GroupKey",
"Type": {
"Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "NormKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "L2Norm",
- "StdDev",
- "L1Norm",
- "LInf"
- ]
- },
- "Desc": "The norm to use to normalize each sample",
- "Aliases": [
- "norm"
- ],
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "SubMean",
- "Type": "Bool",
- "Desc": "Subtract mean from each value before normalizing",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "NormKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "L2Norm",
- "StdDev",
- "L1Norm",
- "LInf"
- ]
+ "ItemType": "String"
},
- "Desc": "The norm to use to normalize each sample",
+ "Desc": "Columns to group by",
"Aliases": [
- "norm"
+ "g"
],
"Required": false,
"SortOrder": 1.0,
"IsNullable": false,
- "Default": "L2Norm"
+ "Default": null
},
{
"Name": "Data",
@@ -16647,13 +16759,18 @@
"IsNullable": false
},
{
- "Name": "SubMean",
- "Type": "Bool",
- "Desc": "Subtract mean from each value before normalizing",
- "Required": false,
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Columns to group together",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
"SortOrder": 2.0,
- "IsNullable": false,
- "Default": false
+ "IsNullable": false
}
],
"Outputs": [
@@ -16676,44 +16793,10 @@
]
},
{
- "Name": "Transforms.ManyHeterogeneousModelCombiner",
- "Desc": "Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel.",
- "FriendlyName": null,
+ "Name": "Transforms.ConditionalNormalizer",
+ "Desc": "Normalize the columns only if needed",
+ "FriendlyName": "Normalize If Needed",
"ShortName": null,
- "Inputs": [
- {
- "Name": "TransformModels",
- "Type": {
- "Kind": "Array",
- "ItemType": "TransformModel"
- },
- "Desc": "Transform model",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "Predictor model",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "Predictor model"
- }
- ]
- },
- {
- "Name": "Transforms.MeanVarianceNormalizer",
- "Desc": "Normalizes the data based on the computed mean and variance of the data.",
- "FriendlyName": "MeanVar Normalizer",
- "ShortName": "MeanVar",
"Inputs": [
{
"Name": "Column",
@@ -16789,18 +16872,6 @@
"SortOrder": 1.0,
"IsNullable": false
},
- {
- "Name": "UseCdf",
- "Type": "Bool",
- "Desc": "Whether to use CDF as the output",
- "Aliases": [
- "cdf"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": false
- },
{
"Name": "FixZero",
"Type": "Bool",
@@ -16840,140 +16911,135 @@
],
"InputKind": [
"ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
]
},
{
- "Name": "Transforms.MinMaxNormalizer",
- "Desc": "Normalizes the data based on the observed minimum and maximum values of the data.",
- "FriendlyName": "Min-Max Normalizer",
- "ShortName": "MinMax",
+ "Name": "Transforms.DataCache",
+ "Desc": "Caches using the specified cache option.",
+ "FriendlyName": "Cache Data",
+ "ShortName": null,
"Inputs": [
{
- "Name": "Column",
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Caching",
"Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "FixZero",
- "Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
- "Aliases": [
- "zero"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
- "Aliases": [
- "maxtrain"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
+ "Kind": "Enum",
+ "Values": [
+ "Memory",
+ "Disk"
+ ]
},
- "Desc": "New column definition(s) (optional form: name:src)",
- "Aliases": [
- "col"
- ],
+ "Desc": "Caching strategy",
"Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": "Memory"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Dataset"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ]
+ },
+ {
+ "Name": "Transforms.DatasetScorer",
+ "Desc": "Score a dataset with a predictor model",
+ "FriendlyName": null,
+ "ShortName": null,
+ "Inputs": [
{
"Name": "Data",
"Type": "DataView",
- "Desc": "Input dataset",
+ "Desc": "The dataset to be scored",
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "FixZero",
- "Type": "Bool",
- "Desc": "Whether to map zero to zero, preserving sparsity",
- "Aliases": [
- "zero"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The predictor model to apply to data",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
},
{
- "Name": "MaxTrainingExamples",
- "Type": "Int",
- "Desc": "Max number of examples used to train the normalizer",
- "Aliases": [
- "maxtrain"
- ],
+ "Name": "Suffix",
+ "Type": "String",
+ "Desc": "Suffix to append to the score columns",
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 3.0,
"IsNullable": false,
- "Default": 1000000000
+ "Default": null
}
],
"Outputs": [
{
- "Name": "OutputData",
+ "Name": "ScoredData",
"Type": "DataView",
- "Desc": "Transformed dataset"
+ "Desc": "The scored dataset"
},
{
- "Name": "Model",
+ "Name": "ScoringTransform",
"Type": "TransformModel",
- "Desc": "Transform model"
+ "Desc": "The scoring transform"
+ }
+ ]
+ },
+ {
+ "Name": "Transforms.DatasetTransformScorer",
+ "Desc": "Score a dataset with a transform model",
+ "FriendlyName": null,
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "The dataset to be scored",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "TransformModel",
+ "Type": "TransformModel",
+ "Desc": "The transform model to apply to data",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
}
],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
+ "Outputs": [
+ {
+ "Name": "ScoredData",
+ "Type": "DataView",
+ "Desc": "The scored dataset"
+ },
+ {
+ "Name": "ScoringTransform",
+ "Type": "TransformModel",
+ "Desc": "The scoring transform"
+ }
]
},
{
- "Name": "Transforms.MissingValueHandler",
- "Desc": "Handle missing values by replacing them with either the default value or the mean/min/max value (for non-text columns only). An indicator column can optionally be concatenated, if theinput column type is numeric.",
- "FriendlyName": "NA Handle Transform",
- "ShortName": "NAHandle",
+ "Name": "Transforms.Dictionarizer",
+ "Desc": "Converts input values (words, numbers, etc.) to index in a dictionary.",
+ "FriendlyName": "Term Transform",
+ "ShortName": "TermTransform",
"Inputs": [
{
"Name": "Column",
@@ -16983,40 +17049,50 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "Kind",
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Term",
"Type": {
- "Kind": "Enum",
- "Values": [
- "DefaultValue",
- "Mean",
- "Minimum",
- "Maximum"
- ]
+ "Kind": "Array",
+ "ItemType": "String"
},
- "Desc": "The replacement method to utilize",
+ "Desc": "List of terms",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
+ "IsNullable": false,
"Default": null
},
{
- "Name": "ImputeBySlot",
- "Type": "Bool",
- "Desc": "Whether to impute values by slot",
- "Aliases": [
- "slot"
- ],
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value, items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "ConcatIndicator",
+ "Name": "TextKeyValues",
"Type": "Bool",
- "Desc": "Whether or not to concatenate an indicator vector column to the value column",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
"Aliases": [
- "ind"
+ "textkv"
],
"Required": false,
"SortOrder": 150.0,
@@ -17050,13 +17126,14 @@
]
}
},
- "Desc": "New column definition(s) (optional form: name:rep:src)",
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
"col"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
- "IsNullable": false
+ "IsNullable": false,
+ "Default": null
},
{
"Name": "Data",
@@ -17067,48 +17144,55 @@
"IsNullable": false
},
{
- "Name": "ReplaceWith",
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep per column when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": 1000000
+ },
+ {
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 106.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Sort",
"Type": {
"Kind": "Enum",
"Values": [
- "DefaultValue",
- "Mean",
- "Minimum",
- "Maximum"
+ "Occurrence",
+ "Value"
]
},
- "Desc": "The replacement method to utilize",
- "Aliases": [
- "kind"
- ],
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value, items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
"Required": false,
- "SortOrder": 2.0,
+ "SortOrder": 113.0,
"IsNullable": false,
- "Default": "Def"
+ "Default": "Occurrence"
},
{
- "Name": "ImputeBySlot",
+ "Name": "TextKeyValues",
"Type": "Bool",
- "Desc": "Whether to impute values by slot",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
"Aliases": [
- "slot"
+ "textkv"
],
"Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
- },
- {
- "Name": "Concat",
- "Type": "Bool",
- "Desc": "Whether or not to concatenate an indicator vector column to the value column",
- "Aliases": [
- "ind"
- ],
- "Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 114.0,
"IsNullable": false,
- "Default": true
+ "Default": false
}
],
"Outputs": [
@@ -17131,53 +17215,11 @@
]
},
{
- "Name": "Transforms.MissingValueIndicator",
- "Desc": "Create a boolean output column with the same number of slots as the input column, where the output value is true if the value in the input column is missing.",
- "FriendlyName": "NA Indicator Transform",
- "ShortName": "NAInd",
+ "Name": "Transforms.FeatureCombiner",
+ "Desc": "Combines all the features into one feature column.",
+ "FriendlyName": "Feature Combiner",
+ "ShortName": "fc",
"Inputs": [
- {
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:src)",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
{
"Name": "Data",
"Type": "DataView",
@@ -17185,6 +17227,18 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
+ },
+ {
+ "Name": "Features",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Features",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": null
}
],
"Outputs": [
@@ -17207,46 +17261,18 @@
]
},
{
- "Name": "Transforms.MissingValuesDropper",
- "Desc": "Removes NAs from vector columns.",
- "FriendlyName": "NA Drop Transform",
- "ShortName": "NADrop",
+ "Name": "Transforms.FeatureSelectorByCount",
+ "Desc": "Selects the slots for which the count of non-default values is greater than or equal to a threshold.",
+ "FriendlyName": "Count Feature Selection Transform",
+ "ShortName": null,
"Inputs": [
{
"Name": "Column",
"Type": {
"Kind": "Array",
- "ItemType": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
- }
+ "ItemType": "String"
},
- "Desc": "Columns to drop the NAs for",
+ "Desc": "Columns to use for feature selection",
"Aliases": [
"col"
],
@@ -17254,6 +17280,18 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "Count",
+ "Type": "Int",
+ "Desc": "If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved",
+ "Aliases": [
+ "c"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 1
+ },
{
"Name": "Data",
"Type": "DataView",
@@ -17283,10 +17321,10 @@
]
},
{
- "Name": "Transforms.MissingValuesRowDropper",
- "Desc": "Filters out rows that contain missing values.",
- "FriendlyName": "NA Filter",
- "ShortName": "NAFilter",
+ "Name": "Transforms.FeatureSelectorByMutualInformation",
+ "Desc": "Selects the top k slots across all specified columns ordered by their mutual information with the label column.",
+ "FriendlyName": "Mutual Information Feature Selection Transform",
+ "ShortName": "MIFeatureSelection",
"Inputs": [
{
"Name": "Column",
@@ -17294,7 +17332,7 @@
"Kind": "Array",
"ItemType": "String"
},
- "Desc": "Column",
+ "Desc": "Columns to use for feature selection",
"Aliases": [
"col"
],
@@ -17302,6 +17340,19 @@
"SortOrder": 1.0,
"IsNullable": false
},
+ {
+ "Name": "SlotsInOutput",
+ "Type": "Int",
+ "Desc": "The maximum number of slots to preserve in output",
+ "Aliases": [
+ "topk",
+ "numSlotsToKeep"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 1000
+ },
{
"Name": "Data",
"Type": "DataView",
@@ -17311,13 +17362,28 @@
"IsNullable": false
},
{
- "Name": "Complement",
- "Type": "Bool",
- "Desc": "If true, keep only rows that contain NA values, and filter the rest.",
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Column to use for labels",
+ "Aliases": [
+ "lab"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": "Label"
+ },
+ {
+ "Name": "NumBins",
+ "Type": "Int",
+ "Desc": "Max number of bins for R4/R8 columns, power of 2 recommended",
+ "Aliases": [
+ "bins"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 256
}
],
"Outputs": [
@@ -17340,10 +17406,10 @@
]
},
{
- "Name": "Transforms.MissingValueSubstitutor",
- "Desc": "Create an output column of the same type and size of the input column, where missing values are replaced with either the default value or the mean/min/max value (for non-text columns only).",
- "FriendlyName": "NA Replace Transform",
- "ShortName": "NARep",
+ "Name": "Transforms.GlobalContrastNormalizer",
+ "Desc": "Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation.",
+ "FriendlyName": "Global Contrast Normalization Transform",
+ "ShortName": "Gcn",
"Inputs": [
{
"Name": "Column",
@@ -17353,39 +17419,27 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "ReplacementString",
- "Type": "String",
- "Desc": "Replacement value for NAs (uses default value if not given)",
- "Aliases": [
- "rep"
- ],
+ "Name": "UseStdDev",
+ "Type": "Bool",
+ "Desc": "Normalize by standard deviation rather than L2 norm",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
+ "IsNullable": true,
"Default": null
},
{
- "Name": "Kind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "DefaultValue",
- "Mean",
- "Minimum",
- "Maximum",
- "SpecifiedValue"
- ]
- },
- "Desc": "The replacement method to utilize",
+ "Name": "Scale",
+ "Type": "Float",
+ "Desc": "Scale features by this value",
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "Slot",
+ "Name": "SubMean",
"Type": "Bool",
- "Desc": "Whether to impute values by slot",
+ "Desc": "Subtract mean from each value before normalizing",
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
@@ -17418,13 +17472,23 @@
]
}
},
- "Desc": "New column definition(s) (optional form: name:rep:src)",
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
"col"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
- "IsNullable": false
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "SubMean",
+ "Type": "Bool",
+ "Desc": "Subtract mean from each value before normalizing",
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": true
},
{
"Name": "Data",
@@ -17435,37 +17499,25 @@
"IsNullable": false
},
{
- "Name": "ReplacementKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "DefaultValue",
- "Mean",
- "Minimum",
- "Maximum",
- "SpecifiedValue"
- ]
- },
- "Desc": "The replacement method to utilize",
+ "Name": "UseStdDev",
+ "Type": "Bool",
+ "Desc": "Normalize by standard deviation rather than L2 norm",
"Aliases": [
- "kind"
+ "useStd"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": "Default"
+ "Default": false
},
{
- "Name": "ImputeBySlot",
- "Type": "Bool",
- "Desc": "Whether to impute values by slot",
- "Aliases": [
- "slot"
- ],
- "Required": false,
+ "Name": "Scale",
+ "Type": "Float",
+ "Desc": "Scale features by this value",
+ "Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true
+ "Default": 1.0
}
],
"Outputs": [
@@ -17488,37 +17540,10 @@
]
},
{
- "Name": "Transforms.ModelCombiner",
- "Desc": "Combines a sequence of TransformModels into a single model",
- "FriendlyName": null,
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Models",
- "Type": {
- "Kind": "Array",
- "ItemType": "TransformModel"
- },
- "Desc": "Input models",
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
- }
- ],
- "Outputs": [
- {
- "Name": "OutputModel",
- "Type": "TransformModel",
- "Desc": "Combined model"
- }
- ]
- },
- {
- "Name": "Transforms.NGramTranslator",
- "Desc": "Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.",
- "FriendlyName": "NGram Transform",
- "ShortName": "NgramTransform",
+ "Name": "Transforms.HashConverter",
+ "Desc": "Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. This is a part of the Dracula transform.",
+ "FriendlyName": "Hash Join Transform",
+ "ShortName": "HashJoin",
"Inputs": [
{
"Name": "Column",
@@ -17528,35 +17553,29 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "NgramLength",
- "Type": "Int",
- "Desc": "Maximum ngram length",
- "Aliases": [
- "ngram"
- ],
+ "Name": "Join",
+ "Type": "Bool",
+ "Desc": "Whether the values need to be combined for a single hash",
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "AllLengths",
- "Type": "Bool",
- "Desc": "Whether to include all ngram lengths up to NgramLength or only NgramLength",
- "Aliases": [
- "all"
- ],
+ "Name": "CustomSlotMap",
+ "Type": "String",
+ "Desc": "Which slots should be combined together. Example: 0,3,5;0,1;3;2,1,0. Overrides 'join'.",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
+ "IsNullable": false,
"Default": null
},
{
- "Name": "SkipLength",
+ "Name": "HashBits",
"Type": "Int",
- "Desc": "Maximum number of tokens to skip when constructing an ngram",
+ "Desc": "Number of bits to hash into. Must be between 1 and 31, inclusive.",
"Aliases": [
- "skips"
+ "bits"
],
"Required": false,
"SortOrder": 150.0,
@@ -17564,31 +17583,21 @@
"Default": null
},
{
- "Name": "MaxNumTerms",
- "Type": {
- "Kind": "Array",
- "ItemType": "Int"
- },
- "Desc": "Maximum number of ngrams to store in the dictionary",
- "Aliases": [
- "max"
- ],
+ "Name": "Seed",
+ "Type": "UInt",
+ "Desc": "Hashing seed",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
+ "IsNullable": true,
"Default": null
},
{
- "Name": "Weighting",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Tf",
- "Idf",
- "TfIdf"
- ]
- },
- "Desc": "Statistical measure used to evaluate how important a word is to a document in a corpus",
+ "Name": "Ordered",
+ "Type": "Bool",
+ "Desc": "Whether the position of each term should be included in the hash",
+ "Aliases": [
+ "ord"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
@@ -17625,10 +17634,9 @@
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -17639,73 +17647,46 @@
"IsNullable": false
},
{
- "Name": "NgramLength",
+ "Name": "HashBits",
"Type": "Int",
- "Desc": "Maximum ngram length",
+ "Desc": "Number of bits to hash into. Must be between 1 and 31, inclusive.",
"Aliases": [
- "ngram"
+ "bits"
],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 2.0,
"IsNullable": false,
- "Default": 2
+ "Default": 31
},
{
- "Name": "AllLengths",
+ "Name": "Join",
"Type": "Bool",
- "Desc": "Whether to store all ngram lengths up to ngramLength, or only ngramLength",
- "Aliases": [
- "all"
- ],
+ "Desc": "Whether the values need to be combined for a single hash",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
"Default": true
},
{
- "Name": "SkipLength",
- "Type": "Int",
- "Desc": "Maximum number of tokens to skip when constructing an ngram",
- "Aliases": [
- "skips"
- ],
+ "Name": "Seed",
+ "Type": "UInt",
+ "Desc": "Hashing seed",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 0
+ "Default": 314489979
},
{
- "Name": "MaxNumTerms",
- "Type": {
- "Kind": "Array",
- "ItemType": "Int"
- },
- "Desc": "Maximum number of ngrams to store in the dictionary",
+ "Name": "Ordered",
+ "Type": "Bool",
+ "Desc": "Whether the position of each term should be included in the hash",
"Aliases": [
- "max"
+ "ord"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": [
- 10000000
- ]
- },
- {
- "Name": "Weighting",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Tf",
- "Idf",
- "TfIdf"
- ]
- },
- "Desc": "The weighting criteria",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": "Tf"
+ "Default": true
}
],
"Outputs": [
@@ -17728,11 +17709,53 @@
]
},
{
- "Name": "Transforms.NoOperation",
- "Desc": "Does nothing.",
- "FriendlyName": "No Op",
- "ShortName": "Nop",
+ "Name": "Transforms.KeyToTextConverter",
+ "Desc": "KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.",
+ "FriendlyName": "Key To Value Transform",
+ "ShortName": null,
"Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
{
"Name": "Data",
"Type": "DataView",
@@ -17762,32 +17785,35 @@
]
},
{
- "Name": "Transforms.OptionalColumnCreator",
- "Desc": "If the source column does not exist after deserialization, create a column with the right type and default values.",
- "FriendlyName": "Optional Column Transform",
- "ShortName": "optional",
+ "Name": "Transforms.LabelColumnKeyBooleanConverter",
+ "Desc": "Transforms the label to either key or bool (if needed) to make it suitable for classification.",
+ "FriendlyName": "Prepare Classification Label",
+ "ShortName": null,
"Inputs": [
{
- "Name": "Column",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "New column definition(s)",
- "Aliases": [
- "col"
- ],
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "The label column",
"Required": true,
- "SortOrder": 1.0,
+ "SortOrder": 2.0,
"IsNullable": false
+ },
+ {
+ "Name": "TextKeyValues",
+ "Type": "Bool",
+ "Desc": "Convert the key values to text",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": true
}
],
"Outputs": [
@@ -17810,10 +17836,10 @@
]
},
{
- "Name": "Transforms.PcaCalculator",
- "Desc": "Train an PCA Anomaly model.",
- "FriendlyName": "Principal Component Analysis Transform",
- "ShortName": "Pca",
+ "Name": "Transforms.LabelIndicator",
+ "Desc": "Label remapper used by OVA",
+ "FriendlyName": "LabelIndicator",
+ "ShortName": "LabelIndictator",
"Inputs": [
{
"Name": "Column",
@@ -17823,59 +17849,11 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "WeightColumn",
- "Type": "String",
- "Desc": "The name of the weight column",
- "Aliases": [
- "weight"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Rank",
- "Type": "Int",
- "Desc": "The number of components in the PCA",
- "Aliases": [
- "k"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Oversampling",
- "Type": "Int",
- "Desc": "Oversampling parameter for randomized PCA training",
- "Aliases": [
- "over"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Center",
- "Type": "Bool",
- "Desc": "If enabled, data is centered to be zero mean",
- "Aliases": [
- "center"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Seed",
+ "Name": "ClassIndex",
"Type": "Int",
- "Desc": "The seed for random number generation",
+ "Desc": "The positive example class for binary classification.",
"Aliases": [
- "seed"
+ "index"
],
"Required": false,
"SortOrder": 150.0,
@@ -17913,9 +17891,10 @@
"Aliases": [
"col"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
- "IsNullable": false
+ "IsNullable": false,
+ "Default": null
},
{
"Name": "Data",
@@ -17926,57 +17905,15 @@
"IsNullable": false
},
{
- "Name": "WeightColumn",
- "Type": "String",
- "Desc": "The name of the weight column",
- "Aliases": [
- "weight"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Rank",
- "Type": "Int",
- "Desc": "The number of components in the PCA",
- "Aliases": [
- "k"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 20
- },
- {
- "Name": "Oversampling",
+ "Name": "ClassIndex",
"Type": "Int",
- "Desc": "Oversampling parameter for randomized PCA training",
+ "Desc": "Label of the positive class.",
"Aliases": [
- "over"
+ "index"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 20
- },
- {
- "Name": "Center",
- "Type": "Bool",
- "Desc": "If enabled, data is centered to be zero mean",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": true
- },
- {
- "Name": "Seed",
- "Type": "Int",
- "Desc": "The seed for random number generation",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
"Default": 0
}
],
@@ -18000,9 +17937,9 @@
]
},
{
- "Name": "Transforms.PredictedLabelColumnOriginalValueConverter",
- "Desc": "Transforms a predicted label column to its original values, unless it is of type bool.",
- "FriendlyName": "Convert Predicted Label",
+ "Name": "Transforms.LabelToFloatConverter",
+ "Desc": "Transforms the label to float to make it suitable for regression.",
+ "FriendlyName": "Prepare Regression Label",
"ShortName": null,
"Inputs": [
{
@@ -18014,9 +17951,9 @@
"IsNullable": false
},
{
- "Name": "PredictedLabelColumn",
+ "Name": "LabelColumn",
"Type": "String",
- "Desc": "The predicted label column",
+ "Desc": "The label column",
"Required": true,
"SortOrder": 2.0,
"IsNullable": false
@@ -18042,11 +17979,19 @@
]
},
{
- "Name": "Transforms.RandomNumberGenerator",
- "Desc": "Adds a column with a generated number sequence.",
- "FriendlyName": "Generate Number Transform",
- "ShortName": "Generate",
+ "Name": "Transforms.LightLda",
+ "Desc": "The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation.",
+ "FriendlyName": "Latent Dirichlet Allocation Transform",
+ "ShortName": "LightLda",
"Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
{
"Name": "Column",
"Type": {
@@ -18055,23 +18000,59 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
+ "Name": "NumTopic",
+ "Type": "Int",
+ "Desc": "The number of topics in the LDA",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "AlphaSum",
+ "Type": "Float",
+ "Desc": "Dirichlet prior on document-topic vectors",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Beta",
+ "Type": "Float",
+ "Desc": "Dirichlet prior on vocab-topic vectors",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Mhstep",
+ "Type": "Int",
+ "Desc": "Number of Metropolis-Hastings steps",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "NumIterations",
+ "Type": "Int",
+ "Desc": "Number of iterations",
"Aliases": [
- "name"
+ "iter"
],
"Required": false,
"SortOrder": 150.0,
- "IsNullable": false,
+ "IsNullable": true,
"Default": null
},
{
- "Name": "UseCounter",
- "Type": "Bool",
- "Desc": "Use an auto-incremented integer starting at zero instead of a random number",
+ "Name": "LikelihoodInterval",
+ "Type": "Int",
+ "Desc": "Compute log likelihood over local dataset on this iteration interval",
"Aliases": [
- "cnt"
+ "llInterval"
],
"Required": false,
"SortOrder": 150.0,
@@ -18079,249 +18060,285 @@
"Default": null
},
{
- "Name": "Seed",
- "Type": "UInt",
- "Desc": "The random seed",
+ "Name": "NumThreads",
+ "Type": "Int",
+ "Desc": "The number of training threads",
+ "Aliases": [
+ "t"
+ ],
"Required": false,
"SortOrder": 150.0,
"IsNullable": true,
"Default": null
- }
- ]
- }
- },
- "Desc": "New column definition(s) (optional form: name:seed)",
- "Aliases": [
- "col"
- ],
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "UseCounter",
- "Type": "Bool",
- "Desc": "Use an auto-incremented integer starting at zero instead of a random number",
- "Aliases": [
- "cnt"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": false
- },
- {
- "Name": "Seed",
- "Type": "UInt",
- "Desc": "The random seed",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 42
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.RowRangeFilter",
- "Desc": "Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values.",
- "FriendlyName": "Range Filter",
- "ShortName": "RangeFilter",
- "Inputs": [
- {
- "Name": "Column",
- "Type": "String",
- "Desc": "Column",
+ },
+ {
+ "Name": "NumMaxDocToken",
+ "Type": "Int",
+ "Desc": "The threshold of maximum count of tokens per doc",
+ "Aliases": [
+ "maxNumToken"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "NumSummaryTermPerTopic",
+ "Type": "Int",
+ "Desc": "The number of words to summarize the topic",
+ "Aliases": [
+ "ns"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "NumBurninIterations",
+ "Type": "Int",
+ "Desc": "The number of burn-in iterations",
+ "Aliases": [
+ "burninIter"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": 10
+ },
+ {
+ "Name": "ResetRandomGenerator",
+ "Type": "Bool",
+ "Desc": "Reset the random number generator for each document",
+ "Aliases": [
+ "reset"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:srcs)",
"Aliases": [
"col"
],
"Required": true,
- "SortOrder": 1.0,
+ "SortOrder": 49.0,
"IsNullable": false
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "NumTopic",
+ "Type": "Int",
+ "Desc": "The number of topics in the LDA",
+ "Required": false,
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 100,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 20,
+ 40,
+ 100,
+ 200
+ ]
+ }
},
{
- "Name": "Min",
- "Type": "Float",
- "Desc": "Minimum value (0 to 1 for key types)",
+ "Name": "NumMaxDocToken",
+ "Type": "Int",
+ "Desc": "The threshold of maximum count of tokens per doc",
+ "Aliases": [
+ "maxNumToken"
+ ],
"Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
+ "SortOrder": 50.0,
+ "IsNullable": false,
+ "Default": 512
},
{
- "Name": "Max",
- "Type": "Float",
- "Desc": "Maximum value (0 to 1 for key types)",
+ "Name": "NumThreads",
+ "Type": "Int",
+ "Desc": "The number of training threads. Default value depends on number of logical processors.",
+ "Aliases": [
+ "t"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 50.0,
"IsNullable": true,
"Default": null
},
{
- "Name": "Complement",
- "Type": "Bool",
- "Desc": "If true, keep the values that fall outside the range.",
+ "Name": "AlphaSum",
+ "Type": "Float",
+ "Desc": "Dirichlet prior on document-topic vectors",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": false
+ "Default": 100.0,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 1,
+ 10,
+ 100,
+ 200
+ ]
+ }
},
{
- "Name": "IncludeMin",
- "Type": "Bool",
- "Desc": "If true, include in the range the values that are equal to min.",
+ "Name": "Beta",
+ "Type": "Float",
+ "Desc": "Dirichlet prior on vocab-topic vectors",
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": true
+ "Default": 0.01,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.01,
+ 0.015,
+ 0.07,
+ 0.02
+ ]
+ }
},
{
- "Name": "IncludeMax",
- "Type": "Bool",
- "Desc": "If true, include in the range the values that are equal to max.",
+ "Name": "Mhstep",
+ "Type": "Int",
+ "Desc": "Number of Metropolis-Hastings steps",
"Required": false,
"SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "IsNullable": false,
+ "Default": 4,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 2,
+ 4,
+ 8,
+ 16
+ ]
+ }
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.RowSkipAndTakeFilter",
- "Desc": "Allows limiting input to a subset of rows at an optional offset. Can be used to implement data paging.",
- "FriendlyName": "Skip and Take Filter",
- "ShortName": "SkipTake",
- "Inputs": [
- {
- "Name": "Skip",
+ "Name": "NumIterations",
"Type": "Int",
- "Desc": "Number of items to skip",
+ "Desc": "Number of iterations",
"Aliases": [
- "s"
+ "iter"
],
"Required": false,
- "SortOrder": 1.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 200,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 100,
+ 200,
+ 300,
+ 400
+ ]
+ }
},
{
- "Name": "Take",
+ "Name": "LikelihoodInterval",
"Type": "Int",
- "Desc": "Number of items to take",
+ "Desc": "Compute log likelihood over local dataset on this iteration interval",
"Aliases": [
- "t"
+ "llInterval"
],
"Required": false,
- "SortOrder": 2.0,
- "IsNullable": true,
- "Default": null
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 5
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.RowSkipFilter",
- "Desc": "Allows limiting input to a subset of rows by skipping a number of rows.",
- "FriendlyName": "Skip Filter",
- "ShortName": "Skip",
- "Inputs": [
+ "Name": "NumSummaryTermPerTopic",
+ "Type": "Int",
+ "Desc": "The number of words to summarize the topic",
+ "Aliases": [
+ "ns"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 10
+ },
{
- "Name": "Count",
+ "Name": "NumBurninIterations",
"Type": "Int",
- "Desc": "Number of items to skip",
+ "Desc": "The number of burn-in iterations",
"Aliases": [
- "c",
- "n",
- "s"
+ "burninIter"
],
- "Required": true,
- "SortOrder": 1.0,
+ "Required": false,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0
+ "Default": 10,
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 10,
+ 20,
+ 30,
+ 40
+ ]
+ }
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "ResetRandomGenerator",
+ "Type": "Bool",
+ "Desc": "Reset the random number generator for each document",
+ "Aliases": [
+ "reset"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "OutputTopicWordSummary",
+ "Type": "Bool",
+ "Desc": "Whether to output the topic-word summary in text format",
+ "Aliases": [
+ "summary"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
}
],
"Outputs": [
@@ -18344,24 +18361,65 @@
]
},
{
- "Name": "Transforms.RowTakeFilter",
- "Desc": "Allows limiting input to a subset of rows by taking N first rows.",
- "FriendlyName": "Take Filter",
- "ShortName": "Take",
+ "Name": "Transforms.LogMeanVarianceNormalizer",
+ "Desc": "Normalizes the data based on the computed mean and variance of the logarithm of the data.",
+ "FriendlyName": "LogMeanVar Normalizer",
+ "ShortName": "LogMeanVar",
"Inputs": [
{
- "Name": "Count",
- "Type": "Int",
- "Desc": "Number of items to take",
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
- "c",
- "n",
- "t"
+ "col"
],
- "Required": true,
+ "Required": false,
"SortOrder": 1.0,
"IsNullable": false,
- "Default": 9223372036854775807
+ "Default": null
},
{
"Name": "Data",
@@ -18370,52 +18428,30 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
},
{
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
- }
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
- ]
- },
- {
- "Name": "Transforms.ScoreColumnSelector",
- "Desc": "Selects only the last score columns and the extra columns specified in the arguments.",
- "FriendlyName": "Choose Columns By Index",
- "ShortName": null,
- "Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
+ "Name": "UseCdf",
+ "Type": "Bool",
+ "Desc": "Whether to use CDF as the output",
+ "Aliases": [
+ "cdf"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
},
{
- "Name": "ExtraColumns",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Extra columns to write",
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
"Required": false,
- "SortOrder": 2.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": null
+ "Default": 1000000000
}
],
"Outputs": [
@@ -18438,76 +18474,118 @@
]
},
{
- "Name": "Transforms.Scorer",
- "Desc": "Turn the predictor model into a transform model",
- "FriendlyName": null,
- "ShortName": null,
- "Inputs": [
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "The predictor model to turn into a transform",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "ScoredData",
- "Type": "DataView",
- "Desc": "The scored dataset"
- },
- {
- "Name": "ScoringTransform",
- "Type": "TransformModel",
- "Desc": "The scoring transform"
- }
- ]
- },
- {
- "Name": "Transforms.Segregator",
- "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform",
- "FriendlyName": "Un-group Transform",
- "ShortName": "Ungroup",
+ "Name": "Transforms.LpNormalizer",
+ "Desc": "Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm.",
+ "FriendlyName": "Lp-Norm Normalizer",
+ "ShortName": "lpnorm",
"Inputs": [
- {
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
{
"Name": "Column",
"Type": {
"Kind": "Array",
- "ItemType": "String"
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "NormKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "L2Norm",
+ "StdDev",
+ "L1Norm",
+ "LInf"
+ ]
+ },
+ "Desc": "The norm to use to normalize each sample",
+ "Aliases": [
+ "norm"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "SubMean",
+ "Type": "Bool",
+ "Desc": "Subtract mean from each value before normalizing",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
},
- "Desc": "Columns to unroll, or 'pivot'",
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
"col"
],
"Required": true,
- "SortOrder": 150.0,
+ "SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Mode",
+ "Name": "NormKind",
"Type": {
"Kind": "Enum",
"Values": [
- "Inner",
- "Outer",
- "First"
+ "L2Norm",
+ "StdDev",
+ "L1Norm",
+ "LInf"
]
},
- "Desc": "Specifies how to unroll multiple pivot columns of different size.",
+ "Desc": "The norm to use to normalize each sample",
+ "Aliases": [
+ "norm"
+ ],
"Required": false,
- "SortOrder": 150.0,
+ "SortOrder": 1.0,
"IsNullable": false,
- "Default": "Inner"
+ "Default": "L2Norm"
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "SubMean",
+ "Type": "Bool",
+ "Desc": "Subtract mean from each value before normalizing",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": false
}
],
"Outputs": [
@@ -18530,67 +18608,44 @@
]
},
{
- "Name": "Transforms.SentimentAnalyzer",
- "Desc": "Uses a pretrained sentiment model to score input strings",
- "FriendlyName": "Sentiment Analyzing Transform",
- "ShortName": "Senti",
+ "Name": "Transforms.ManyHeterogeneousModelCombiner",
+ "Desc": "Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel.",
+ "FriendlyName": null,
+ "ShortName": null,
"Inputs": [
{
- "Name": "Source",
- "Type": "String",
- "Desc": "Name of the source column.",
- "Aliases": [
- "col"
- ],
+ "Name": "TransformModels",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "TransformModel"
+ },
+ "Desc": "Transform model",
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "Predictor model",
"Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column.",
- "Aliases": [
- "dst"
- ],
- "Required": false,
"SortOrder": 2.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
}
],
"Outputs": [
{
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
- },
- {
- "Name": "Model",
- "Type": "TransformModel",
- "Desc": "Transform model"
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "Predictor model"
}
- ],
- "InputKind": [
- "ITransformInput"
- ],
- "OutputKind": [
- "ITransformOutput"
]
},
{
- "Name": "Transforms.SupervisedBinNormalizer",
- "Desc": "Similar to BinNormalizer, but calculates bins based on correlation with the label column, not equi-density. The new value is bin_number / number_of_bins.",
- "FriendlyName": "Supervised Binning Normalizer",
- "ShortName": "SupBin",
+ "Name": "Transforms.MeanVarianceNormalizer",
+ "Desc": "Normalizes the data based on the computed mean and variance of the data.",
+ "FriendlyName": "MeanVar Normalizer",
+ "ShortName": "MeanVar",
"Inputs": [
{
"Name": "Column",
@@ -18599,18 +18654,6 @@
"ItemType": {
"Kind": "Struct",
"Fields": [
- {
- "Name": "NumBins",
- "Type": "Int",
- "Desc": "Max number of bins, power of 2 recommended",
- "Aliases": [
- "bins"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
{
"Name": "FixZero",
"Type": "Bool",
@@ -18666,10 +18709,9 @@
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -18680,37 +18722,16 @@
"IsNullable": false
},
{
- "Name": "LabelColumn",
- "Type": "String",
- "Desc": "Label column for supervised binning",
- "Aliases": [
- "label",
- "lab"
- ],
- "Required": true,
- "SortOrder": 150.0,
- "IsNullable": false
- },
- {
- "Name": "MinBinSize",
- "Type": "Int",
- "Desc": "Minimum number of examples per bin",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": 10
- },
- {
- "Name": "NumBins",
- "Type": "Int",
- "Desc": "Max number of bins, power of 2 recommended",
+ "Name": "UseCdf",
+ "Type": "Bool",
+ "Desc": "Whether to use CDF as the output",
"Aliases": [
- "bins"
+ "cdf"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": 1024
+ "Default": false
},
{
"Name": "FixZero",
@@ -18757,46 +18778,70 @@
]
},
{
- "Name": "Transforms.TextFeaturizer",
- "Desc": "A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text.",
- "FriendlyName": "Text Transform",
- "ShortName": "Text",
+ "Name": "Transforms.MinMaxNormalizer",
+ "Desc": "Normalizes the data based on the observed minimum and maximum values of the data.",
+ "FriendlyName": "Min-Max Normalizer",
+ "ShortName": "MinMax",
"Inputs": [
{
"Name": "Column",
"Type": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Name",
- "Type": "String",
- "Desc": "Name of the new column",
- "Aliases": [
- "name"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Source",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "FixZero",
+ "Type": "Bool",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Aliases": [
+ "zero"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
},
- "Desc": "Name of the source column",
- "Aliases": [
- "src"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- }
- ]
+ {
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
},
- "Desc": "New column definition (optional form: name:srcs).",
+ "Desc": "New column definition(s) (optional form: name:src)",
"Aliases": [
"col"
],
@@ -18813,239 +18858,196 @@
"IsNullable": false
},
{
- "Name": "Language",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "English",
- "French",
- "German",
- "Dutch",
- "Italian",
- "Spanish",
- "Japanese"
- ]
- },
- "Desc": "Dataset language or 'AutoDetect' to detect language per row.",
+ "Name": "FixZero",
+ "Type": "Bool",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
"Aliases": [
- "lang"
+ "zero"
],
"Required": false,
- "SortOrder": 3.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": "English"
+ "Default": true
},
{
- "Name": "StopWordsRemover",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "StopWordsRemover"
- },
- "Desc": "Stopwords remover.",
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
"Aliases": [
- "remover"
+ "maxtrain"
],
"Required": false,
- "SortOrder": 4.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": null
+ "Default": 1000000000
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
},
{
- "Name": "TextCase",
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.MissingValueHandler",
+ "Desc": "Handle missing values by replacing them with either the default value or the mean/min/max value (for non-text columns only). An indicator column can optionally be concatenated, if the input column type is numeric.",
+ "FriendlyName": "NA Handle Transform",
+ "ShortName": "NAHandle",
+ "Inputs": [
+ {
+ "Name": "Column",
"Type": {
- "Kind": "Enum",
- "Values": [
- "Lower",
- "Upper",
- "None"
- ]
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Kind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultValue",
+ "Mean",
+ "Minimum",
+ "Maximum"
+ ]
+ },
+ "Desc": "The replacement method to utilize",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "ImputeBySlot",
+ "Type": "Bool",
+ "Desc": "Whether to impute values by slot",
+ "Aliases": [
+ "slot"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "ConcatIndicator",
+ "Type": "Bool",
+ "Desc": "Whether or not to concatenate an indicator vector column to the value column",
+ "Aliases": [
+ "ind"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
},
- "Desc": "Casing text using the rules of the invariant culture.",
+ "Desc": "New column definition(s) (optional form: name:rep:src)",
"Aliases": [
- "case"
+ "col"
],
- "Required": false,
- "SortOrder": 5.0,
- "IsNullable": false,
- "Default": "Lower"
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
},
{
- "Name": "KeepDiacritics",
- "Type": "Bool",
- "Desc": "Whether to keep diacritical marks or remove them.",
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "ReplaceWith",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultValue",
+ "Mean",
+ "Minimum",
+ "Maximum"
+ ]
+ },
+ "Desc": "The replacement method to utilize",
"Aliases": [
- "diac"
+ "kind"
],
"Required": false,
- "SortOrder": 6.0,
+ "SortOrder": 2.0,
"IsNullable": false,
- "Default": false
+ "Default": "Def"
},
{
- "Name": "KeepPunctuations",
+ "Name": "ImputeBySlot",
"Type": "Bool",
- "Desc": "Whether to keep punctuation marks or remove them.",
+ "Desc": "Whether to impute values by slot",
"Aliases": [
- "punc"
+ "slot"
],
"Required": false,
- "SortOrder": 7.0,
+ "SortOrder": 150.0,
"IsNullable": false,
"Default": true
},
{
- "Name": "KeepNumbers",
+ "Name": "Concat",
"Type": "Bool",
- "Desc": "Whether to keep numbers or remove them.",
+ "Desc": "Whether or not to concatenate an indicator vector column to the value column",
"Aliases": [
- "num"
+ "ind"
],
"Required": false,
- "SortOrder": 8.0,
+ "SortOrder": 150.0,
"IsNullable": false,
"Default": true
- },
- {
- "Name": "OutputTokens",
- "Type": "Bool",
- "Desc": "Whether to output the transformed text tokens as an additional column.",
- "Aliases": [
- "tokens",
- "showtext",
- "showTransformedText"
- ],
- "Required": false,
- "SortOrder": 9.0,
- "IsNullable": false,
- "Default": false
- },
- {
- "Name": "Dictionary",
- "Type": {
- "Kind": "Struct",
- "Fields": [
- {
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
- "Required": false,
- "SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Occurrence",
- "Value"
- ]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 5.0,
- "IsNullable": false,
- "Default": "Occurrence"
- },
- {
- "Name": "DropUnknowns",
- "Type": "Bool",
- "Desc": "Drop unknown terms instead of mapping them to NA term.",
- "Aliases": [
- "dropna"
- ],
- "Required": false,
- "SortOrder": 6.0,
- "IsNullable": false,
- "Default": false
- }
- ]
- },
- "Desc": "A dictionary of whitelisted terms.",
- "Aliases": [
- "dict"
- ],
- "Required": false,
- "SortOrder": 10.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "WordFeatureExtractor",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "NgramExtractor"
- },
- "Desc": "Ngram feature extractor to use for words (WordBag/WordHashBag).",
- "Aliases": [
- "wordExtractor"
- ],
- "Required": false,
- "SortOrder": 11.0,
- "IsNullable": false,
- "Default": {
- "Name": "NGram",
- "Settings": {
- "MaxNumTerms": [
- 10000000
- ]
- }
- }
- },
- {
- "Name": "CharFeatureExtractor",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "NgramExtractor"
- },
- "Desc": "Ngram feature extractor to use for characters (WordBag/WordHashBag).",
- "Aliases": [
- "charExtractor"
- ],
- "Required": false,
- "SortOrder": 12.0,
- "IsNullable": false,
- "Default": {
- "Name": "NGram",
- "Settings": {
- "NgramLength": 3,
- "AllLengths": false,
- "MaxNumTerms": [
- 10000000
- ]
- }
- }
- },
- {
- "Name": "VectorNormalizer",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "None",
- "L1",
- "L2",
- "LInf"
- ]
- },
- "Desc": "Normalize vectors (rows) individually by rescaling them to unit norm.",
- "Aliases": [
- "norm"
- ],
- "Required": false,
- "SortOrder": 13.0,
- "IsNullable": false,
- "Default": "L2"
- }
- ],
- "Outputs": [
- {
- "Name": "OutputData",
- "Type": "DataView",
- "Desc": "Transformed dataset"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
},
{
"Name": "Model",
@@ -19061,10 +19063,10 @@
]
},
{
- "Name": "Transforms.TextToKeyConverter",
- "Desc": "Converts input values (words, numbers, etc.) to index in a dictionary.",
- "FriendlyName": "Term Transform",
- "ShortName": null,
+ "Name": "Transforms.MissingValueIndicator",
+ "Desc": "Create a boolean output column with the same number of slots as the input column, where the output value is true if the value in the input column is missing.",
+ "FriendlyName": "NA Indicator Transform",
+ "ShortName": "NAInd",
"Inputs": [
{
"Name": "Column",
@@ -19073,57 +19075,6 @@
"ItemType": {
"Kind": "Struct",
"Fields": [
- {
- "Name": "MaxNumTerms",
- "Type": "Int",
- "Desc": "Maximum number of terms to keep when auto-training",
- "Aliases": [
- "max"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Occurrence",
- "Value"
- ]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
- {
- "Name": "TextKeyValues",
- "Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
- "Aliases": [
- "textkv"
- ],
- "Required": false,
- "SortOrder": 150.0,
- "IsNullable": true,
- "Default": null
- },
{
"Name": "Name",
"Type": "String",
@@ -19155,10 +19106,9 @@
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -19167,57 +19117,6 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
- },
- {
- "Name": "MaxNumTerms",
- "Type": "Int",
- "Desc": "Maximum number of terms to keep per column when auto-training",
- "Aliases": [
- "max"
- ],
- "Required": false,
- "SortOrder": 5.0,
- "IsNullable": false,
- "Default": 1000000
- },
- {
- "Name": "Term",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "List of terms",
- "Required": false,
- "SortOrder": 106.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "Sort",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Occurrence",
- "Value"
- ]
- },
- "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
- "Required": false,
- "SortOrder": 113.0,
- "IsNullable": false,
- "Default": "Occurrence"
- },
- {
- "Name": "TextKeyValues",
- "Type": "Bool",
- "Desc": "Whether key value metadata should be text, regardless of the actual input type",
- "Aliases": [
- "textkv"
- ],
- "Required": false,
- "SortOrder": 114.0,
- "IsNullable": false,
- "Default": false
}
],
"Outputs": [
@@ -19240,11 +19139,53 @@
]
},
{
- "Name": "Transforms.TrainTestDatasetSplitter",
- "Desc": "Split the dataset into train and test sets",
- "FriendlyName": "Dataset Train-Test Split",
- "ShortName": null,
+ "Name": "Transforms.MissingValuesDropper",
+ "Desc": "Removes NAs from vector columns.",
+ "FriendlyName": "NA Drop Transform",
+ "ShortName": "NADrop",
"Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "Columns to drop the NAs for",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
{
"Name": "Data",
"Type": "DataView",
@@ -19252,87 +19193,63 @@
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
- },
- {
- "Name": "Fraction",
- "Type": "Float",
- "Desc": "Fraction of training data",
- "Required": false,
- "SortOrder": 2.0,
- "IsNullable": false,
- "Default": 0.8
- },
- {
- "Name": "StratificationColumn",
- "Type": "String",
- "Desc": "Stratification column",
- "Aliases": [
- "strat"
- ],
- "Required": false,
- "SortOrder": 3.0,
- "IsNullable": false,
- "Default": null
}
],
"Outputs": [
{
- "Name": "TrainData",
+ "Name": "OutputData",
"Type": "DataView",
- "Desc": "Training data"
+ "Desc": "Transformed dataset"
},
{
- "Name": "TestData",
- "Type": "DataView",
- "Desc": "Testing data"
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
}
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
]
},
{
- "Name": "Transforms.TreeLeafFeaturizer",
- "Desc": "Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector to three outputs: 1. A vector containing the individual tree outputs of the tree ensemble. 2. A vector indicating the leaves that the feature vector falls on in the tree ensemble. 3. A vector indicating the paths that the feature vector falls on in the tree ensemble. If a both a model file and a trainer are specified - will use the model file. If neither are specified, will train a default FastTree model. This can handle key labels by training a regression model towards their optionally permuted indices.",
- "FriendlyName": "Tree Ensemble Featurization Transform",
- "ShortName": "TreeFeat",
+ "Name": "Transforms.MissingValuesRowDropper",
+ "Desc": "Filters out rows that contain missing values.",
+ "FriendlyName": "NA Filter",
+ "ShortName": "NAFilter",
"Inputs": [
{
- "Name": "Data",
- "Type": "DataView",
- "Desc": "Input dataset",
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Column",
+ "Aliases": [
+ "col"
+ ],
"Required": true,
"SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "Trainer to use",
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
"Required": true,
- "SortOrder": 10.0,
+ "SortOrder": 1.0,
"IsNullable": false
},
{
- "Name": "Suffix",
- "Type": "String",
- "Desc": "Output column: The suffix to append to the default column names",
- "Aliases": [
- "ex"
- ],
- "Required": false,
- "SortOrder": 101.0,
- "IsNullable": false,
- "Default": null
- },
- {
- "Name": "LabelPermutationSeed",
- "Type": "Int",
- "Desc": "If specified, determines the permutation seed for applying this featurizer to a multiclass problem.",
- "Aliases": [
- "lps"
- ],
+ "Name": "Complement",
+ "Type": "Bool",
+ "Desc": "If true, keep only rows that contain NA values, and filter the rest.",
"Required": false,
- "SortOrder": 102.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 0
+ "Default": false
}
],
"Outputs": [
@@ -19348,7 +19265,6 @@
}
],
"InputKind": [
- "IFeaturizerInput",
"ITransformInput"
],
"OutputKind": [
@@ -19356,41 +19272,10 @@
]
},
{
- "Name": "Transforms.TwoHeterogeneousModelCombiner",
- "Desc": "Combines a TransformModel and a PredictorModel into a single PredictorModel.",
- "FriendlyName": null,
- "ShortName": null,
- "Inputs": [
- {
- "Name": "TransformModel",
- "Type": "TransformModel",
- "Desc": "Transform model",
- "Required": true,
- "SortOrder": 1.0,
- "IsNullable": false
- },
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "Predictor model",
- "Required": true,
- "SortOrder": 2.0,
- "IsNullable": false
- }
- ],
- "Outputs": [
- {
- "Name": "PredictorModel",
- "Type": "PredictorModel",
- "Desc": "Predictor model"
- }
- ]
- },
- {
- "Name": "Transforms.WordTokenizer",
- "Desc": "The input to this transform is text, and the output is a vector of text containing the words (tokens) in the original text. The separator is space, but can be specified as any other character (or multiple characters) if needed.",
- "FriendlyName": "Tokenize Text Transform",
- "ShortName": "TokenizeTextTransform",
+ "Name": "Transforms.MissingValueSubstitutor",
+ "Desc": "Create an output column of the same type and size as the input column, where missing values are replaced with either the default value or the mean/min/max value (for non-text columns only).",
+ "FriendlyName": "NA Replace Transform",
+ "ShortName": "NARep",
"Inputs": [
{
"Name": "Column",
@@ -19400,17 +19285,44 @@
"Kind": "Struct",
"Fields": [
{
- "Name": "TermSeparators",
+ "Name": "ReplacementString",
"Type": "String",
- "Desc": "Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character.",
+ "Desc": "Replacement value for NAs (uses default value if not given)",
"Aliases": [
- "sep"
+ "rep"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
"Default": null
},
+ {
+ "Name": "Kind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultValue",
+ "Mean",
+ "Minimum",
+ "Maximum",
+ "SpecifiedValue"
+ ]
+ },
+ "Desc": "The replacement method to utilize",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Slot",
+ "Type": "Bool",
+ "Desc": "Whether to impute values by slot",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
{
"Name": "Name",
"Type": "String",
@@ -19438,14 +19350,13 @@
]
}
},
- "Desc": "New column definition(s)",
+ "Desc": "New column definition(s) (optional form: name:rep:src)",
"Aliases": [
"col"
],
- "Required": false,
+ "Required": true,
"SortOrder": 1.0,
- "IsNullable": false,
- "Default": null
+ "IsNullable": false
},
{
"Name": "Data",
@@ -19456,16 +19367,37 @@
"IsNullable": false
},
{
- "Name": "TermSeparators",
- "Type": "String",
- "Desc": "Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character.",
+ "Name": "ReplacementKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "DefaultValue",
+ "Mean",
+ "Minimum",
+ "Maximum",
+ "SpecifiedValue"
+ ]
+ },
+ "Desc": "The replacement method to utilize",
"Aliases": [
- "sep"
+ "kind"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": "space"
+ "Default": "Default"
+ },
+ {
+ "Name": "ImputeBySlot",
+ "Type": "Bool",
+ "Desc": "Whether to impute values by slot",
+ "Aliases": [
+ "slot"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
}
],
"Outputs": [
@@ -19486,201 +19418,2748 @@
"OutputKind": [
"ITransformOutput"
]
- }
- ],
- "Components": [
+ },
{
- "Kind": "AutoMlEngine",
- "Components": [
- {
- "Name": "Defaults",
+ "Name": "Transforms.ModelCombiner",
+ "Desc": "Combines a sequence of TransformModels into a single model",
+ "FriendlyName": null,
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Models",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "TransformModel"
+ },
+ "Desc": "Input models",
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputModel",
+ "Type": "TransformModel",
+ "Desc": "Combined model"
+ }
+ ]
+ },
+ {
+ "Name": "Transforms.NGramTranslator",
+ "Desc": "Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.",
+ "FriendlyName": "NGram Transform",
+ "ShortName": "NgramTransform",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "NgramLength",
+ "Type": "Int",
+ "Desc": "Maximum ngram length",
+ "Aliases": [
+ "ngram"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "AllLengths",
+ "Type": "Bool",
+ "Desc": "Whether to include all ngram lengths up to NgramLength or only NgramLength",
+ "Aliases": [
+ "all"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "SkipLength",
+ "Type": "Int",
+ "Desc": "Maximum number of tokens to skip when constructing an ngram",
+ "Aliases": [
+ "skips"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "MaxNumTerms",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "Int"
+ },
+ "Desc": "Maximum number of ngrams to store in the dictionary",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Weighting",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Tf",
+ "Idf",
+ "TfIdf"
+ ]
+ },
+ "Desc": "Statistical measure used to evaluate how important a word is to a document in a corpus",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "NgramLength",
+ "Type": "Int",
+ "Desc": "Maximum ngram length",
+ "Aliases": [
+ "ngram"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 2
+ },
+ {
+ "Name": "AllLengths",
+ "Type": "Bool",
+ "Desc": "Whether to store all ngram lengths up to ngramLength, or only ngramLength",
+ "Aliases": [
+ "all"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "SkipLength",
+ "Type": "Int",
+ "Desc": "Maximum number of tokens to skip when constructing an ngram",
+ "Aliases": [
+ "skips"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "MaxNumTerms",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "Int"
+ },
+ "Desc": "Maximum number of ngrams to store in the dictionary",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": [
+ 10000000
+ ]
+ },
+ {
+ "Name": "Weighting",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Tf",
+ "Idf",
+ "TfIdf"
+ ]
+ },
+ "Desc": "The weighting criteria",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": "Tf"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.NoOperation",
+ "Desc": "Does nothing.",
+ "FriendlyName": "No Op",
+ "ShortName": "Nop",
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.OptionalColumnCreator",
+ "Desc": "If the source column does not exist after deserialization, create a column with the right type and default values.",
+ "FriendlyName": "Optional Column Transform",
+ "ShortName": "optional",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "New column definition(s)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.PcaCalculator",
+ "Desc": "Train a PCA Anomaly model.",
+ "FriendlyName": "Principal Component Analysis Transform",
+ "ShortName": "Pca",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "The name of the weight column",
+ "Aliases": [
+ "weight"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Rank",
+ "Type": "Int",
+ "Desc": "The number of components in the PCA",
+ "Aliases": [
+ "k"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Oversampling",
+ "Type": "Int",
+ "Desc": "Oversampling parameter for randomized PCA training",
+ "Aliases": [
+ "over"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Center",
+ "Type": "Bool",
+ "Desc": "If enabled, data is centered to be zero mean",
+ "Aliases": [
+ "center"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Seed",
+ "Type": "Int",
+ "Desc": "The seed for random number generation",
+ "Aliases": [
+ "seed"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "WeightColumn",
+ "Type": "String",
+ "Desc": "The name of the weight column",
+ "Aliases": [
+ "weight"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Rank",
+ "Type": "Int",
+ "Desc": "The number of components in the PCA",
+ "Aliases": [
+ "k"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 20
+ },
+ {
+ "Name": "Oversampling",
+ "Type": "Int",
+ "Desc": "Oversampling parameter for randomized PCA training",
+ "Aliases": [
+ "over"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 20
+ },
+ {
+ "Name": "Center",
+ "Type": "Bool",
+ "Desc": "If enabled, data is centered to be zero mean",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "Seed",
+ "Type": "Int",
+ "Desc": "The seed for random number generation",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.PredictedLabelColumnOriginalValueConverter",
+ "Desc": "Transforms a predicted label column to its original values, unless it is of type bool.",
+ "FriendlyName": "Convert Predicted Label",
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "PredictedLabelColumn",
+ "Type": "String",
+ "Desc": "The predicted label column",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.RandomNumberGenerator",
+ "Desc": "Adds a column with a generated number sequence.",
+ "FriendlyName": "Generate Number Transform",
+ "ShortName": "Generate",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "UseCounter",
+ "Type": "Bool",
+ "Desc": "Use an auto-incremented integer starting at zero instead of a random number",
+ "Aliases": [
+ "cnt"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Seed",
+ "Type": "UInt",
+ "Desc": "The random seed",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:seed)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "UseCounter",
+ "Type": "Bool",
+ "Desc": "Use an auto-incremented integer starting at zero instead of a random number",
+ "Aliases": [
+ "cnt"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "Seed",
+ "Type": "UInt",
+ "Desc": "The random seed",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 42
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.RowRangeFilter",
+ "Desc": "Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values.",
+ "FriendlyName": "Range Filter",
+ "ShortName": "RangeFilter",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": "String",
+ "Desc": "Column",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Min",
+ "Type": "Float",
+ "Desc": "Minimum value (0 to 1 for key types)",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Max",
+ "Type": "Float",
+ "Desc": "Maximum value (0 to 1 for key types)",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Complement",
+ "Type": "Bool",
+ "Desc": "If true, keep the values that fall outside the range.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "IncludeMin",
+ "Type": "Bool",
+ "Desc": "If true, include in the range the values that are equal to min.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "IncludeMax",
+ "Type": "Bool",
+ "Desc": "If true, include in the range the values that are equal to max.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.RowSkipAndTakeFilter",
+ "Desc": "Allows limiting input to a subset of rows at an optional offset. Can be used to implement data paging.",
+ "FriendlyName": "Skip and Take Filter",
+ "ShortName": "SkipTake",
+ "Inputs": [
+ {
+ "Name": "Skip",
+ "Type": "Int",
+ "Desc": "Number of items to skip",
+ "Aliases": [
+ "s"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Take",
+ "Type": "Int",
+ "Desc": "Number of items to take",
+ "Aliases": [
+ "t"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": true,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.RowSkipFilter",
+ "Desc": "Allows limiting input to a subset of rows by skipping a number of rows.",
+ "FriendlyName": "Skip Filter",
+ "ShortName": "Skip",
+ "Inputs": [
+ {
+ "Name": "Count",
+ "Type": "Int",
+ "Desc": "Number of items to skip",
+ "Aliases": [
+ "c",
+ "n",
+ "s"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.RowTakeFilter",
+ "Desc": "Allows limiting input to a subset of rows by taking the first N rows.",
+ "FriendlyName": "Take Filter",
+ "ShortName": "Take",
+ "Inputs": [
+ {
+ "Name": "Count",
+ "Type": "Int",
+ "Desc": "Number of items to take",
+ "Aliases": [
+ "c",
+ "n",
+ "t"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 9223372036854775807
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.ScoreColumnSelector",
+ "Desc": "Selects only the last score columns and the extra columns specified in the arguments.",
+ "FriendlyName": "Choose Columns By Index",
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "ExtraColumns",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Extra columns to write",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.Scorer",
+ "Desc": "Turn the predictor model into a transform model",
+ "FriendlyName": null,
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "The predictor model to turn into a transform",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "ScoredData",
+ "Type": "DataView",
+ "Desc": "The scored dataset"
+ },
+ {
+ "Name": "ScoringTransform",
+ "Type": "TransformModel",
+ "Desc": "The scoring transform"
+ }
+ ]
+ },
+ {
+ "Name": "Transforms.Segregator",
+ "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform",
+ "FriendlyName": "Un-group Transform",
+ "ShortName": "Ungroup",
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Columns to unroll, or 'pivot'",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Mode",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Inner",
+ "Outer",
+ "First"
+ ]
+ },
+ "Desc": "Specifies how to unroll multiple pivot columns of different size.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": "Inner"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.SentimentAnalyzer",
+ "Desc": "Uses a pretrained sentiment model to score input strings",
+ "FriendlyName": "Sentiment Analyzing Transform",
+ "ShortName": "Senti",
+ "Inputs": [
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column.",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column.",
+ "Aliases": [
+ "dst"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.SupervisedBinNormalizer",
+ "Desc": "Similar to BinNormalizer, but calculates bins based on correlation with the label column, not equi-density. The new value is bin_number / number_of_bins.",
+ "FriendlyName": "Supervised Binning Normalizer",
+ "ShortName": "SupBin",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "NumBins",
+ "Type": "Int",
+ "Desc": "Max number of bins, power of 2 recommended",
+ "Aliases": [
+ "bins"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "FixZero",
+ "Type": "Bool",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Aliases": [
+ "zero"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "LabelColumn",
+ "Type": "String",
+ "Desc": "Label column for supervised binning",
+ "Aliases": [
+ "label",
+ "lab"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "MinBinSize",
+ "Type": "Int",
+ "Desc": "Minimum number of examples per bin",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 10
+ },
+ {
+ "Name": "NumBins",
+ "Type": "Int",
+ "Desc": "Max number of bins, power of 2 recommended",
+ "Aliases": [
+ "bins"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1024
+ },
+ {
+ "Name": "FixZero",
+ "Type": "Bool",
+ "Desc": "Whether to map zero to zero, preserving sparsity",
+ "Aliases": [
+ "zero"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "MaxTrainingExamples",
+ "Type": "Int",
+ "Desc": "Max number of examples used to train the normalizer",
+ "Aliases": [
+ "maxtrain"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1000000000
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.TextFeaturizer",
+ "Desc": "A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text.",
+ "FriendlyName": "Text Transform",
+ "ShortName": "Text",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ },
+ "Desc": "New column definition (optional form: name:srcs).",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Language",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "English",
+ "French",
+ "German",
+ "Dutch",
+ "Italian",
+ "Spanish",
+ "Japanese"
+ ]
+ },
+ "Desc": "Dataset language or 'AutoDetect' to detect language per row.",
+ "Aliases": [
+ "lang"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "English"
+ },
+ {
+ "Name": "StopWordsRemover",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "StopWordsRemover"
+ },
+ "Desc": "Stopwords remover.",
+ "Aliases": [
+ "remover"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "TextCase",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Lower",
+ "Upper",
+ "None"
+ ]
+ },
+ "Desc": "Casing text using the rules of the invariant culture.",
+ "Aliases": [
+ "case"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "Lower"
+ },
+ {
+ "Name": "KeepDiacritics",
+ "Type": "Bool",
+ "Desc": "Whether to keep diacritical marks or remove them.",
+ "Aliases": [
+ "diac"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "KeepPunctuations",
+ "Type": "Bool",
+ "Desc": "Whether to keep punctuation marks or remove them.",
+ "Aliases": [
+ "punc"
+ ],
+ "Required": false,
+ "SortOrder": 7.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "KeepNumbers",
+ "Type": "Bool",
+ "Desc": "Whether to keep numbers or remove them.",
+ "Aliases": [
+ "num"
+ ],
+ "Required": false,
+ "SortOrder": 8.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "OutputTokens",
+ "Type": "Bool",
+ "Desc": "Whether to output the transformed text tokens as an additional column.",
+ "Aliases": [
+ "tokens",
+ "showtext",
+ "showTransformedText"
+ ],
+ "Required": false,
+ "SortOrder": 9.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "Dictionary",
+ "Type": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": "Occurrence"
+ },
+ {
+ "Name": "DropUnknowns",
+ "Type": "Bool",
+ "Desc": "Drop unknown terms instead of mapping them to NA term.",
+ "Aliases": [
+ "dropna"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": false
+ }
+ ]
+ },
+ "Desc": "A dictionary of whitelisted terms.",
+ "Aliases": [
+ "dict"
+ ],
+ "Required": false,
+ "SortOrder": 10.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "WordFeatureExtractor",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "NgramExtractor"
+ },
+ "Desc": "Ngram feature extractor to use for words (WordBag/WordHashBag).",
+ "Aliases": [
+ "wordExtractor"
+ ],
+ "Required": false,
+ "SortOrder": 11.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "NGram",
+ "Settings": {
+ "MaxNumTerms": [
+ 10000000
+ ]
+ }
+ }
+ },
+ {
+ "Name": "CharFeatureExtractor",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "NgramExtractor"
+ },
+ "Desc": "Ngram feature extractor to use for characters (WordBag/WordHashBag).",
+ "Aliases": [
+ "charExtractor"
+ ],
+ "Required": false,
+ "SortOrder": 12.0,
+ "IsNullable": false,
+ "Default": {
+ "Name": "NGram",
+ "Settings": {
+ "NgramLength": 3,
+ "AllLengths": false,
+ "MaxNumTerms": [
+ 10000000
+ ]
+ }
+ }
+ },
+ {
+ "Name": "VectorNormalizer",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "None",
+ "L1",
+ "L2",
+ "LInf"
+ ]
+ },
+ "Desc": "Normalize vectors (rows) individually by rescaling them to unit norm.",
+ "Aliases": [
+ "norm"
+ ],
+ "Required": false,
+ "SortOrder": 13.0,
+ "IsNullable": false,
+ "Default": "L2"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.TextToKeyConverter",
+ "Desc": "Converts input values (words, numbers, etc.) to index in a dictionary.",
+ "FriendlyName": "Term Transform",
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "TextKeyValues",
+ "Type": "Bool",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Aliases": [
+ "textkv"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s) (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "MaxNumTerms",
+ "Type": "Int",
+ "Desc": "Maximum number of terms to keep per column when auto-training",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": 1000000
+ },
+ {
+ "Name": "Term",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of terms",
+ "Required": false,
+ "SortOrder": 106.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Sort",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Occurrence",
+ "Value"
+ ]
+ },
+ "Desc": "How items should be ordered when vectorized. By default, they will be in the order encountered. If by value items are sorted according to their default comparison, e.g., text sorting will be case sensitive (e.g., 'A' then 'Z' then 'a').",
+ "Required": false,
+ "SortOrder": 113.0,
+ "IsNullable": false,
+ "Default": "Occurrence"
+ },
+ {
+ "Name": "TextKeyValues",
+ "Type": "Bool",
+ "Desc": "Whether key value metadata should be text, regardless of the actual input type",
+ "Aliases": [
+ "textkv"
+ ],
+ "Required": false,
+ "SortOrder": 114.0,
+ "IsNullable": false,
+ "Default": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.TrainTestDatasetSplitter",
+ "Desc": "Split the dataset into train and test sets",
+ "FriendlyName": "Dataset Train-Test Split",
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Fraction",
+ "Type": "Float",
+ "Desc": "Fraction of training data",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0.8
+ },
+ {
+ "Name": "StratificationColumn",
+ "Type": "String",
+ "Desc": "Stratification column",
+ "Aliases": [
+ "strat"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "TrainData",
+ "Type": "DataView",
+ "Desc": "Training data"
+ },
+ {
+ "Name": "TestData",
+ "Type": "DataView",
+ "Desc": "Testing data"
+ }
+ ]
+ },
+ {
+ "Name": "Transforms.TreeLeafFeaturizer",
+ "Desc": "Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector to three outputs: 1. A vector containing the individual tree outputs of the tree ensemble. 2. A vector indicating the leaves that the feature vector falls on in the tree ensemble. 3. A vector indicating the paths that the feature vector falls on in the tree ensemble. If both a model file and a trainer are specified - will use the model file. If neither are specified, will train a default FastTree model. This can handle key labels by training a regression model towards their optionally permuted indices.",
+ "FriendlyName": "Tree Ensemble Featurization Transform",
+ "ShortName": "TreeFeat",
+ "Inputs": [
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "Trainer to use",
+ "Required": true,
+ "SortOrder": 10.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Suffix",
+ "Type": "String",
+ "Desc": "Output column: The suffix to append to the default column names",
+ "Aliases": [
+ "ex"
+ ],
+ "Required": false,
+ "SortOrder": 101.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "LabelPermutationSeed",
+ "Type": "Int",
+ "Desc": "If specified, determines the permutation seed for applying this featurizer to a multiclass problem.",
+ "Aliases": [
+ "lps"
+ ],
+ "Required": false,
+ "SortOrder": 102.0,
+ "IsNullable": false,
+ "Default": 0
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "IFeaturizerInput",
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.TwoHeterogeneousModelCombiner",
+ "Desc": "Combines a TransformModel and a PredictorModel into a single PredictorModel.",
+ "FriendlyName": null,
+ "ShortName": null,
+ "Inputs": [
+ {
+ "Name": "TransformModel",
+ "Type": "TransformModel",
+ "Desc": "Transform model",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "Predictor model",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "PredictorModel",
+ "Type": "PredictorModel",
+ "Desc": "Predictor model"
+ }
+ ]
+ },
+ {
+ "Name": "Transforms.WordTokenizer",
+ "Desc": "The input to this transform is text, and the output is a vector of text containing the words (tokens) in the original text. The separator is space, but can be specified as any other character (or multiple characters) if needed.",
+ "FriendlyName": "Tokenize Text Transform",
+ "ShortName": "TokenizeTextTransform",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "TermSeparators",
+ "Type": "String",
+ "Desc": "Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character.",
+ "Aliases": [
+ "sep"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition(s)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "TermSeparators",
+ "Type": "String",
+ "Desc": "Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character.",
+ "Aliases": [
+ "sep"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": "space"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ }
+ ],
+ "Components": [
+ {
+ "Kind": "AutoMlEngine",
+ "Components": [
+ {
+ "Name": "Defaults",
"Desc": "AutoML engine that returns learners with default settings.",
"FriendlyName": "Defaults Engine",
"Settings": []
},
{
- "Name": "Rocket",
- "Desc": "AutoML engine that consists of distinct, hierarchical stages of operation.",
- "FriendlyName": "Rocket Engine",
+ "Name": "Rocket",
+ "Desc": "AutoML engine that consists of distinct, hierarchical stages of operation.",
+ "FriendlyName": "Rocket Engine",
+ "Settings": [
+ {
+ "Name": "TopKLearners",
+ "Type": "Int",
+ "Desc": "Number of learners to retain for second stage.",
+ "Aliases": [
+ "topk"
+ ],
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 2
+ },
+ {
+ "Name": "SecondRoundTrialsPerLearner",
+ "Type": "Int",
+ "Desc": "Number of trials for retained second stage learners.",
+ "Aliases": [
+ "stage2num"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 5
+ },
+ {
+ "Name": "RandomInitialization",
+ "Type": "Bool",
+ "Desc": "Use random initialization only.",
+ "Aliases": [
+ "randinit"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "NumInitializationPipelines",
+ "Type": "Int",
+ "Desc": "Number of initialization pipelines, used for random initialization only.",
+ "Aliases": [
+ "numinitseeds"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": 20
+ }
+ ]
+ },
+ {
+ "Name": "UniformRandom",
+ "Desc": "AutoML engine using uniform random sampling.",
+ "FriendlyName": "Uniform Random Engine",
+ "Settings": []
+ }
+ ]
+ },
+ {
+ "Kind": "AutoMlStateBase",
+ "Components": [
+ {
+ "Name": "AutoMlState",
+ "Desc": "State of an AutoML search and search space.",
+ "FriendlyName": "AutoML State",
+ "Aliases": [
+ "automlst"
+ ],
+ "Settings": [
+ {
+ "Name": "Metric",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Auc",
+ "AccuracyMicro",
+ "AccuracyMacro",
+ "L2",
+ "F1",
+ "AuPrc",
+ "TopKAccuracy",
+ "Rms",
+ "LossFn",
+ "RSquared",
+ "LogLoss",
+ "LogLossReduction",
+ "Ndcg",
+ "Dcg",
+ "PositivePrecision",
+ "PositiveRecall",
+ "NegativePrecision",
+ "NegativeRecall",
+ "DrAtK",
+ "DrAtPFpr",
+ "DrAtNumPos",
+ "NumAnomalies",
+ "ThreshAtK",
+ "ThreshAtP",
+ "ThreshAtNumPos",
+ "Nmi",
+ "AvgMinScore",
+ "Dbi"
+ ]
+ },
+ "Desc": "Supported metric for evaluator.",
+ "Aliases": [
+ "metric"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": "Auc"
+ },
+ {
+ "Name": "Engine",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "AutoMlEngine"
+ },
+ "Desc": "AutoML engine (pipeline optimizer) that generates next candidates.",
+ "Aliases": [
+ "engine"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "TrainerKind",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "SignatureBinaryClassifierTrainer",
+ "SignatureMultiClassClassifierTrainer",
+ "SignatureRankerTrainer",
+ "SignatureRegressorTrainer",
+ "SignatureMultiOutputRegressorTrainer",
+ "SignatureAnomalyDetectorTrainer",
+ "SignatureClusteringTrainer"
+ ]
+ },
+ "Desc": "Kind of trainer for task, such as binary classification trainer, multiclass trainer, etc.",
+ "Aliases": [
+ "tk"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": "SignatureBinaryClassifierTrainer"
+ },
+ {
+ "Name": "TerminatorArgs",
+ "Type": {
+ "Kind": "Component",
+ "ComponentKind": "SearchTerminator"
+ },
+ "Desc": "Arguments for creating terminator, which determines when to stop search.",
+ "Aliases": [
+ "term"
+ ],
+ "Required": true,
+ "SortOrder": 150.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "RequestedLearners",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Learner set to sweep over (if available).",
+ "Aliases": [
+ "learners"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "Kind": "BoosterParameterFunction",
+ "Components": [
+ {
+ "Name": "dart",
+ "Desc": "Dropouts meet Multiple Additive Regression Trees. See https://arxiv.org/abs/1505.01866",
+ "FriendlyName": "Tree Dropout Tree Booster",
+ "Settings": [
+ {
+ "Name": "DropRate",
+ "Type": "Float",
+ "Desc": "Drop ratio for trees. Range:(0,1).",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.1,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "MaxDrop",
+ "Type": "Int",
+ "Desc": "Max number of dropped trees in a boosting round.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1,
+ "Range": {
+ "Inf": 0,
+ "Max": 2147483647
+ }
+ },
+ {
+ "Name": "SkipDrop",
+ "Type": "Float",
+ "Desc": "Probability of not performing dropping in a boosting round.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.5,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "XgboostDartMode",
+ "Type": "Bool",
+ "Desc": "True will enable xgboost dart mode.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "UniformDrop",
+ "Type": "Bool",
+ "Desc": "True will enable uniform drop.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "UnbalancedSets",
+ "Type": "Bool",
+ "Desc": "Use for binary classification when classes are not balanced.",
+ "Aliases": [
+ "us"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "MinSplitGain",
+ "Type": "Float",
+ "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ }
+ },
+ {
+ "Name": "MaxDepth",
+ "Type": "Int",
+ "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
+ },
+ {
+ "Name": "MinChildWeight",
+ "Type": "Float",
+ "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.1,
+ "Range": {
+ "Min": 0.0
+ }
+ },
+ {
+ "Name": "SubsampleFreq",
+ "Type": "Int",
+ "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iterations.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
+ },
+ {
+ "Name": "Subsample",
+ "Type": "Float",
+ "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "FeatureFraction",
+ "Type": "Float",
+ "Desc": "Subsample ratio of columns when constructing each tree. Range: (0,1].",
+ "Aliases": [
+ "ff"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "RegLambda",
+ "Type": "Float",
+ "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.",
+ "Aliases": [
+ "l2"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.01,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "RegAlpha",
+ "Type": "Float",
+ "Desc": "L1 regularization term on weights, increase this value will make model more conservative.",
+ "Aliases": [
+ "l1"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "ScalePosWeight",
+ "Type": "Float",
+ "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases).",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0
+ }
+ ]
+ },
+ {
+ "Name": "gbdt",
+ "Desc": "Traditional Gradient Boosting Decision Tree.",
+ "FriendlyName": "Tree Booster",
+ "Settings": [
+ {
+ "Name": "UnbalancedSets",
+ "Type": "Bool",
+ "Desc": "Use for binary classification when classes are not balanced.",
+ "Aliases": [
+ "us"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "MinSplitGain",
+ "Type": "Float",
+ "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ }
+ },
+ {
+ "Name": "MaxDepth",
+ "Type": "Int",
+ "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
+ },
+ {
+ "Name": "MinChildWeight",
+ "Type": "Float",
+ "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.1,
+ "Range": {
+ "Min": 0.0
+ }
+ },
+ {
+ "Name": "SubsampleFreq",
+ "Type": "Int",
+ "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iterations.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
+ },
+ {
+ "Name": "Subsample",
+ "Type": "Float",
+ "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "FeatureFraction",
+ "Type": "Float",
+ "Desc": "Subsample ratio of columns when constructing each tree. Range: (0,1].",
+ "Aliases": [
+ "ff"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "RegLambda",
+ "Type": "Float",
+ "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.",
+ "Aliases": [
+ "l2"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.01,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "RegAlpha",
+ "Type": "Float",
+ "Desc": "L1 regularization term on weights, increase this value will make model more conservative.",
+ "Aliases": [
+ "l1"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "ScalePosWeight",
+ "Type": "Float",
+ "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases).",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0
+ }
+ ]
+ },
+ {
+ "Name": "goss",
+ "Desc": "Gradient-based One-Side Sampling.",
+ "FriendlyName": "Gradient-based One-Side Sampling",
"Settings": [
{
- "Name": "TopKLearners",
- "Type": "Int",
- "Desc": "Number of learners to retain for second stage.",
- "Aliases": [
- "topk"
- ],
+ "Name": "TopRate",
+ "Type": "Float",
+ "Desc": "Retain ratio for large gradient instances.",
"Required": false,
- "SortOrder": 1.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 2
+ "Default": 0.2,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
},
{
- "Name": "SecondRoundTrialsPerLearner",
- "Type": "Int",
- "Desc": "Number of trials for retained second stage learners.",
- "Aliases": [
- "stage2num"
- ],
+ "Name": "OtherRate",
+ "Type": "Float",
+ "Desc": "Retain ratio for small gradient instances.",
"Required": false,
- "SortOrder": 2.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 5
+ "Default": 0.1,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
},
{
- "Name": "RandomInitialization",
+ "Name": "UnbalancedSets",
"Type": "Bool",
- "Desc": "Use random initialization only.",
+ "Desc": "Use for binary classification when classes are not balanced.",
"Aliases": [
- "randinit"
+ "us"
],
"Required": false,
- "SortOrder": 3.0,
+ "SortOrder": 150.0,
"IsNullable": false,
"Default": false
},
{
- "Name": "NumInitializationPipelines",
+ "Name": "MinSplitGain",
+ "Type": "Float",
+ "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ }
+ },
+ {
+ "Name": "MaxDepth",
"Type": "Int",
- "Desc": "Number of initilization pipelines, used for random initialization only.",
- "Aliases": [
- "numinitseeds"
- ],
+ "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.",
"Required": false,
- "SortOrder": 4.0,
+ "SortOrder": 150.0,
"IsNullable": false,
- "Default": 20
- }
- ]
- },
- {
- "Name": "UniformRandom",
- "Desc": "AutoML engine using uniform random sampling.",
- "FriendlyName": "Uniform Random Engine",
- "Settings": []
- }
- ]
- },
- {
- "Kind": "AutoMlStateBase",
- "Components": [
- {
- "Name": "AutoMlState",
- "Desc": "State of an AutoML search and search space.",
- "FriendlyName": "AutoML State",
- "Aliases": [
- "automlst"
- ],
- "Settings": [
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
+ },
{
- "Name": "Metric",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "Auc",
- "AccuracyMicro",
- "AccuracyMacro",
- "L2",
- "F1",
- "AuPrc",
- "TopKAccuracy",
- "Rms",
- "LossFn",
- "RSquared",
- "LogLoss",
- "LogLossReduction",
- "Ndcg",
- "Dcg",
- "PositivePrecision",
- "PositiveRecall",
- "NegativePrecision",
- "NegativeRecall",
- "DrAtK",
- "DrAtPFpr",
- "DrAtNumPos",
- "NumAnomalies",
- "ThreshAtK",
- "ThreshAtP",
- "ThreshAtNumPos",
- "Nmi",
- "AvgMinScore",
- "Dbi"
- ]
- },
- "Desc": "Supported metric for evaluator.",
- "Aliases": [
- "metric"
- ],
- "Required": true,
+ "Name": "MinChildWeight",
+ "Type": "Float",
+ "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.",
+ "Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": "Auc"
+ "Default": 0.1,
+ "Range": {
+ "Min": 0.0
+ }
},
{
- "Name": "Engine",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "AutoMlEngine"
- },
- "Desc": "AutoML engine (pipeline optimizer) that generates next candidates.",
- "Aliases": [
- "engine"
- ],
- "Required": true,
+ "Name": "SubsampleFreq",
+ "Type": "Int",
+ "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iterations.",
+ "Required": false,
"SortOrder": 150.0,
- "IsNullable": false
+ "IsNullable": false,
+ "Default": 0,
+ "Range": {
+ "Max": 2147483647,
+ "Min": 0
+ }
},
{
- "Name": "TrainerKind",
- "Type": {
- "Kind": "Enum",
- "Values": [
- "SignatureBinaryClassifierTrainer",
- "SignatureMultiClassClassifierTrainer",
- "SignatureRankerTrainer",
- "SignatureRegressorTrainer",
- "SignatureMultiOutputRegressorTrainer",
- "SignatureAnomalyDetectorTrainer",
- "SignatureClusteringTrainer"
- ]
- },
- "Desc": "Kind of trainer for task, such as binary classification trainer, multiclass trainer, etc.",
+ "Name": "Subsample",
+ "Type": "Float",
+ "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
+ },
+ {
+ "Name": "FeatureFraction",
+ "Type": "Float",
+ "Desc": "Subsample ratio of columns when constructing each tree. Range: (0,1].",
"Aliases": [
- "tk"
+ "ff"
],
- "Required": true,
+ "Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": "SignatureBinaryClassifierTrainer"
+ "Default": 1.0,
+ "Range": {
+ "Inf": 0.0,
+ "Max": 1.0
+ }
},
{
- "Name": "TerminatorArgs",
- "Type": {
- "Kind": "Component",
- "ComponentKind": "SearchTerminator"
- },
- "Desc": "Arguments for creating terminator, which determines when to stop search.",
+ "Name": "RegLambda",
+ "Type": "Float",
+ "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.",
"Aliases": [
- "term"
+ "l2"
],
- "Required": true,
+ "Required": false,
"SortOrder": 150.0,
- "IsNullable": false
+ "IsNullable": false,
+ "Default": 0.01,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
},
{
- "Name": "RequestedLearners",
- "Type": {
- "Kind": "Array",
- "ItemType": "String"
- },
- "Desc": "Learner set to sweep over (if available).",
+ "Name": "RegAlpha",
+ "Type": "Float",
+ "Desc": "L1 regularization term on weights, increase this value will make model more conservative.",
"Aliases": [
- "learners"
+ "l1"
],
"Required": false,
"SortOrder": 150.0,
"IsNullable": false,
- "Default": null
+ "Default": 0.0,
+ "Range": {
+ "Min": 0.0
+ },
+ "SweepRange": {
+ "RangeType": "Discrete",
+ "Values": [
+ 0.0,
+ 0.5,
+ 1.0
+ ]
+ }
+ },
+ {
+ "Name": "ScalePosWeight",
+ "Type": "Float",
+ "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases).",
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": 1.0
}
]
}
@@ -24490,6 +26969,17 @@
}
]
},
+ {
+ "Kind": "ParallelLightGBM",
+ "Components": [
+ {
+ "Name": "Single",
+ "Desc": "Single node machine learning process.",
+ "FriendlyName": "Single",
+ "Settings": []
+ }
+ ]
+ },
{
"Kind": "ParallelTraining",
"Components": [
diff --git a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key.txt b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key.txt
index 9b303c86aa..e756fcd7ba 100644
--- a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key.txt
+++ b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.key.txt
@@ -1,151 +1,151 @@
Instance Label Assigned Log-loss #1 Score #2 Score #3 Score #1 Class #2 Class #3 Class
-5 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-6 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-8 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-9 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-10 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-11 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-18 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-20 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-21 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-25 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-28 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-31 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-32 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-35 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-37 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-40 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-41 0 1 0.17550956509619134 0.8390294 0.09255582 0.0684148148 0 1 2
-44 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-45 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-46 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-48 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-50 1 2 0.48031316690941278 0.61858964 0.2931589 0.08825144 1 2 0
-51 1 2 0.18552267596609509 0.83067 0.09896274 0.07036729 1 0 2
-52 1 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
-54 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-56 1 2 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
-60 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-63 1 2 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
-64 1 2 0.14288655580917453 0.8668524 0.06818299 0.06496459 1 2 0
-66 1 2 0.13927185898584951 0.8699915 0.06910439 0.060904108 1 2 0
-68 1 2 0.1475586146516118 0.862811863 0.08110718 0.0560809337 1 2 0
-69 1 2 0.13690026149264065 0.8720572 0.07104707 0.056895718 1 2 0
-70 1 2 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
-71 1 2 0.15194427686527462 0.859036148 0.07716796 0.06379592 1 2 0
-72 1 1.4639003870351257 0.712372541 0.231332228 0.0562952235 2 1 0
-73 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-74 1 2 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
-76 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-77 1 2.0734221020246566 0.815010846 0.1257547 0.05923444 2 1 0
-79 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-82 1 2 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
-88 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-90 1 2 0.1425659799992052 0.867130339 0.0762954 0.0565742739 1 2 0
-91 1 2 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
-92 1 2 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
-93 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-95 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-96 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-97 1 2 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
-98 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-99 1 2 0.13879074815854195 0.870410144 0.06865643 0.06093342 1 2 0
-100 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-102 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-104 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-105 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-106 2 2 2.3434392875794119 0.8476237 0.09599691 0.05637939 1 2 0
-108 2 0.22657594234978759 0.7972588 0.1479769 0.0547643229 2 1 0
-109 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-111 2 0.177848875720656 0.8370689 0.108788572 0.0541424938 2 1 0
-112 2 0.13281455464792449 0.875627458 0.06831084 0.0560617261 2 1 0
-113 2 0.19621674447868781 0.8218341 0.12273933 0.05542656 2 1 0
-115 2 0.17200937673419167 0.8419713 0.09234353 0.0656852052 2 0 1
-117 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-120 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-121 2 0.16411842591849909 0.8486415 0.09412396 0.05723452 2 1 0
-122 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-123 2 0.28256671512014453 0.753846347 0.189867079 0.05628657 2 1 0
-125 2 0.20564890133993838 0.814118862 0.09413585 0.09174529 2 0 1
-128 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-129 2 0.16567795334648433 0.847319067 0.09548671 0.057194218 2 1 0
-131 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-132 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-133 2 0.29113037794713281 0.7474182 0.191831991 0.0607497729 2 1 0
-137 2 0.22116862531406531 0.8015815 0.104995139 0.09342336 2 1 0
-138 2 2 0.99148905684440769 0.5769956 0.3710238 0.05198058 1 2 0
-141 2 0.18520119392899573 0.8309371 0.09454043 0.07452248 2 0 1
-144 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-145 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
-147 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
-0 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-1 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-2 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-3 0 1 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
-4 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-7 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-12 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-13 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-14 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-15 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-16 0 1 0.12377570311405263 0.883578 0.0616899766 0.0547319949 0 1 2
-17 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-19 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-22 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-23 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-24 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-26 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-27 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-29 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-30 0 1 0.13466431803417811 0.874009252 0.0676120147 0.058378756 0 1 2
-33 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-34 0 1 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
-36 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-38 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-39 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-42 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-43 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-47 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-49 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-53 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-55 1 2 0.13788477324168841 0.8711991 0.0693822056 0.0594187379 1 2 0
-57 1 2 0.14180960025970848 0.867786467 0.06764004 0.06457352 1 2 0
-58 1 2 0.14599016737971268 0.8641662 0.07662087 0.0592129268 1 2 0
-59 1 2 0.15802382006343754 0.853829443 0.08263559 0.06353495 1 2 0
-61 1 2 0.24588101940191279 0.782015264 0.155458942 0.06252578 1 2 0
-62 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-65 1 2 0.13132980842568853 0.8769285 0.063261956 0.05980951 1 2 0
-67 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-75 1 2 0.16325813194034094 0.8493719 0.09269803 0.0579300523 1 2 0
-78 1 2 0.25856064866775763 0.7721622 0.1669285 0.0609093271 1 2 0
-80 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-81 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-83 1 0.93589296161344149 0.5591961 0.392235458 0.04856844 2 1 0
-84 1 2 0.30739855328930082 0.735357463 0.193893984 0.07074856 1 2 0
-85 1 2 0.36387320359326997 0.6949793 0.250199676 0.05482102 1 2 0
-86 1 2 0.22482401043721545 0.798656762 0.137458712 0.06388455 1 2 0
-87 1 2 0.15038993216531976 0.8603724 0.08044732 0.05918027 1 2 0
-89 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-94 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-101 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-103 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
-107 2 0.14333558969752591 0.866463244 0.07176019 0.0617765374 2 1 0
-110 2 0.23871432870005888 0.787639856 0.156446457 0.0559136942 2 1 0
-114 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-116 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
-118 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-119 2 2 0.82777746896385962 0.500950456 0.4370195 0.0620300435 1 2 0
-124 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
-126 2 0.41542564005029348 0.6600593 0.280402571 0.0595381558 2 1 0
-127 2 0.39337756049800737 0.674773932 0.268265069 0.05696099 2 1 0
-130 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-134 2 2 0.85550144650046023 0.5024456 0.42507 0.07248444 1 2 0
-135 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-136 2 0.1485323137279633 0.861972153 0.0780924 0.05993546 2 1 0
-139 2 0.16469395959924102 0.848153234 0.09065638 0.0611904152 2 1 0
-140 2 0.1540180682859322 0.857256532 0.08267425 0.0600692481 2 1 0
-142 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-143 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
-146 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-148 2 0.15751162588437911 0.8542669 0.08508965 0.0606434755 2 1 0
-149 2 0.22273115731729765 0.80033 0.142754182 0.05691586 2 1 0
+5 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+6 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+8 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+9 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+10 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+11 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+18 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+20 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+21 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+25 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+28 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+31 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+32 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+35 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+37 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+40 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+41 0 0 0.17550956509619134 0.8390294 0.09255582 0.0684148148 0 1 2
+44 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+45 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+46 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+48 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+50 1 1 0.48031316690941278 0.61858964 0.2931589 0.08825144 1 2 0
+51 1 1 0.18552267596609509 0.83067 0.09896274 0.07036729 1 0 2
+52 1 2 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
+54 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+56 1 1 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
+60 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+63 1 1 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
+64 1 1 0.14288655580917453 0.8668524 0.06818299 0.06496459 1 2 0
+66 1 1 0.13927185898584951 0.8699915 0.06910439 0.060904108 1 2 0
+68 1 1 0.1475586146516118 0.862811863 0.08110718 0.0560809337 1 2 0
+69 1 1 0.13690026149264065 0.8720572 0.07104707 0.056895718 1 2 0
+70 1 1 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
+71 1 1 0.15194427686527462 0.859036148 0.07716796 0.06379592 1 2 0
+72 1 2 1.4639003870351257 0.712372541 0.231332228 0.0562952235 2 1 0
+73 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+74 1 1 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
+76 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+77 1 2 2.0734221020246566 0.815010846 0.1257547 0.05923444 2 1 0
+79 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+82 1 1 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
+88 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+90 1 1 0.1425659799992052 0.867130339 0.0762954 0.0565742739 1 2 0
+91 1 1 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
+92 1 1 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
+93 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+95 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+96 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+97 1 1 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
+98 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+99 1 1 0.13879074815854195 0.870410144 0.06865643 0.06093342 1 2 0
+100 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+102 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+104 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+105 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+106 2 1 2.3434392875794119 0.8476237 0.09599691 0.05637939 1 2 0
+108 2 2 0.22657594234978759 0.7972588 0.1479769 0.0547643229 2 1 0
+109 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+111 2 2 0.177848875720656 0.8370689 0.108788572 0.0541424938 2 1 0
+112 2 2 0.13281455464792449 0.875627458 0.06831084 0.0560617261 2 1 0
+113 2 2 0.19621674447868781 0.8218341 0.12273933 0.05542656 2 1 0
+115 2 2 0.17200937673419167 0.8419713 0.09234353 0.0656852052 2 0 1
+117 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+120 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+121 2 2 0.16411842591849909 0.8486415 0.09412396 0.05723452 2 1 0
+122 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+123 2 2 0.28256671512014453 0.753846347 0.189867079 0.05628657 2 1 0
+125 2 2 0.20564890133993838 0.814118862 0.09413585 0.09174529 2 0 1
+128 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+129 2 2 0.16567795334648433 0.847319067 0.09548671 0.057194218 2 1 0
+131 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+132 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+133 2 2 0.29113037794713281 0.7474182 0.191831991 0.0607497729 2 1 0
+137 2 2 0.22116862531406531 0.8015815 0.104995139 0.09342336 2 1 0
+138 2 1 0.99148905684440769 0.5769956 0.3710238 0.05198058 1 2 0
+141 2 2 0.18520119392899573 0.8309371 0.09454043 0.07452248 2 0 1
+144 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+145 2 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
+147 2 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
+0 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+1 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+2 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+3 0 0 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
+4 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+7 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+12 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+13 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+14 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+15 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+16 0 0 0.12377570311405263 0.883578 0.0616899766 0.0547319949 0 1 2
+17 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+19 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+22 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+23 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+24 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+26 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+27 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+29 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+30 0 0 0.13466431803417811 0.874009252 0.0676120147 0.058378756 0 1 2
+33 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+34 0 0 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
+36 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+38 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+39 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+42 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+43 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+47 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+49 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+53 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+55 1 1 0.13788477324168841 0.8711991 0.0693822056 0.0594187379 1 2 0
+57 1 1 0.14180960025970848 0.867786467 0.06764004 0.06457352 1 2 0
+58 1 1 0.14599016737971268 0.8641662 0.07662087 0.0592129268 1 2 0
+59 1 1 0.15802382006343754 0.853829443 0.08263559 0.06353495 1 2 0
+61 1 1 0.24588101940191279 0.782015264 0.155458942 0.06252578 1 2 0
+62 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+65 1 1 0.13132980842568853 0.8769285 0.063261956 0.05980951 1 2 0
+67 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+75 1 1 0.16325813194034094 0.8493719 0.09269803 0.0579300523 1 2 0
+78 1 1 0.25856064866775763 0.7721622 0.1669285 0.0609093271 1 2 0
+80 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+81 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+83 1 2 0.93589296161344149 0.5591961 0.392235458 0.04856844 2 1 0
+84 1 1 0.30739855328930082 0.735357463 0.193893984 0.07074856 1 2 0
+85 1 1 0.36387320359326997 0.6949793 0.250199676 0.05482102 1 2 0
+86 1 1 0.22482401043721545 0.798656762 0.137458712 0.06388455 1 2 0
+87 1 1 0.15038993216531976 0.8603724 0.08044732 0.05918027 1 2 0
+89 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+94 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+101 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+103 2 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
+107 2 2 0.14333558969752591 0.866463244 0.07176019 0.0617765374 2 1 0
+110 2 2 0.23871432870005888 0.787639856 0.156446457 0.0559136942 2 1 0
+114 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+116 2 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
+118 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+119 2 1 0.82777746896385962 0.500950456 0.4370195 0.0620300435 1 2 0
+124 2 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
+126 2 2 0.41542564005029348 0.6600593 0.280402571 0.0595381558 2 1 0
+127 2 2 0.39337756049800737 0.674773932 0.268265069 0.05696099 2 1 0
+130 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+134 2 1 0.85550144650046023 0.5024456 0.42507 0.07248444 1 2 0
+135 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+136 2 2 0.1485323137279633 0.861972153 0.0780924 0.05993546 2 1 0
+139 2 2 0.16469395959924102 0.848153234 0.09065638 0.0611904152 2 1 0
+140 2 2 0.1540180682859322 0.857256532 0.08267425 0.0600692481 2 1 0
+142 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+143 2 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
+146 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+148 2 2 0.15751162588437911 0.8542669 0.08508965 0.0606434755 2 1 0
+149 2 2 0.22273115731729765 0.80033 0.142754182 0.05691586 2 1 0
diff --git a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
index cb0d9331ce..c3fc93b75b 100644
--- a/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
+++ b/test/BaselineOutput/SingleDebug/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
@@ -1,151 +1,151 @@
Instance Label Assigned Log-loss #1 Score #2 Score #3 Score #1 Class #2 Class #3 Class
-5 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-6 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-8 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-9 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-10 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-11 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-18 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-20 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-21 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-25 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-28 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-31 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-32 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-35 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-37 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-40 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-41 0 1 0.17550963613619172 0.8390293 0.09255581 0.06841481 0 1 2
-44 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-45 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-46 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-48 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-50 1 2 0.48031316690941278 0.61858964 0.293158859 0.08825144 1 2 0
-51 1 2 0.18552274772100014 0.83066994 0.09896273 0.0703672841 1 0 2
-52 1 3 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
-54 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-56 1 2 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
-60 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-63 1 2 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
-64 1 2 0.14288662456903103 0.866852343 0.06818299 0.06496459 1 2 0
-66 1 2 0.13927192749760864 0.8699914 0.06910439 0.0609041043 1 2 0
-68 1 2 0.1475586146516118 0.862811863 0.08110717 0.05608093 1 2 0
-69 1 2 0.13690032984210998 0.87205714 0.07104707 0.0568957143 1 2 0
-70 1 2 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
-71 1 2 0.15194434625076791 0.8590361 0.07716796 0.06379592 1 2 0
-72 1 3 1.4639004514496772 0.7123725 0.231332213 0.05629522 2 1 0
-73 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-74 1 2 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
-76 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-77 1 3 2.0734221020246566 0.8150108 0.1257547 0.0592344366 2 1 0
-79 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-82 1 2 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
-88 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-90 1 2 0.1425660487370225 0.8671303 0.07629539 0.05657427 1 2 0
-91 1 2 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
-92 1 2 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
-93 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-95 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-96 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-97 1 2 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
-98 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-99 1 2 0.13879081663734727 0.8704101 0.06865642 0.0609334148 1 2 0
-100 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-102 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-104 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-105 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-106 2 2 2.3434393651921286 0.847623646 0.0959969 0.0563793853 1 2 0
-108 2 3 0.22657601711176881 0.797258735 0.1479769 0.05476432 2 1 0
-109 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-111 2 3 0.17784894692703557 0.837068856 0.108788565 0.0541424938 2 1 0
-112 2 3 0.13281462271870764 0.8756274 0.0683108345 0.0560617223 2 1 0
-113 2 3 0.19621674447868781 0.8218341 0.122739322 0.0554265566 2 1 0
-115 2 3 0.17200944752597333 0.8419712 0.0923435241 0.0656852 2 0 1
-117 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-120 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-121 2 3 0.16411849615386448 0.848641455 0.09412395 0.0572345145 2 1 0
-122 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-123 2 3 0.28256679418751329 0.7538463 0.189867079 0.056286566 2 1 0
-125 2 3 0.20564897455362954 0.8141188 0.09413585 0.09174528 2 0 1
-128 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-129 2 3 0.16567802369146914 0.847319 0.0954867 0.0571942143 2 1 0
-131 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-132 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-133 2 3 0.29113045769451545 0.747418165 0.191831976 0.06074977 2 1 0
-137 2 3 0.22116869967287578 0.801581442 0.104995131 0.09342335 2 1 0
-138 2 2 0.99148913716896714 0.5769956 0.371023774 0.0519805774 1 2 0
-141 2 3 0.18520126566083658 0.830937 0.09454043 0.07452247 2 0 1
-144 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-145 2 3 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
-147 2 3 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
-0 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-1 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-2 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-3 0 1 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
-4 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-7 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-12 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-13 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-14 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-15 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-16 0 1 0.12377570311405263 0.883578 0.0616899729 0.05473199 0 1 2
-17 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-19 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-22 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-23 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-24 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-26 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-27 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-29 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-30 0 1 0.13466438623099261 0.8740092 0.06761201 0.0583787523 0 1 2
-33 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-34 0 1 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
-36 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-38 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-39 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-42 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-43 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-47 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-49 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-53 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-55 1 2 0.13788484165848175 0.871199 0.0693822056 0.0594187342 1 2 0
-57 1 2 0.14180966894555355 0.8677864 0.06764004 0.06457351 1 2 0
-58 1 2 0.1459902363533046 0.864166141 0.07662086 0.059212923 1 2 0
-59 1 2 0.1580238898720478 0.8538294 0.08263559 0.0635349452 1 2 0
-61 1 2 0.24588109562120131 0.7820152 0.155458942 0.06252578 1 2 0
-62 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-65 1 2 0.13132980842568853 0.8769285 0.063261956 0.0598095059 1 2 0
-67 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-75 1 2 0.16325820211530925 0.84937185 0.09269803 0.05793005 1 2 0
-78 1 2 0.25856072585963152 0.772162139 0.166928485 0.0609093234 1 2 0
-80 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-81 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-83 1 3 0.93589303759413867 0.559196055 0.392235428 0.04856844 2 1 0
-84 1 2 0.30739863434463804 0.7353574 0.193893969 0.07074856 1 2 0
-85 1 2 0.36387328935790564 0.69497925 0.250199646 0.05482102 1 2 0
-86 1 2 0.2248240850683334 0.7986567 0.1374587 0.06388454 1 2 0
-87 1 2 0.15039000144304782 0.860372365 0.0804473162 0.0591802672 1 2 0
-89 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-94 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-101 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-103 2 3 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
-107 2 3 0.14333565848826485 0.8664632 0.07176019 0.0617765337 2 1 0
-110 2 3 0.23871440437506003 0.7876398 0.156446442 0.0559136942 2 1 0
-114 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-116 2 3 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
-118 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-119 2 2 0.82777753715835345 0.5009504 0.437019467 0.06203004 1 2 0
-124 2 3 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
-126 2 3 0.41542573035225555 0.6600592 0.280402571 0.0595381558 2 1 0
-127 2 3 0.39337764883077281 0.6747739 0.268265069 0.0569609851 2 1 0
-130 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-134 2 2 0.85550151661202845 0.5024455 0.425069958 0.07248443 1 2 0
-135 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-136 2 3 0.14853238287711923 0.8619721 0.0780924 0.05993546 2 1 0
-139 2 3 0.16469402987504086 0.8481532 0.09065638 0.06119041 2 1 0
-140 2 3 0.15401813781546583 0.8572565 0.08267424 0.0600692444 2 1 0
-142 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-143 2 3 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
-146 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-148 2 3 0.15751169565724296 0.8542668 0.08508964 0.06064347 2 1 0
-149 2 3 0.22273123179238696 0.8003299 0.142754182 0.0569158569 2 1 0
+5 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+6 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+8 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+9 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+10 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+11 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+18 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+20 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+21 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+25 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+28 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+31 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+32 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+35 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+37 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+40 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+41 0 0 0.17550963613619172 0.8390293 0.09255581 0.06841481 0 1 2
+44 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+45 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+46 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+48 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+50 1 1 0.48031316690941278 0.61858964 0.293158859 0.08825144 1 2 0
+51 1 1 0.18552274772100014 0.83066994 0.09896273 0.0703672841 1 0 2
+52 1 2 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
+54 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+56 1 1 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
+60 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+63 1 1 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
+64 1 1 0.14288662456903103 0.866852343 0.06818299 0.06496459 1 2 0
+66 1 1 0.13927192749760864 0.8699914 0.06910439 0.0609041043 1 2 0
+68 1 1 0.1475586146516118 0.862811863 0.08110717 0.05608093 1 2 0
+69 1 1 0.13690032984210998 0.87205714 0.07104707 0.0568957143 1 2 0
+70 1 1 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
+71 1 1 0.15194434625076791 0.8590361 0.07716796 0.06379592 1 2 0
+72 1 2 1.4639004514496772 0.7123725 0.231332213 0.05629522 2 1 0
+73 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+74 1 1 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
+76 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+77 1 2 2.0734221020246566 0.8150108 0.1257547 0.0592344366 2 1 0
+79 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+82 1 1 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
+88 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+90 1 1 0.1425660487370225 0.8671303 0.07629539 0.05657427 1 2 0
+91 1 1 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
+92 1 1 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
+93 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+95 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+96 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+97 1 1 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
+98 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+99 1 1 0.13879081663734727 0.8704101 0.06865642 0.0609334148 1 2 0
+100 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+102 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+104 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+105 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+106 2 1 2.3434393651921286 0.847623646 0.0959969 0.0563793853 1 2 0
+108 2 2 0.22657601711176881 0.797258735 0.1479769 0.05476432 2 1 0
+109 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+111 2 2 0.17784894692703557 0.837068856 0.108788565 0.0541424938 2 1 0
+112 2 2 0.13281462271870764 0.8756274 0.0683108345 0.0560617223 2 1 0
+113 2 2 0.19621674447868781 0.8218341 0.122739322 0.0554265566 2 1 0
+115 2 2 0.17200944752597333 0.8419712 0.0923435241 0.0656852 2 0 1
+117 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+120 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+121 2 2 0.16411849615386448 0.848641455 0.09412395 0.0572345145 2 1 0
+122 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+123 2 2 0.28256679418751329 0.7538463 0.189867079 0.056286566 2 1 0
+125 2 2 0.20564897455362954 0.8141188 0.09413585 0.09174528 2 0 1
+128 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+129 2 2 0.16567802369146914 0.847319 0.0954867 0.0571942143 2 1 0
+131 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+132 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+133 2 2 0.29113045769451545 0.747418165 0.191831976 0.06074977 2 1 0
+137 2 2 0.22116869967287578 0.801581442 0.104995131 0.09342335 2 1 0
+138 2 1 0.99148913716896714 0.5769956 0.371023774 0.0519805774 1 2 0
+141 2 2 0.18520126566083658 0.830937 0.09454043 0.07452247 2 0 1
+144 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+145 2 2 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
+147 2 2 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
+0 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+1 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+2 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+3 0 0 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
+4 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+7 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+12 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+13 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+14 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+15 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+16 0 0 0.12377570311405263 0.883578 0.0616899729 0.05473199 0 1 2
+17 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+19 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+22 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+23 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+24 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+26 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+27 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+29 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+30 0 0 0.13466438623099261 0.8740092 0.06761201 0.0583787523 0 1 2
+33 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+34 0 0 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
+36 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+38 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+39 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+42 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+43 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+47 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+49 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+53 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+55 1 1 0.13788484165848175 0.871199 0.0693822056 0.0594187342 1 2 0
+57 1 1 0.14180966894555355 0.8677864 0.06764004 0.06457351 1 2 0
+58 1 1 0.1459902363533046 0.864166141 0.07662086 0.059212923 1 2 0
+59 1 1 0.1580238898720478 0.8538294 0.08263559 0.0635349452 1 2 0
+61 1 1 0.24588109562120131 0.7820152 0.155458942 0.06252578 1 2 0
+62 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+65 1 1 0.13132980842568853 0.8769285 0.063261956 0.0598095059 1 2 0
+67 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+75 1 1 0.16325820211530925 0.84937185 0.09269803 0.05793005 1 2 0
+78 1 1 0.25856072585963152 0.772162139 0.166928485 0.0609093234 1 2 0
+80 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+81 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+83 1 2 0.93589303759413867 0.559196055 0.392235428 0.04856844 2 1 0
+84 1 1 0.30739863434463804 0.7353574 0.193893969 0.07074856 1 2 0
+85 1 1 0.36387328935790564 0.69497925 0.250199646 0.05482102 1 2 0
+86 1 1 0.2248240850683334 0.7986567 0.1374587 0.06388454 1 2 0
+87 1 1 0.15039000144304782 0.860372365 0.0804473162 0.0591802672 1 2 0
+89 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+94 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+101 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+103 2 2 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
+107 2 2 0.14333565848826485 0.8664632 0.07176019 0.0617765337 2 1 0
+110 2 2 0.23871440437506003 0.7876398 0.156446442 0.0559136942 2 1 0
+114 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+116 2 2 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
+118 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+119 2 1 0.82777753715835345 0.5009504 0.437019467 0.06203004 1 2 0
+124 2 2 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
+126 2 2 0.41542573035225555 0.6600592 0.280402571 0.0595381558 2 1 0
+127 2 2 0.39337764883077281 0.6747739 0.268265069 0.0569609851 2 1 0
+130 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+134 2 1 0.85550151661202845 0.5024455 0.425069958 0.07248443 1 2 0
+135 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+136 2 2 0.14853238287711923 0.8619721 0.0780924 0.05993546 2 1 0
+139 2 2 0.16469402987504086 0.8481532 0.09065638 0.06119041 2 1 0
+140 2 2 0.15401813781546583 0.8572565 0.08267424 0.0600692444 2 1 0
+142 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+143 2 2 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
+146 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+148 2 2 0.15751169565724296 0.8542668 0.08508964 0.06064347 2 1 0
+149 2 2 0.22273123179238696 0.8003299 0.142754182 0.0569158569 2 1 0
diff --git a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key.txt b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key.txt
index 9b303c86aa..e756fcd7ba 100644
--- a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key.txt
+++ b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.key.txt
@@ -1,151 +1,151 @@
Instance Label Assigned Log-loss #1 Score #2 Score #3 Score #1 Class #2 Class #3 Class
-5 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-6 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-8 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-9 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-10 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-11 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-18 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-20 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-21 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-25 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-28 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-31 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-32 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-35 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-37 0 1 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
-40 0 1 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
-41 0 1 0.17550956509619134 0.8390294 0.09255582 0.0684148148 0 1 2
-44 0 1 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
-45 0 1 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
-46 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-48 0 1 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
-50 1 2 0.48031316690941278 0.61858964 0.2931589 0.08825144 1 2 0
-51 1 2 0.18552267596609509 0.83067 0.09896274 0.07036729 1 0 2
-52 1 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
-54 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-56 1 2 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
-60 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-63 1 2 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
-64 1 2 0.14288655580917453 0.8668524 0.06818299 0.06496459 1 2 0
-66 1 2 0.13927185898584951 0.8699915 0.06910439 0.060904108 1 2 0
-68 1 2 0.1475586146516118 0.862811863 0.08110718 0.0560809337 1 2 0
-69 1 2 0.13690026149264065 0.8720572 0.07104707 0.056895718 1 2 0
-70 1 2 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
-71 1 2 0.15194427686527462 0.859036148 0.07716796 0.06379592 1 2 0
-72 1 1.4639003870351257 0.712372541 0.231332228 0.0562952235 2 1 0
-73 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-74 1 2 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
-76 1 2 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
-77 1 2.0734221020246566 0.815010846 0.1257547 0.05923444 2 1 0
-79 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-82 1 2 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
-88 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-90 1 2 0.1425659799992052 0.867130339 0.0762954 0.0565742739 1 2 0
-91 1 2 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
-92 1 2 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
-93 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-95 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-96 1 2 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
-97 1 2 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
-98 1 2 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
-99 1 2 0.13879074815854195 0.870410144 0.06865643 0.06093342 1 2 0
-100 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-102 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-104 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-105 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
-106 2 2 2.3434392875794119 0.8476237 0.09599691 0.05637939 1 2 0
-108 2 0.22657594234978759 0.7972588 0.1479769 0.0547643229 2 1 0
-109 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-111 2 0.177848875720656 0.8370689 0.108788572 0.0541424938 2 1 0
-112 2 0.13281455464792449 0.875627458 0.06831084 0.0560617261 2 1 0
-113 2 0.19621674447868781 0.8218341 0.12273933 0.05542656 2 1 0
-115 2 0.17200937673419167 0.8419713 0.09234353 0.0656852052 2 0 1
-117 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-120 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-121 2 0.16411842591849909 0.8486415 0.09412396 0.05723452 2 1 0
-122 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-123 2 0.28256671512014453 0.753846347 0.189867079 0.05628657 2 1 0
-125 2 0.20564890133993838 0.814118862 0.09413585 0.09174529 2 0 1
-128 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-129 2 0.16567795334648433 0.847319067 0.09548671 0.057194218 2 1 0
-131 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-132 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
-133 2 0.29113037794713281 0.7474182 0.191831991 0.0607497729 2 1 0
-137 2 0.22116862531406531 0.8015815 0.104995139 0.09342336 2 1 0
-138 2 2 0.99148905684440769 0.5769956 0.3710238 0.05198058 1 2 0
-141 2 0.18520119392899573 0.8309371 0.09454043 0.07452248 2 0 1
-144 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
-145 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
-147 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
-0 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-1 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-2 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-3 0 1 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
-4 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-7 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-12 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-13 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-14 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-15 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-16 0 1 0.12377570311405263 0.883578 0.0616899766 0.0547319949 0 1 2
-17 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-19 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-22 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-23 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-24 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-26 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-27 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-29 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-30 0 1 0.13466431803417811 0.874009252 0.0676120147 0.058378756 0 1 2
-33 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-34 0 1 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
-36 0 1 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
-38 0 1 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
-39 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-42 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-43 0 1 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
-47 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-49 0 1 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
-53 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-55 1 2 0.13788477324168841 0.8711991 0.0693822056 0.0594187379 1 2 0
-57 1 2 0.14180960025970848 0.867786467 0.06764004 0.06457352 1 2 0
-58 1 2 0.14599016737971268 0.8641662 0.07662087 0.0592129268 1 2 0
-59 1 2 0.15802382006343754 0.853829443 0.08263559 0.06353495 1 2 0
-61 1 2 0.24588101940191279 0.782015264 0.155458942 0.06252578 1 2 0
-62 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-65 1 2 0.13132980842568853 0.8769285 0.063261956 0.05980951 1 2 0
-67 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-75 1 2 0.16325813194034094 0.8493719 0.09269803 0.0579300523 1 2 0
-78 1 2 0.25856064866775763 0.7721622 0.1669285 0.0609093271 1 2 0
-80 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-81 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-83 1 0.93589296161344149 0.5591961 0.392235458 0.04856844 2 1 0
-84 1 2 0.30739855328930082 0.735357463 0.193893984 0.07074856 1 2 0
-85 1 2 0.36387320359326997 0.6949793 0.250199676 0.05482102 1 2 0
-86 1 2 0.22482401043721545 0.798656762 0.137458712 0.06388455 1 2 0
-87 1 2 0.15038993216531976 0.8603724 0.08044732 0.05918027 1 2 0
-89 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-94 1 2 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
-101 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-103 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
-107 2 0.14333558969752591 0.866463244 0.07176019 0.0617765374 2 1 0
-110 2 0.23871432870005888 0.787639856 0.156446457 0.0559136942 2 1 0
-114 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-116 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
-118 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-119 2 2 0.82777746896385962 0.500950456 0.4370195 0.0620300435 1 2 0
-124 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
-126 2 0.41542564005029348 0.6600593 0.280402571 0.0595381558 2 1 0
-127 2 0.39337756049800737 0.674773932 0.268265069 0.05696099 2 1 0
-130 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-134 2 2 0.85550144650046023 0.5024456 0.42507 0.07248444 1 2 0
-135 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
-136 2 0.1485323137279633 0.861972153 0.0780924 0.05993546 2 1 0
-139 2 0.16469395959924102 0.848153234 0.09065638 0.0611904152 2 1 0
-140 2 0.1540180682859322 0.857256532 0.08267425 0.0600692481 2 1 0
-142 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-143 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
-146 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
-148 2 0.15751162588437911 0.8542669 0.08508965 0.0606434755 2 1 0
-149 2 0.22273115731729765 0.80033 0.142754182 0.05691586 2 1 0
+5 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+6 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+8 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+9 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+10 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+11 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+18 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+20 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+21 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+25 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+28 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+31 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+32 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+35 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+37 0 0 0.13970851121881944 0.8696117 0.07047898 0.0599093363 0 1 2
+40 0 0 0.12225769559664824 0.8849203 0.0591422766 0.05593741 0 2 1
+41 0 0 0.17550956509619134 0.8390294 0.09255582 0.0684148148 0 1 2
+44 0 0 0.25328578422472414 0.776246 0.1675262 0.0562277846 0 1 2
+45 0 0 0.13903052119099127 0.870201468 0.07016017 0.05963834 0 1 2
+46 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+48 0 0 0.12269405525649343 0.88453424 0.0593406856 0.0561250672 0 2 1
+50 1 1 0.48031316690941278 0.61858964 0.2931589 0.08825144 1 2 0
+51 1 1 0.18552267596609509 0.83067 0.09896274 0.07036729 1 0 2
+52 1 2 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
+54 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+56 1 1 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
+60 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+63 1 1 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
+64 1 1 0.14288655580917453 0.8668524 0.06818299 0.06496459 1 2 0
+66 1 1 0.13927185898584951 0.8699915 0.06910439 0.060904108 1 2 0
+68 1 1 0.1475586146516118 0.862811863 0.08110718 0.0560809337 1 2 0
+69 1 1 0.13690026149264065 0.8720572 0.07104707 0.056895718 1 2 0
+70 1 1 0.58631345356437459 0.5563746 0.357763946 0.0858614147 1 2 0
+71 1 1 0.15194427686527462 0.859036148 0.07716796 0.06379592 1 2 0
+72 1 2 1.4639003870351257 0.712372541 0.231332228 0.0562952235 2 1 0
+73 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+74 1 1 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
+76 1 1 0.45819815025419125 0.632422149 0.3149078 0.0526700951 1 2 0
+77 1 2 2.0734221020246566 0.815010846 0.1257547 0.05923444 2 1 0
+79 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+82 1 1 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
+88 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+90 1 1 0.1425659799992052 0.867130339 0.0762954 0.0565742739 1 2 0
+91 1 1 0.442888085987238 0.6421791 0.304338247 0.0534826852 1 2 0
+92 1 1 0.13641919697507263 0.8724768 0.07081407 0.0567091331 1 2 0
+93 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+95 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+96 1 1 0.13407533925580511 0.8745242 0.06425438 0.0612214245 1 2 0
+97 1 1 0.13796619253742226 0.871128142 0.06828712 0.0605847277 1 2 0
+98 1 1 0.54904634529954011 0.5775003 0.363432646 0.0590670444 1 0 2
+99 1 1 0.13879074815854195 0.870410144 0.06865643 0.06093342 1 2 0
+100 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+102 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+104 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+105 2 2 0.12282263476045074 0.8844205 0.0591996 0.056379877 2 1 0
+106 2 1 2.3434392875794119 0.8476237 0.09599691 0.05637939 1 2 0
+108 2 2 0.22657594234978759 0.7972588 0.1479769 0.0547643229 2 1 0
+109 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+111 2 2 0.177848875720656 0.8370689 0.108788572 0.0541424938 2 1 0
+112 2 2 0.13281455464792449 0.875627458 0.06831084 0.0560617261 2 1 0
+113 2 2 0.19621674447868781 0.8218341 0.12273933 0.05542656 2 1 0
+115 2 2 0.17200937673419167 0.8419713 0.09234353 0.0656852052 2 0 1
+117 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+120 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+121 2 2 0.16411842591849909 0.8486415 0.09412396 0.05723452 2 1 0
+122 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+123 2 2 0.28256671512014453 0.753846347 0.189867079 0.05628657 2 1 0
+125 2 2 0.20564890133993838 0.814118862 0.09413585 0.09174529 2 0 1
+128 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+129 2 2 0.16567795334648433 0.847319067 0.09548671 0.057194218 2 1 0
+131 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+132 2 2 0.13716672321276682 0.871824861 0.07206305 0.0561121143 2 1 0
+133 2 2 0.29113037794713281 0.7474182 0.191831991 0.0607497729 2 1 0
+137 2 2 0.22116862531406531 0.8015815 0.104995139 0.09342336 2 1 0
+138 2 1 0.99148905684440769 0.5769956 0.3710238 0.05198058 1 2 0
+141 2 2 0.18520119392899573 0.8309371 0.09454043 0.07452248 2 0 1
+144 2 2 0.16223550400064654 0.850240946 0.0928473249 0.0569117554 2 0 1
+145 2 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
+147 2 2 0.14505497806361808 0.864974737 0.07757514 0.0574501 2 1 0
+0 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+1 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+2 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+3 0 0 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
+4 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+7 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+12 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+13 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+14 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+15 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+16 0 0 0.12377570311405263 0.883578 0.0616899766 0.0547319949 0 1 2
+17 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+19 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+22 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+23 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+24 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+26 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+27 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+29 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+30 0 0 0.13466431803417811 0.874009252 0.0676120147 0.058378756 0 1 2
+33 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+34 0 0 0.13111980383336003 0.8771127 0.06430127 0.05858605 0 1 2
+36 0 0 0.18823437933195611 0.8284205 0.120264143 0.05131534 0 1 2
+38 0 0 0.13227381045206946 0.8761011 0.06422711 0.0596717857 0 1 2
+39 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+42 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+43 0 0 0.13145127781390373 0.876822 0.0646113753 0.0585666336 0 1 2
+47 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+49 0 0 0.12805244799353907 0.879807234 0.0614267066 0.0587660335 0 1 2
+53 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+55 1 1 0.13788477324168841 0.8711991 0.0693822056 0.0594187379 1 2 0
+57 1 1 0.14180960025970848 0.867786467 0.06764004 0.06457352 1 2 0
+58 1 1 0.14599016737971268 0.8641662 0.07662087 0.0592129268 1 2 0
+59 1 1 0.15802382006343754 0.853829443 0.08263559 0.06353495 1 2 0
+61 1 1 0.24588101940191279 0.782015264 0.155458942 0.06252578 1 2 0
+62 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+65 1 1 0.13132980842568853 0.8769285 0.063261956 0.05980951 1 2 0
+67 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+75 1 1 0.16325813194034094 0.8493719 0.09269803 0.0579300523 1 2 0
+78 1 1 0.25856064866775763 0.7721622 0.1669285 0.0609093271 1 2 0
+80 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+81 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+83 1 2 0.93589296161344149 0.5591961 0.392235458 0.04856844 2 1 0
+84 1 1 0.30739855328930082 0.735357463 0.193893984 0.07074856 1 2 0
+85 1 1 0.36387320359326997 0.6949793 0.250199676 0.05482102 1 2 0
+86 1 1 0.22482401043721545 0.798656762 0.137458712 0.06388455 1 2 0
+87 1 1 0.15038993216531976 0.8603724 0.08044732 0.05918027 1 2 0
+89 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+94 1 1 0.13238614086498687 0.876002669 0.06343664 0.0605606847 1 2 0
+101 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+103 2 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
+107 2 2 0.14333558969752591 0.866463244 0.07176019 0.0617765374 2 1 0
+110 2 2 0.23871432870005888 0.787639856 0.156446457 0.0559136942 2 1 0
+114 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+116 2 2 0.1391351873710466 0.8701104 0.06958967 0.0602999441 2 1 0
+118 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+119 2 1 0.82777746896385962 0.500950456 0.4370195 0.0620300435 1 2 0
+124 2 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
+126 2 2 0.41542564005029348 0.6600593 0.280402571 0.0595381558 2 1 0
+127 2 2 0.39337756049800737 0.674773932 0.268265069 0.05696099 2 1 0
+130 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+134 2 1 0.85550144650046023 0.5024456 0.42507 0.07248444 1 2 0
+135 2 2 0.12489127544479019 0.882592857 0.06075944 0.05664773 2 0 1
+136 2 2 0.1485323137279633 0.861972153 0.0780924 0.05993546 2 1 0
+139 2 2 0.16469395959924102 0.848153234 0.09065638 0.0611904152 2 1 0
+140 2 2 0.1540180682859322 0.857256532 0.08267425 0.0600692481 2 1 0
+142 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+143 2 2 0.15317496105764933 0.8579796 0.08057635 0.0614440367 2 1 0
+146 2 2 0.1903299513274587 0.8266863 0.116503216 0.05681048 2 1 0
+148 2 2 0.15751162588437911 0.8542669 0.08508965 0.0606434755 2 1 0
+149 2 2 0.22273115731729765 0.80033 0.142754182 0.05691586 2 1 0
diff --git a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
index cb0d9331ce..c3fc93b75b 100644
--- a/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
+++ b/test/BaselineOutput/SingleRelease/LightGBMMC/LightGBMMC-CV-iris.keyU404.txt
@@ -1,151 +1,151 @@
Instance Label Assigned Log-loss #1 Score #2 Score #3 Score #1 Class #2 Class #3 Class
-5 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-6 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-8 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-9 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-10 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-11 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-18 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-20 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-21 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-25 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-28 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-31 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-32 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-35 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-37 0 1 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
-40 0 1 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
-41 0 1 0.17550963613619172 0.8390293 0.09255581 0.06841481 0 1 2
-44 0 1 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
-45 0 1 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
-46 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-48 0 1 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
-50 1 2 0.48031316690941278 0.61858964 0.293158859 0.08825144 1 2 0
-51 1 2 0.18552274772100014 0.83066994 0.09896273 0.0703672841 1 0 2
-52 1 3 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
-54 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-56 1 2 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
-60 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-63 1 2 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
-64 1 2 0.14288662456903103 0.866852343 0.06818299 0.06496459 1 2 0
-66 1 2 0.13927192749760864 0.8699914 0.06910439 0.0609041043 1 2 0
-68 1 2 0.1475586146516118 0.862811863 0.08110717 0.05608093 1 2 0
-69 1 2 0.13690032984210998 0.87205714 0.07104707 0.0568957143 1 2 0
-70 1 2 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
-71 1 2 0.15194434625076791 0.8590361 0.07716796 0.06379592 1 2 0
-72 1 3 1.4639004514496772 0.7123725 0.231332213 0.05629522 2 1 0
-73 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-74 1 2 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
-76 1 2 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
-77 1 3 2.0734221020246566 0.8150108 0.1257547 0.0592344366 2 1 0
-79 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-82 1 2 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
-88 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-90 1 2 0.1425660487370225 0.8671303 0.07629539 0.05657427 1 2 0
-91 1 2 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
-92 1 2 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
-93 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-95 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-96 1 2 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
-97 1 2 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
-98 1 2 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
-99 1 2 0.13879081663734727 0.8704101 0.06865642 0.0609334148 1 2 0
-100 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-102 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-104 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-105 2 3 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
-106 2 2 2.3434393651921286 0.847623646 0.0959969 0.0563793853 1 2 0
-108 2 3 0.22657601711176881 0.797258735 0.1479769 0.05476432 2 1 0
-109 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-111 2 3 0.17784894692703557 0.837068856 0.108788565 0.0541424938 2 1 0
-112 2 3 0.13281462271870764 0.8756274 0.0683108345 0.0560617223 2 1 0
-113 2 3 0.19621674447868781 0.8218341 0.122739322 0.0554265566 2 1 0
-115 2 3 0.17200944752597333 0.8419712 0.0923435241 0.0656852 2 0 1
-117 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-120 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-121 2 3 0.16411849615386448 0.848641455 0.09412395 0.0572345145 2 1 0
-122 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-123 2 3 0.28256679418751329 0.7538463 0.189867079 0.056286566 2 1 0
-125 2 3 0.20564897455362954 0.8141188 0.09413585 0.09174528 2 0 1
-128 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-129 2 3 0.16567802369146914 0.847319 0.0954867 0.0571942143 2 1 0
-131 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-132 2 3 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
-133 2 3 0.29113045769451545 0.747418165 0.191831976 0.06074977 2 1 0
-137 2 3 0.22116869967287578 0.801581442 0.104995131 0.09342335 2 1 0
-138 2 2 0.99148913716896714 0.5769956 0.371023774 0.0519805774 1 2 0
-141 2 3 0.18520126566083658 0.830937 0.09454043 0.07452247 2 0 1
-144 2 3 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
-145 2 3 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
-147 2 3 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
-0 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-1 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-2 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-3 0 1 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
-4 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-7 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-12 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-13 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-14 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-15 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-16 0 1 0.12377570311405263 0.883578 0.0616899729 0.05473199 0 1 2
-17 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-19 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-22 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-23 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-24 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-26 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-27 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-29 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-30 0 1 0.13466438623099261 0.8740092 0.06761201 0.0583787523 0 1 2
-33 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-34 0 1 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
-36 0 1 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
-38 0 1 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
-39 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-42 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-43 0 1 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
-47 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-49 0 1 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
-53 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-55 1 2 0.13788484165848175 0.871199 0.0693822056 0.0594187342 1 2 0
-57 1 2 0.14180966894555355 0.8677864 0.06764004 0.06457351 1 2 0
-58 1 2 0.1459902363533046 0.864166141 0.07662086 0.059212923 1 2 0
-59 1 2 0.1580238898720478 0.8538294 0.08263559 0.0635349452 1 2 0
-61 1 2 0.24588109562120131 0.7820152 0.155458942 0.06252578 1 2 0
-62 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-65 1 2 0.13132980842568853 0.8769285 0.063261956 0.0598095059 1 2 0
-67 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-75 1 2 0.16325820211530925 0.84937185 0.09269803 0.05793005 1 2 0
-78 1 2 0.25856072585963152 0.772162139 0.166928485 0.0609093234 1 2 0
-80 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-81 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-83 1 3 0.93589303759413867 0.559196055 0.392235428 0.04856844 2 1 0
-84 1 2 0.30739863434463804 0.7353574 0.193893969 0.07074856 1 2 0
-85 1 2 0.36387328935790564 0.69497925 0.250199646 0.05482102 1 2 0
-86 1 2 0.2248240850683334 0.7986567 0.1374587 0.06388454 1 2 0
-87 1 2 0.15039000144304782 0.860372365 0.0804473162 0.0591802672 1 2 0
-89 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-94 1 2 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
-101 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-103 2 3 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
-107 2 3 0.14333565848826485 0.8664632 0.07176019 0.0617765337 2 1 0
-110 2 3 0.23871440437506003 0.7876398 0.156446442 0.0559136942 2 1 0
-114 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-116 2 3 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
-118 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-119 2 2 0.82777753715835345 0.5009504 0.437019467 0.06203004 1 2 0
-124 2 3 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
-126 2 3 0.41542573035225555 0.6600592 0.280402571 0.0595381558 2 1 0
-127 2 3 0.39337764883077281 0.6747739 0.268265069 0.0569609851 2 1 0
-130 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-134 2 2 0.85550151661202845 0.5024455 0.425069958 0.07248443 1 2 0
-135 2 3 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
-136 2 3 0.14853238287711923 0.8619721 0.0780924 0.05993546 2 1 0
-139 2 3 0.16469402987504086 0.8481532 0.09065638 0.06119041 2 1 0
-140 2 3 0.15401813781546583 0.8572565 0.08267424 0.0600692444 2 1 0
-142 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-143 2 3 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
-146 2 3 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
-148 2 3 0.15751169565724296 0.8542668 0.08508964 0.06064347 2 1 0
-149 2 3 0.22273123179238696 0.8003299 0.142754182 0.0569158569 2 1 0
+5 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+6 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+8 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+9 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+10 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+11 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+18 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+20 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+21 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+25 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+28 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+31 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+32 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+35 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+37 0 0 0.13970857976050094 0.8696116 0.0704789758 0.0599093325 0 1 2
+40 0 0 0.12225776295259752 0.884920239 0.0591422729 0.0559374057 0 2 1
+41 0 0 0.17550963613619172 0.8390293 0.09255581 0.06841481 0 1 2
+44 0 0 0.25328586101049333 0.776245952 0.167526186 0.05622778 0 1 2
+45 0 0 0.13903058968621793 0.8702014 0.07016017 0.0596383363 0 1 2
+46 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+48 0 0 0.12269412264184056 0.8845342 0.0593406819 0.0561250634 0 2 1
+50 1 1 0.48031316690941278 0.61858964 0.293158859 0.08825144 1 2 0
+51 1 1 0.18552274772100014 0.83066994 0.09896273 0.0703672841 1 0 2
+52 1 2 1.8686139310181762 0.745523036 0.154337436 0.1001395 2 1 0
+54 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+56 1 1 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
+60 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+63 1 1 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
+64 1 1 0.14288662456903103 0.866852343 0.06818299 0.06496459 1 2 0
+66 1 1 0.13927192749760864 0.8699914 0.06910439 0.0609041043 1 2 0
+68 1 1 0.1475586146516118 0.862811863 0.08110717 0.05608093 1 2 0
+69 1 1 0.13690032984210998 0.87205714 0.07104707 0.0568957143 1 2 0
+70 1 1 0.58631345356437459 0.5563746 0.357763946 0.08586141 1 2 0
+71 1 1 0.15194434625076791 0.8590361 0.07716796 0.06379592 1 2 0
+72 1 2 1.4639004514496772 0.7123725 0.231332213 0.05629522 2 1 0
+73 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+74 1 1 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
+76 1 1 0.45819824450238866 0.6324221 0.314907759 0.0526700951 1 2 0
+77 1 2 2.0734221020246566 0.8150108 0.1257547 0.0592344366 2 1 0
+79 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+82 1 1 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
+88 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+90 1 1 0.1425660487370225 0.8671303 0.07629539 0.05657427 1 2 0
+91 1 1 0.44288817880347908 0.642179 0.304338247 0.05348268 1 2 0
+92 1 1 0.13641926529166934 0.872476757 0.07081407 0.05670913 1 2 0
+93 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+95 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+96 1 1 0.13407540741246499 0.8745241 0.06425437 0.06122142 1 2 0
+97 1 1 0.13796626095978629 0.8711281 0.06828712 0.0605847239 1 2 0
+98 1 1 0.54904634529954011 0.5775003 0.363432616 0.05906704 1 0 2
+99 1 1 0.13879081663734727 0.8704101 0.06865642 0.0609334148 1 2 0
+100 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+102 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+104 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+105 2 2 0.1228227021544628 0.884420455 0.0591995977 0.0563798733 2 1 0
+106 2 1 2.3434393651921286 0.847623646 0.0959969 0.0563793853 1 2 0
+108 2 2 0.22657601711176881 0.797258735 0.1479769 0.05476432 2 1 0
+109 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+111 2 2 0.17784894692703557 0.837068856 0.108788565 0.0541424938 2 1 0
+112 2 2 0.13281462271870764 0.8756274 0.0683108345 0.0560617223 2 1 0
+113 2 2 0.19621674447868781 0.8218341 0.122739322 0.0554265566 2 1 0
+115 2 2 0.17200944752597333 0.8419712 0.0923435241 0.0656852 2 0 1
+117 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+120 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+121 2 2 0.16411849615386448 0.848641455 0.09412395 0.0572345145 2 1 0
+122 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+123 2 2 0.28256679418751329 0.7538463 0.189867079 0.056286566 2 1 0
+125 2 2 0.20564897455362954 0.8141188 0.09413585 0.09174528 2 0 1
+128 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+129 2 2 0.16567802369146914 0.847319 0.0954867 0.0571942143 2 1 0
+131 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+132 2 2 0.13716679158045109 0.8718248 0.07206305 0.05611211 2 1 0
+133 2 2 0.29113045769451545 0.747418165 0.191831976 0.06074977 2 1 0
+137 2 2 0.22116869967287578 0.801581442 0.104995131 0.09342335 2 1 0
+138 2 1 0.99148913716896714 0.5769956 0.371023774 0.0519805774 1 2 0
+141 2 2 0.18520126566083658 0.830937 0.09454043 0.07452247 2 0 1
+144 2 2 0.16223557410388861 0.8502409 0.09284732 0.05691175 2 0 1
+145 2 2 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
+147 2 2 0.14505504697273677 0.8649747 0.07757514 0.0574500971 2 1 0
+0 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+1 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+2 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+3 0 0 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
+4 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+7 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+12 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+13 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+14 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+15 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+16 0 0 0.12377570311405263 0.883578 0.0616899729 0.05473199 0 1 2
+17 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+19 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+22 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+23 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+24 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+26 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+27 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+29 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+30 0 0 0.13466438623099261 0.8740092 0.06761201 0.0583787523 0 1 2
+33 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+34 0 0 0.13111987178887785 0.8771126 0.06430127 0.0585860461 0 1 2
+36 0 0 0.18823445128170324 0.82842046 0.120264135 0.0513153374 0 1 2
+38 0 0 0.13227381045206946 0.8761011 0.0642271042 0.059671782 0 1 2
+39 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+42 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+43 0 0 0.13145134579195075 0.876821935 0.0646113753 0.05856663 0 1 2
+47 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+49 0 0 0.12805244799353907 0.879807234 0.0614267029 0.05876603 0 1 2
+53 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+55 1 1 0.13788484165848175 0.871199 0.0693822056 0.0594187342 1 2 0
+57 1 1 0.14180966894555355 0.8677864 0.06764004 0.06457351 1 2 0
+58 1 1 0.1459902363533046 0.864166141 0.07662086 0.059212923 1 2 0
+59 1 1 0.1580238898720478 0.8538294 0.08263559 0.0635349452 1 2 0
+61 1 1 0.24588109562120131 0.7820152 0.155458942 0.06252578 1 2 0
+62 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+65 1 1 0.13132980842568853 0.8769285 0.063261956 0.0598095059 1 2 0
+67 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+75 1 1 0.16325820211530925 0.84937185 0.09269803 0.05793005 1 2 0
+78 1 1 0.25856072585963152 0.772162139 0.166928485 0.0609093234 1 2 0
+80 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+81 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+83 1 2 0.93589303759413867 0.559196055 0.392235428 0.04856844 2 1 0
+84 1 1 0.30739863434463804 0.7353574 0.193893969 0.07074856 1 2 0
+85 1 1 0.36387328935790564 0.69497925 0.250199646 0.05482102 1 2 0
+86 1 1 0.2248240850683334 0.7986567 0.1374587 0.06388454 1 2 0
+87 1 1 0.15039000144304782 0.860372365 0.0804473162 0.0591802672 1 2 0
+89 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+94 1 1 0.13238620890661379 0.8760026 0.0634366348 0.06056068 1 2 0
+101 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+103 2 2 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
+107 2 2 0.14333565848826485 0.8664632 0.07176019 0.0617765337 2 1 0
+110 2 2 0.23871440437506003 0.7876398 0.156446442 0.0559136942 2 1 0
+114 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+116 2 2 0.13913525587344278 0.870110333 0.06958966 0.06029994 2 1 0
+118 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+119 2 1 0.82777753715835345 0.5009504 0.437019467 0.06203004 1 2 0
+124 2 2 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
+126 2 2 0.41542573035225555 0.6600592 0.280402571 0.0595381558 2 1 0
+127 2 2 0.39337764883077281 0.6747739 0.268265069 0.0569609851 2 1 0
+130 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+134 2 1 0.85550151661202845 0.5024455 0.425069958 0.07248443 1 2 0
+135 2 2 0.12489134297836053 0.8825928 0.0607594363 0.0566477254 2 0 1
+136 2 2 0.14853238287711923 0.8619721 0.0780924 0.05993546 2 1 0
+139 2 2 0.16469402987504086 0.8481532 0.09065638 0.06119041 2 1 0
+140 2 2 0.15401813781546583 0.8572565 0.08267424 0.0600692444 2 1 0
+142 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+143 2 2 0.1531750305285868 0.857979536 0.0805763453 0.06144403 2 1 0
+146 2 2 0.19033002342813979 0.826686263 0.116503209 0.0568104759 2 1 0
+148 2 2 0.15751169565724296 0.8542668 0.08508964 0.06064347 2 1 0
+149 2 2 0.22273123179238696 0.8003299 0.142754182 0.0569158569 2 1 0
diff --git a/test/Directory.Build.targets b/test/Directory.Build.targets
index c497a50172..88f693b838 100644
--- a/test/Directory.Build.targets
+++ b/test/Directory.Build.targets
@@ -1,34 +1,5 @@
-
-
-
-
- lib
- .dll
- .so
- .dylib
-
-
-
-
- $(NativeOutputPath)$(LibPrefix)%(NativeAssemblyReference.Identity)$(LibExtension)
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj
index 3cf404f981..bed3dce0eb 100644
--- a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj
+++ b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj
@@ -10,6 +10,7 @@
+
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index ded78d11de..de0db6f3a9 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -6,6 +6,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
+using System.Text.RegularExpressions;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Core.Tests.UnitTests;
using Microsoft.ML.Runtime.Data;
@@ -274,13 +275,31 @@ public void EntryPointCatalog()
var entryPointsSubDir = Path.Combine("..", "Common", "EntryPoints");
var catalog = ModuleCatalog.CreateInstance(Env);
var path = DeleteOutputPath(entryPointsSubDir, epListFile);
+
+ var regex = new Regex(@"\r\n?|\n", RegexOptions.Compiled);
File.WriteAllLines(path, catalog.AllEntryPoints()
- .Select(x => string.Join("\t", x.Name, x.Description, x.Method.DeclaringType, x.Method.Name, x.InputType, x.OutputType).Replace(Environment.NewLine, "\\n "))
+ .Select(x => string.Join("\t",
+ x.Name,
+ regex.Replace(x.Description, ""),
+ x.Method.DeclaringType,
+ x.Method.Name,
+ x.InputType,
+ x.OutputType)
+ .Replace(Environment.NewLine, ""))
.OrderBy(x => x));
CheckEquality(entryPointsSubDir, epListFile);
var jObj = JsonManifestUtils.BuildAllManifests(Env, catalog);
+
+ // Strip newline characters (\r\n, \r or \n) from the description of every entry under FieldNames.TopEntryPoints.
+ if (jObj[FieldNames.TopEntryPoints] != null && jObj[FieldNames.TopEntryPoints] is JArray)
+ {
+ foreach (JToken entry in jObj[FieldNames.TopEntryPoints].Children())
+ if (entry[FieldNames.Desc] != null)
+ entry[FieldNames.Desc] = regex.Replace(entry[FieldNames.Desc].ToString(), "");
+ }
+
var jPath = DeleteOutputPath(entryPointsSubDir, manifestFile);
using (var file = File.OpenWrite(jPath))
using (var writer = new StreamWriter(file))
@@ -1784,6 +1803,18 @@ public void EntryPointEvaluateRanking()
}
}
+        // Entry-point test: hands the "breast-cancer.txt" data file name and the
+        // "Trainers.LightGbmBinaryClassifier" identifier to the TestEntryPointRoutine helper.
+        [Fact]
+        public void EntryPointLightGbmBinary()
+            => TestEntryPointRoutine("breast-cancer.txt", "Trainers.LightGbmBinaryClassifier");
+
+        // Entry-point test: hands the resolved "iris.txt" path and "Trainers.LightGbmClassifier" to TestEntryPointRoutine.
+        // NOTE(review): EntryPointLightGbmBinary passes a bare file name instead of a GetDataPath result - confirm the helper accepts both.
+        [Fact]
+        public void EntryPointLightGbmMultiClass()
+            => TestEntryPointRoutine(GetDataPath(@"iris.txt"), "Trainers.LightGbmClassifier");
+
[Fact]
public void EntryPointSdcaBinary()
{
diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
index 26ba5d118e..928f740b57 100644
--- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
+++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -407,7 +407,7 @@ public void FastTreeBinaryClassificationTest()
});
Done();
}
-
+
[Fact]
[TestCategory("Binary")]
[TestCategory("LightGBM")]
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
index 696fb9e92d..01027b63e0 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
@@ -138,7 +138,7 @@ public class IrisPrediction
}
[Fact]
- public void TrainOva()
+ public void TrainOneVersusAll()
{
string dataPath = GetDataPath("iris.txt");
diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs
index 1ebc2489ec..b104570ca7 100644
--- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs
@@ -32,6 +32,18 @@ public void TrainAndPredictSentimentModelTest()
ValidateBinaryMetrics(metrics);
}
+        // Trains the LightGBM sentiment pipeline, evaluates it on the text-loader test data, then checks the example predictions and the binary metrics.
+        [Fact]
+        public void TrainAndPredictLightGBMSentimentModelTest()
+        {
+            var trainedModel = PreparePipelineLightGBM().Train();
+            var evaluationData = PrepareTextLoaderTestData();
+            var binaryMetrics = new BinaryClassificationEvaluator().Evaluate(trainedModel, evaluationData);
+
+            ValidateExamplesLightGBM(trainedModel);
+            ValidateBinaryMetricsLightGBM(binaryMetrics);
+        }
+
[Fact]
public void TrainTestPredictSentimentModelTest()
{
@@ -163,8 +175,42 @@ public void CrossValidateSentimentModelTest()
Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);
}
+        // Asserts the expected evaluation metrics and the order-2 confusion matrix for the LightGBM sentiment model.
+        private void ValidateBinaryMetricsLightGBM(BinaryClassificationMetrics metrics)
+        {
+            // Scalar metrics; where a third argument is given it is the number of decimal places compared.
+            Assert.Equal(0.6111, metrics.Accuracy, 4);
+            Assert.Equal(0.8, metrics.Auc, 1);
+            Assert.Equal(0.85, metrics.Auprc, 2);
+            Assert.Equal(1, metrics.Entropy, 3);
+            Assert.Equal(0.72, metrics.F1Score, 4);
+            Assert.Equal(0.952, metrics.LogLoss, 3);
+            Assert.Equal(4.777, metrics.LogLossReduction, 3);
+            Assert.Equal(1, metrics.NegativePrecision, 3);
+            Assert.Equal(0.222, metrics.NegativeRecall, 3);
+            Assert.Equal(0.562, metrics.PositivePrecision, 3);
+            Assert.Equal(1, metrics.PositiveRecall);
+            // The matrix must list exactly two classes: "positive" first, "negative" second.
+            var confusion = metrics.ConfusionMatrix;
+            Assert.Equal(2, confusion.Order);
+            Assert.Equal(2, confusion.ClassNames.Count);
+            Assert.Equal("positive", confusion.ClassNames[0]);
+            Assert.Equal("negative", confusion.ClassNames[1]);
+            // Entries for index 0 / "positive", read through both the integer and the class-name indexer.
+            Assert.Equal(9, confusion[0, 0]);
+            Assert.Equal(9, confusion["positive", "positive"]);
+            Assert.Equal(0, confusion[0, 1]);
+            Assert.Equal(0, confusion["positive", "negative"]);
+            // Entries for index 1 / "negative", read through both the integer and the class-name indexer.
+            Assert.Equal(7, confusion[1, 0]);
+            Assert.Equal(7, confusion["negative", "positive"]);
+            Assert.Equal(2, confusion[1, 1]);
+            Assert.Equal(2, confusion["negative", "negative"]);
+        }
+
private void ValidateBinaryMetrics(BinaryClassificationMetrics metrics)
{
+
Assert.Equal(.5556, metrics.Accuracy, 4);
Assert.Equal(.8, metrics.Auc, 1);
Assert.Equal(.87, metrics.Auprc, 2);
@@ -236,18 +282,81 @@ private LearningPipeline PreparePipeline()
WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true }
});
+
pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 });
+
+ pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });
+ return pipeline;
+ }
+
+        // Builds the LightGBM variant of the sentiment pipeline: tab-separated text loader, text featurizer,
+        // LightGbmBinaryClassifier and, last, the PredictedLabelColumnOriginalValueConverter.
+        private LearningPipeline PreparePipelineLightGBM()
+        {
+            var sentimentFile = GetDataPath(SentimentDataPath);
+            var pipeline = new LearningPipeline();
+
+            // Loader: header row, tab separator, numeric "Label" from range 0 and text "SentimentText" from range 1.
+            var loader = new Data.TextLoader(sentimentFile)
+            {
+                Arguments = new TextLoaderArguments
+                {
+                    Separator = new[] { '\t' },
+                    HasHeader = true,
+                    Column = new[]
+                    {
+                        new TextLoaderColumn() { Name = "Label", Source = new [] { new TextLoaderRange(0) }, Type = Data.DataKind.Num },
+                        new TextLoaderColumn() { Name = "SentimentText", Source = new [] { new TextLoaderRange(1) }, Type = Data.DataKind.Text }
+                    }
+                }
+            };
+            pipeline.Add(loader);
+
+            // Text featurizer with the normalization, stop-word and n-gram options listed below.
+            var featurizer = new TextFeaturizer("Features", "SentimentText")
+            {
+                KeepDiacritics = false,
+                KeepPunctuations = false,
+                TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
+                OutputTokens = true,
+                StopWordsRemover = new PredefinedStopWordsRemover(),
+                VectorNormalizer = TextTransformTextNormKind.L2,
+                CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
+                WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true }
+            };
+            pipeline.Add(featurizer);
+
+            // LightGBM learner; the 5 / 5 / 2 values match those given to the FastTree learner in PreparePipeline.
+            var trainer = new LightGbmBinaryClassifier()
+            {
+                NumLeaves = 5,
+                NumBoostRound = 5,
+                MinDataPerLeaf = 2
+            };
+            pipeline.Add(trainer);
             pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });
             return pipeline;
         }
- private void ValidateExamples(PredictionModel model)
+ private void ValidateExamples(PredictionModel model, bool useLightGBM = false)
{
+ // NOTE(review): useLightGBM is never read in this method; the LightGBM test calls ValidateExamplesLightGBM instead - confirm whether the parameter is needed.
var sentiments = GetTestData();
var predictions = model.Predict(sentiments);
Assert.Equal(2, predictions.Count());
+ // Exactly two predictions are expected: the first must be false, the second true.
Assert.True(predictions.ElementAt(0).Sentiment.IsFalse);
Assert.True(predictions.ElementAt(1).Sentiment.IsTrue);
+ }
+
+        // Predicts the GetTestData() examples with the LightGBM model and checks the two results.
+        private void ValidateExamplesLightGBM(PredictionModel model)
+        {
+            var predicted = model.Predict(GetTestData());
+            Assert.Equal(2, predicted.Count());
+            // Unlike ValidateExamples, both predictions are expected to be true here.
+            Assert.True(predicted.ElementAt(0).Sentiment.IsTrue);
+            Assert.True(predicted.ElementAt(1).Sentiment.IsTrue);
         }
private Data.TextLoader PrepareTextLoaderTestData()