diff --git a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs
index 544bce0aeb..a51f502ecd 100644
--- a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs
+++ b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs
@@ -90,6 +90,19 @@ public bool TryUnparse(StringBuilder sb)
public sealed class Arguments : TransformInputBase
{
+ /// <summary>
+ /// Parameterless constructor. It assigns nothing, so the Column field keeps its default value.
+ /// </summary>
+ public Arguments()
+ {
+ }
+
+ /// <summary>
+ /// Builds arguments holding exactly one column definition.
+ /// </summary>
+ /// <param name="name">Assigned to the Name of the single column definition.</param>
+ /// <param name="source">Assigned to the Source of the single column definition.</param>
+ public Arguments(string name, params string[] source)
+ {
+ Column = new[] { new Column()
+ {
+ Name = name,
+ Source = source
+ }};
+ }
+
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:srcs)", ShortName = "col", SortOrder = 1)]
public Column[] Column;
}
@@ -527,6 +540,18 @@ private static VersionInfo GetVersionInfo()
public override ISchema Schema => _bindings;
+ /// <summary>
+ /// Convenience constructor for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Input columns to concatenate.</param>
+ public ConcatTransform(IHostEnvironment env, IDataView input, string name, params string[] source)
+ : this(env, new Arguments(name, source), input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs b/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs
index f365dd9e98..2729a48e3e 100644
--- a/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs
+++ b/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs
@@ -64,6 +64,18 @@ private static VersionInfo GetVersionInfo()
private const string RegistrationName = "CopyColumns";
+ /// <summary>
+ /// Convenience constructor for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be copied.</param>
+ public CopyColumnsTransform(IHostEnvironment env, IDataView input, string name, string source)
+ : this(env, new Arguments(){ Column = new[] { new Column() { Source = source, Name = name }}}, input)
+ {
+ }
+
public CopyColumnsTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null)
{
diff --git a/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs
index 502e6f395d..3e15199ff7 100644
--- a/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs
+++ b/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs
@@ -237,6 +237,17 @@ private static VersionInfo GetVersionInfo()
private const string DropRegistrationName = "DropColumns";
private const string KeepRegistrationName = "KeepColumns";
+ /// <summary>
+ /// Convenience constructor for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="columnsToDrop">Name of the columns to be dropped.</param>
+ public DropColumnsTransform(IHostEnvironment env, IDataView input, params string[] columnsToDrop)
+ :this(env, new Arguments() { Column = columnsToDrop }, input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
@@ -383,4 +394,17 @@ public ValueGetter GetGetter(int col)
}
}
}
+
+ public class KeepColumnsTransform
+ {
+ /// <summary>
+ /// A helper method to create a keep-columns transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="columnsToKeep">Name of the columns to be kept. All other columns will be removed.</param>
+ /// <returns>A <see cref="DropColumnsTransform"/> constructed from <see cref="DropColumnsTransform.KeepArguments"/>.</returns>
+ public static IDataTransform Create(IHostEnvironment env, IDataView input, params string[] columnsToKeep)
+ => new DropColumnsTransform(env, new DropColumnsTransform.KeepArguments() { Column = columnsToKeep }, input);
+ }
}
diff --git a/src/Microsoft.ML.Data/Transforms/NAFilter.cs b/src/Microsoft.ML.Data/Transforms/NAFilter.cs
index 96c2111366..7b94ff1e07 100644
--- a/src/Microsoft.ML.Data/Transforms/NAFilter.cs
+++ b/src/Microsoft.ML.Data/Transforms/NAFilter.cs
@@ -28,13 +28,18 @@ namespace Microsoft.ML.Runtime.Data
{
public sealed class NAFilter : FilterBase
{
+ // Default value referenced by both the Arguments.Complement initializer and the
+ // convenience constructor's optional 'complement' parameter.
+ private static class Defaults
+ {
+ public const bool Complement = false;
+ }
+
public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Column", ShortName = "col", SortOrder = 1)]
public string[] Column;
[Argument(ArgumentType.Multiple, HelpText = "If true, keep only rows that contain NA values, and filter the rest.")]
- public bool Complement;
+ // Defaults.Complement is false, the same value the uninitialized field had before.
+ public bool Complement = Defaults.Complement;
}
private sealed class ColInfo
@@ -72,6 +77,18 @@ private static VersionInfo GetVersionInfo()
private readonly bool _complement;
private const string RegistrationName = "MissingValueFilter";
+ /// <summary>
+ /// Convenience constructor for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="complement">If true, keep only rows that contain NA values, and filter the rest.</param>
+ /// <param name="columns">Name of the columns. Only these columns will be used to filter rows having 'NA' values.</param>
+ public NAFilter(IHostEnvironment env, IDataView input, bool complement = Defaults.Complement, params string[] columns)
+ : this(env, new Arguments() { Column = columns, Complement = complement }, input)
+ {
+ }
+
public NAFilter(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, input)
{
diff --git a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs
index b7f03deeec..72e7030604 100644
--- a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs
+++ b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs
@@ -25,20 +25,27 @@ namespace Microsoft.ML.Runtime.Data
///
public sealed class BootstrapSampleTransform : FilterBase
{
+ // Default values referenced by both the Arguments field initializers and the
+ // convenience constructor's optional parameters.
+ private static class Defaults
+ {
+ public const bool Complement = false;
+ public const bool ShuffleInput = true;
+ public const int PoolSize = 1000;
+ }
+
public sealed class Arguments : TransformInputBase
{
+ // Complement, ShuffleInput and PoolSize are initialized from the Defaults constants;
+ // Seed has no initializer and therefore starts out as null.
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.",
ShortName = "comp")]
- public bool Complement;
+ public bool Complement = Defaults.Complement;
[Argument(ArgumentType.AtMostOnce, HelpText = "The random seed. If unspecified random state will be instead derived from the environment.")]
public uint? Seed;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.", ShortName = "si")]
- public bool ShuffleInput = true;
+ public bool ShuffleInput = Defaults.ShuffleInput;
[Argument(ArgumentType.LastOccurenceWins, HelpText = "When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.", ShortName = "pool")]
- public int PoolSize = 1000;
+ public int PoolSize = Defaults.PoolSize;
}
internal const string Summary = "Approximate bootstrap sampling.";
@@ -76,6 +83,25 @@ public BootstrapSampleTransform(IHostEnvironment env, Arguments args, IDataView
_poolSize = args.PoolSize;
}
+ /// <summary>
+ /// Convenience constructor for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="complement">Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.</param>
+ /// <param name="seed">The random seed. If unspecified random state will be instead derived from the environment.</param>
+ /// <param name="shuffleInput">Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.</param>
+ /// <param name="poolSize">When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.</param>
+ public BootstrapSampleTransform(IHostEnvironment env,
+ IDataView input,
+ bool complement = Defaults.Complement,
+ uint? seed = null,
+ bool shuffleInput = Defaults.ShuffleInput,
+ int poolSize = Defaults.PoolSize)
+ : this(env, new Arguments() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }, input)
+ {
+ }
+
private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input)
: base(host, input)
{
diff --git a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs
index a30b9a6c84..5aec5658b2 100644
--- a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs
+++ b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs
@@ -86,6 +86,15 @@ public bool TryUnparse(StringBuilder sb)
}
}
+ // Default values referenced by the Arguments field initializers; HashBits, InvertHash and
+ // OutputKind are also the defaults of the convenience Create overload's optional parameters.
+ private static class Defaults
+ {
+ public const int HashBits = 16;
+ public const uint Seed = 314489979;
+ public const bool Ordered = true;
+ // 0 is the value the InvertHash field had implicitly before it gained an initializer.
+ public const int InvertHash = 0;
+ public const CategoricalTransform.OutputKind OutputKind = CategoricalTransform.OutputKind.Bag;
+ }
+
///
/// This class is a merger of and
/// with join option removed
@@ -97,22 +106,22 @@ public sealed class Arguments : TransformInputBase
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 30, inclusive.",
ShortName = "bits", SortOrder = 2)]
- public int HashBits = 16;
+ public int HashBits = Defaults.HashBits;
[Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")]
- public uint Seed = 314489979;
+ public uint Seed = Defaults.Seed;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")]
- public bool Ordered = true;
+ public bool Ordered = Defaults.Ordered;
[Argument(ArgumentType.AtMostOnce,
HelpText = "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
ShortName = "ih")]
- public int InvertHash;
+ public int InvertHash = Defaults.InvertHash;
[Argument(ArgumentType.AtMostOnce, HelpText = "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)",
ShortName = "kind", SortOrder = 102)]
- public CategoricalTransform.OutputKind OutputKind = CategoricalTransform.OutputKind.Bag;
+ public CategoricalTransform.OutputKind OutputKind = Defaults.OutputKind;
}
internal const string Summary = "Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the "
@@ -120,6 +129,38 @@ public sealed class Arguments : TransformInputBase
public const string UserName = "Categorical Hash Transform";
+ /// <summary>
+ /// A helper method to create the Categorical Hash Transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
+ /// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
+ /// <param name="outputKind">The type of output expected.</param>
+ public static IDataTransform Create(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source =null,
+ int hashBits = Defaults.HashBits,
+ int invertHash = Defaults.InvertHash,
+ CategoricalTransform.OutputKind outputKind = Defaults.OutputKind)
+ {
+ // Seed and Ordered are not exposed here, so they keep the Arguments field initializers.
+ var args = new Arguments()
+ {
+ Column = new[] { new Column(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ HashBits = hashBits,
+ InvertHash = invertHash,
+ OutputKind = outputKind
+ };
+ return Create(env, args, input);
+ }
+
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs
index 621bbe6c1d..dfc6849427 100644
--- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs
+++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs
@@ -38,15 +38,27 @@ public static class CategoricalTransform
{
public enum OutputKind : byte
{
+ /// <summary>
+ /// Output is a bag (multi-set) vector
+ /// </summary>
[TGUI(Label = "Output is a bag (multi-set) vector")]
Bag = 1,
+ /// <summary>
+ /// Output is an indicator vector
+ /// </summary>
[TGUI(Label = "Output is an indicator vector")]
Ind = 2,
+ /// <summary>
+ /// Output is a key value
+ /// </summary>
[TGUI(Label = "Output is a key value")]
Key = 3,
+ /// <summary>
+ /// Output is binary encoded
+ /// </summary>
[TGUI(Label = "Output is binary encoded")]
Bin = 4,
}
@@ -96,6 +108,11 @@ public bool TryUnparse(StringBuilder sb)
}
}
+ // Default output kind referenced by both the Arguments.OutputKind initializer and the
+ // convenience Create overload's optional 'outputKind' parameter.
+ private static class Defaults
+ {
+ public const OutputKind OutKind = OutputKind.Ind;
+ }
+
public sealed class Arguments : TermTransform.ArgumentsBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
@@ -103,7 +120,7 @@ public sealed class Arguments : TermTransform.ArgumentsBase
[Argument(ArgumentType.AtMostOnce, HelpText = "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)",
ShortName = "kind", SortOrder = 102)]
- public OutputKind OutputKind = OutputKind.Ind;
+ public OutputKind OutputKind = Defaults.OutKind;
public Arguments()
{
@@ -118,6 +135,28 @@ public Arguments()
public const string UserName = "Categorical Transform";
+ /// <summary>
+ /// A helper method to create <see cref="CategoricalTransform"/> for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="outputKind">The type of output expected.</param>
+ public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, OutputKind outputKind = Defaults.OutKind)
+ {
+ var args = new Arguments()
+ {
+ Column = new[] { new Column(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ OutputKind = outputKind
+ };
+ return Create(env, args, input);
+ }
+
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
index 72131902b5..79adda882e 100644
--- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
@@ -28,17 +28,40 @@ public static class CountFeatureSelectionTransform
public const string Summary = "Selects the slots for which the count of non-default values is greater than or equal to a threshold.";
public const string UserName = "Count Feature Selection Transform";
+ // Default threshold referenced by both the Arguments.Count initializer and the
+ // convenience Create overload's optional 'count' parameter.
+ private static class Defaults
+ {
+ public const long Count = 1;
+ }
+
public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col", SortOrder = 1)]
public string[] Column;
[Argument(ArgumentType.Required, HelpText = "If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved", ShortName = "c", SortOrder = 1)]
- public long Count = 1;
+ // Same value as before (1), now taken from the named constant Defaults.Count.
+ public long Count = Defaults.Count;
}
internal static string RegistrationName = "CountFeatureSelectionTransform";
+ /// <summary>
+ /// A helper method to create CountFeatureSelection transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="count">If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved.</param>
+ /// <param name="columns">Columns to use for feature selection.</param>
+ /// <returns>The transform returned by the Create overload that takes <see cref="Arguments"/>.</returns>
+ public static IDataTransform Create(IHostEnvironment env, IDataView input, long count = Defaults.Count, params string[] columns)
+ {
+ var args = new Arguments()
+ {
+ Column = columns,
+ Count = count
+ };
+ return Create(env, args, input);
+ }
+
///
/// Create method corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs
index 2fee0e8ef3..fdb1d26c25 100644
--- a/src/Microsoft.ML.Transforms/GcnTransform.cs
+++ b/src/Microsoft.ML.Transforms/GcnTransform.cs
@@ -53,16 +53,25 @@ public enum NormalizerKind : byte
LInf = 3
}
+ // Default values referenced by the Arguments / GcnArguments field initializers and by the
+ // optional parameters of the CreateLpNormNormalizer / CreateGlobalContrastNormalizer helpers.
+ private static class Defaults
+ {
+ public const NormalizerKind NormKind = NormalizerKind.L2Norm;
+ // Two separate SubMean defaults: Arguments.SubMean uses LpSubMean, GcnArguments.SubMean uses GcnSubMean.
+ public const bool LpSubMean = false;
+ public const bool GcnSubMean = true;
+ public const bool UseStdDev = false;
+ public const Float Scale = 1;
+ }
+
public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
public Column[] Column;
[Argument(ArgumentType.AtMostOnce, HelpText = "The norm to use to normalize each sample", ShortName = "norm", SortOrder = 1)]
- public NormalizerKind NormKind = NormalizerKind.L2Norm;
+ public NormalizerKind NormKind = Defaults.NormKind;
[Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing", SortOrder = 2)]
- public bool SubMean = false;
+ // Uses Defaults.LpSubMean (false), not Defaults.GcnSubMean (true).
+ public bool SubMean = Defaults.LpSubMean;
}
public sealed class GcnArguments : TransformInputBase
@@ -71,13 +80,13 @@ public sealed class GcnArguments : TransformInputBase
public GcnColumn[] Column;
[Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing", SortOrder = 1)]
- public bool SubMean = true;
+ public bool SubMean = Defaults.GcnSubMean;
[Argument(ArgumentType.AtMostOnce, HelpText = "Normalize by standard deviation rather than L2 norm", ShortName = "useStd")]
- public bool UseStdDev = false;
+ public bool UseStdDev = Defaults.UseStdDev;
[Argument(ArgumentType.AtMostOnce, HelpText = "Scale features by this value")]
- public Float Scale = 1;
+ public Float Scale = Defaults.Scale;
}
public abstract class ColumnBase : OneToOneColumn
@@ -237,6 +246,38 @@ private static VersionInfo GetVersionInfo()
private readonly ColInfoEx[] _exes;
+ /// <summary>
+ /// A helper method to create GlobalContrastNormalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="subMean">Subtract mean from each value before normalizing.</param>
+ /// <param name="useStdDev">Normalize by standard deviation rather than L2 norm.</param>
+ /// <param name="scale">Scale features by this value.</param>
+ public static IDataTransform CreateGlobalContrastNormalizer(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source = null,
+ bool subMean = Defaults.GcnSubMean,
+ bool useStdDev = Defaults.UseStdDev,
+ Float scale = Defaults.Scale)
+ {
+ var args = new GcnArguments()
+ {
+ Column = new[] { new GcnColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ SubMean = subMean,
+ UseStdDev = useStdDev,
+ Scale = scale
+ };
+ return new LpNormNormalizerTransform(env, args, input);
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
@@ -263,9 +304,38 @@ public LpNormNormalizerTransform(IHostEnvironment env, GcnArguments args, IDataV
SetMetadata();
}
+ /// <summary>
+ /// A helper method to create LpNormNormalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="normKind">The norm to use to normalize each sample.</param>
+ /// <param name="subMean">Subtract mean from each value before normalizing.</param>
+ public static IDataTransform CreateLpNormNormalizer(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source = null,
+ NormalizerKind normKind = Defaults.NormKind,
+ bool subMean = Defaults.LpSubMean)
+ {
+ var args = new Arguments()
+ {
+ Column = new[] { new Column(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ SubMean = subMean,
+ NormKind = normKind
+ };
+ return new LpNormNormalizerTransform(env, args, input);
+ }
+
public LpNormNormalizerTransform(IHostEnvironment env, Arguments args, IDataView input)
- : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column,
- input, TestIsFloatVector)
+ : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column,
+ input, TestIsFloatVector)
{
Host.AssertNonEmpty(Infos);
Host.Assert(Infos.Length == Utils.Size(args.Column));
diff --git a/src/Microsoft.ML.Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Transforms/NormalizeColumn.cs
index a5769ec90a..f6e8851f51 100644
--- a/src/Microsoft.ML.Transforms/NormalizeColumn.cs
+++ b/src/Microsoft.ML.Transforms/NormalizeColumn.cs
@@ -135,12 +135,21 @@ public bool TryUnparse(StringBuilder sb)
}
}
+ // Default values referenced by the normalizer argument classes' field initializers and by
+ // the optional parameters of the Create*Normalizer helper methods.
+ private static class Defaults
+ {
+ public const bool FixZero = true;
+ // UseCdf defaults differ per normalizer: MeanVarArguments uses MeanVarCdf,
+ // LogMeanVarArguments uses LogMeanVarCdf.
+ public const bool MeanVarCdf = false;
+ public const bool LogMeanVarCdf = true;
+ public const int NumBins = 1024;
+ public const int MinBinSize = 10;
+ }
+
+ // Base arguments class that adds the FixZero option on top of ArgumentsBase.
public abstract class FixZeroArgumentsBase : ArgumentsBase
{
// REVIEW: This only allows mapping either zero or min to zero. It might make sense to allow also max, midpoint and mean to be mapped to zero.
// REVIEW: Convert this to bool? or even an enum{Auto, No, Yes}, and automatically map zero to zero when it is null/Auto.
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")]
- public bool FixZero = true;
+ public bool FixZero = Defaults.FixZero;
}
public abstract class AffineArgumentsBase : FixZeroArgumentsBase
@@ -158,13 +167,13 @@ public sealed class MinMaxArguments : AffineArgumentsBase
public sealed class MeanVarArguments : AffineArgumentsBase
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to use CDF as the output", ShortName = "cdf")]
- public bool UseCdf;
+ // Defaults.MeanVarCdf is false, the same value the uninitialized field had before.
+ public bool UseCdf = Defaults.MeanVarCdf;
}
public sealed class LogMeanVarArguments : ArgumentsBase
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to use CDF as the output", ShortName = "cdf")]
- public bool UseCdf = true;
+ public bool UseCdf = Defaults.LogMeanVarCdf;
[Argument(ArgumentType.Multiple, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
public LogNormalColumn[] Column;
@@ -179,7 +188,7 @@ public abstract class BinArgumentsBase : FixZeroArgumentsBase
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins, power of 2 recommended", ShortName = "bins")]
[TGUI(Label = "Max number of bins")]
- public int NumBins = 1024;
+ public int NumBins = Defaults.NumBins;
public override OneToOneColumn[] GetColumns() => Column;
}
@@ -196,7 +205,7 @@ public sealed class SupervisedBinArguments : BinArgumentsBase
public string LabelColumn;
[Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of examples per bin")]
- public int MinBinSize = 10;
+ public int MinBinSize = Defaults.MinBinSize;
}
public const string MinMaxNormalizerSummary = "Normalizes the data based on the observed minimum and maximum values of the data.";
@@ -218,6 +227,26 @@ public sealed class SupervisedBinArguments : BinArgumentsBase
public const string BinNormalizerShortName = "Bin";
public const string SupervisedBinNormalizerShortName = "SupBin";
+ /// <summary>
+ /// A helper method to create MinMaxNormalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ public static NormalizeTransform CreateMinMaxNormalizer(IHostEnvironment env, IDataView input, string name, string source = null)
+ {
+ var args = new MinMaxArguments()
+ {
+ Column = new[] { new AffineColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ }
+ };
+ return Create(env, args, input);
+ }
+
///
/// Public create method corresponding to SignatureDataTransform.
///
@@ -234,6 +263,32 @@ public static NormalizeTransform Create(IHostEnvironment env, MinMaxArguments ar
return func;
}
+ /// <summary>
+ /// A helper method to create MeanVarNormalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="useCdf">Whether to use CDF as the output.</param>
+ public static NormalizeTransform CreateMeanVarNormalizer(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source=null,
+ bool useCdf = Defaults.MeanVarCdf)
+ {
+ var args = new MeanVarArguments()
+ {
+ Column = new[] { new AffineColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ UseCdf = useCdf
+ };
+ return Create(env, args, input);
+ }
+
///
/// Public create method corresponding to SignatureDataTransform.
///
@@ -250,6 +305,32 @@ public static NormalizeTransform Create(IHostEnvironment env, MeanVarArguments a
return func;
}
+ /// <summary>
+ /// A helper method to create LogMeanVarNormalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="useCdf">Whether to use CDF as the output.</param>
+ public static NormalizeTransform CreateLogMeanVarNormalizer(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source=null,
+ bool useCdf = Defaults.LogMeanVarCdf)
+ {
+ var args = new LogMeanVarArguments()
+ {
+ Column = new[] { new LogNormalColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ UseCdf = useCdf
+ };
+ return Create(env, args, input);
+ }
+
///
/// Public create method corresponding to SignatureDataTransform.
///
@@ -266,6 +347,24 @@ public static NormalizeTransform Create(IHostEnvironment env, LogMeanVarArgument
return func;
}
+ /// <summary>
+ /// A helper method to create a binning normalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="numBins">Max number of bins, power of 2 recommended.</param>
+ public static NormalizeTransform CreateBinningNormalizer(IHostEnvironment env,
+ IDataView input,
+ string name,
+ string source=null,
+ int numBins = Defaults.NumBins)
+ {
+ var args = new BinArguments()
+ {
+ Column = new[] { new BinColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ NumBins = numBins
+ };
+ return Create(env, args, input);
+ }
+
///
/// Public create method corresponding to SignatureDataTransform.
///
@@ -282,6 +381,28 @@ public static NormalizeTransform Create(IHostEnvironment env, BinArguments args,
return func;
}
+ /// <summary>
+ /// A helper method to create a supervised binning normalizer transform for public facing API.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>.</param>
+ /// <param name="labelColumn">Value assigned to the LabelColumn field of the arguments.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="numBins">Max number of bins, power of 2 recommended.</param>
+ /// <param name="minBinSize">Minimum number of examples per bin.</param>
+ public static NormalizeTransform CreateSupervisedBinningNormalizer(IHostEnvironment env,
+ IDataView input,
+ string labelColumn,
+ string name,
+ string source = null,
+ int numBins = Defaults.NumBins,
+ int minBinSize = Defaults.MinBinSize)
+ {
+ var args = new SupervisedBinArguments()
+ {
+ Column = new[] { new BinColumn(){
+ Source = source ?? name,
+ Name = name
+ }
+ },
+ LabelColumn = labelColumn,
+ NumBins = numBins,
+ MinBinSize = minBinSize
+ };
+ return Create(env, args, input);
+ }
+
///
/// Public create method corresponding to SignatureDataTransform.
///