diff --git a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs index 544bce0aeb..a51f502ecd 100644 --- a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs @@ -90,6 +90,19 @@ public bool TryUnparse(StringBuilder sb) public sealed class Arguments : TransformInputBase { + public Arguments() + { + } + + public Arguments(string name, params string[] source) + { + Column = new[] { new Column() + { + Name = name, + Source = source + }}; + } + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:srcs)", ShortName = "col", SortOrder = 1)] public Column[] Column; } @@ -527,6 +540,18 @@ private static VersionInfo GetVersionInfo() public override ISchema Schema => _bindings; + /// <summary> + /// Convenience constructor for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Input columns to concatenate.</param> + public ConcatTransform(IHostEnvironment env, IDataView input, string name, params string[] source) + : this(env, new Arguments(name, source), input) + { + } + /// <summary> /// Public constructor corresponding to SignatureDataTransform. /// </summary> diff --git a/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs b/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs index f365dd9e98..2729a48e3e 100644 --- a/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/CopyColumnsTransform.cs @@ -64,6 +64,18 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "CopyColumns"; + /// <summary> + /// Convenience constructor for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be copied.</param> 
+ public CopyColumnsTransform(IHostEnvironment env, IDataView input, string name, string source) + : this(env, new Arguments(){ Column = new[] { new Column() { Source = source, Name = name }}}, input) + { + } + public CopyColumnsTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null) { diff --git a/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs index 502e6f395d..3e15199ff7 100644 --- a/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropColumnsTransform.cs @@ -237,6 +237,17 @@ private static VersionInfo GetVersionInfo() private const string DropRegistrationName = "DropColumns"; private const string KeepRegistrationName = "KeepColumns"; + /// <summary> + /// Convenience constructor for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="columnsToDrop">Name of the columns to be dropped.</param> + public DropColumnsTransform(IHostEnvironment env, IDataView input, params string[] columnsToDrop) + :this(env, new Arguments() { Column = columnsToDrop }, input) + { + } + /// <summary> /// Public constructor corresponding to SignatureDataTransform. /// </summary> @@ -383,4 +394,17 @@ public ValueGetter GetGetter(int col) } } } + + public class KeepColumnsTransform + { + /// <summary> + /// A helper method to create <see cref="KeepColumnsTransform"/> for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="columnsToKeep">Name of the columns to be kept. All other columns will be removed.</param> 
+ /// <returns>A <see cref="DropColumnsTransform"/> constructed from <see cref="DropColumnsTransform.KeepArguments"/> over <paramref name="columnsToKeep"/>.</returns> + public static IDataTransform Create(IHostEnvironment env, IDataView input, params string[] columnsToKeep) + => new DropColumnsTransform(env, new DropColumnsTransform.KeepArguments() { Column = columnsToKeep }, input); + } } diff --git a/src/Microsoft.ML.Data/Transforms/NAFilter.cs b/src/Microsoft.ML.Data/Transforms/NAFilter.cs index 96c2111366..7b94ff1e07 100644 --- a/src/Microsoft.ML.Data/Transforms/NAFilter.cs +++ b/src/Microsoft.ML.Data/Transforms/NAFilter.cs @@ -28,13 +28,18 @@ namespace Microsoft.ML.Runtime.Data { public sealed class NAFilter : FilterBase { + private static class Defaults + { + public const bool Complement = false; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Column", ShortName = "col", SortOrder = 1)] public string[] Column; [Argument(ArgumentType.Multiple, HelpText = "If true, keep only rows that contain NA values, and filter the rest.")] - public bool Complement; + public bool Complement = Defaults.Complement; } private sealed class ColInfo @@ -72,6 +77,18 @@ private static VersionInfo GetVersionInfo() private readonly bool _complement; private const string RegistrationName = "MissingValueFilter"; + /// <summary> + /// Convenience constructor for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="complement">If true, keep only rows that contain NA values, and filter the rest.</param> + /// <param name="columns">Name of the columns. Only these columns will be used to filter rows having 'NA' values.</param> 
+ public NAFilter(IHostEnvironment env, IDataView input, bool complement = Defaults.Complement, params string[] columns) + : this(env, new Arguments() { Column = columns, Complement = complement }, input) + { + } + public NAFilter(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, input) { diff --git a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs index b7f03deeec..72e7030604 100644 --- a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs +++ b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs @@ -25,20 +25,27 @@ namespace Microsoft.ML.Runtime.Data /// public sealed class BootstrapSampleTransform : FilterBase { + private static class Defaults + { + public const bool Complement = false; + public const bool ShuffleInput = true; + public const int PoolSize = 1000; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.AtMostOnce, HelpText = "Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.", ShortName = "comp")] - public bool Complement; + public bool Complement = Defaults.Complement; [Argument(ArgumentType.AtMostOnce, HelpText = "The random seed. If unspecified random state will be instead derived from the environment.")] public uint? Seed; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.", ShortName = "si")] - public bool ShuffleInput = true; + public bool ShuffleInput = Defaults.ShuffleInput; [Argument(ArgumentType.LastOccurenceWins, HelpText = "When shuffling the output, the number of output rows to keep in that pool. 
Note that shuffling of output is completely distinct from shuffling of input.", ShortName = "pool")] - public int PoolSize = 1000; + public int PoolSize = Defaults.PoolSize; } internal const string Summary = "Approximate bootstrap sampling."; @@ -76,6 +83,25 @@ public BootstrapSampleTransform(IHostEnvironment env, Arguments args, IDataView _poolSize = args.PoolSize; } + /// <summary> + /// Convenience constructor for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="complement">Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.</param> + /// <param name="seed">The random seed. If unspecified random state will be instead derived from the environment.</param> + /// <param name="shuffleInput">Whether we should attempt to shuffle the source data. By default on, but can be turned off for efficiency.</param> + /// <param name="poolSize">When shuffling the output, the number of output rows to keep in that pool. Note that shuffling of output is completely distinct from shuffling of input.</param> + public BootstrapSampleTransform(IHostEnvironment env, + IDataView input, + bool complement = Defaults.Complement, + uint? 
seed = null, + bool shuffleInput = Defaults.ShuffleInput, + int poolSize = Defaults.PoolSize) + : this(env, new Arguments() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }, input) + { + } + private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input) : base(host, input) { diff --git a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs index a30b9a6c84..5aec5658b2 100644 --- a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs @@ -86,6 +86,15 @@ public bool TryUnparse(StringBuilder sb) } } + private static class Defaults + { + public const int HashBits = 16; + public const uint Seed = 314489979; + public const bool Ordered = true; + public const int InvertHash = 0; + public const CategoricalTransform.OutputKind OutputKind = CategoricalTransform.OutputKind.Bag; + } + /// /// This class is a merger of and /// with join option removed @@ -97,22 +106,22 @@ public sealed class Arguments : TransformInputBase [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 30, inclusive.", ShortName = "bits", SortOrder = 2)] - public int HashBits = 16; + public int HashBits = Defaults.HashBits; [Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")] - public uint Seed = 314489979; + public uint Seed = Defaults.Seed; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")] - public bool Ordered = true; + public bool Ordered = Defaults.Ordered; [Argument(ArgumentType.AtMostOnce, HelpText = "Limit the number of keys used to generate the slot name to this many. 
0 means no invert hashing, -1 means no limit.", ShortName = "ih")] - public int InvertHash; + public int InvertHash = Defaults.InvertHash; [Argument(ArgumentType.AtMostOnce, HelpText = "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)", ShortName = "kind", SortOrder = 102)] - public CategoricalTransform.OutputKind OutputKind = CategoricalTransform.OutputKind.Bag; + public CategoricalTransform.OutputKind OutputKind = Defaults.OutputKind; } internal const string Summary = "Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the " @@ -120,6 +129,38 @@ public sealed class Arguments : TransformInputBase public const string UserName = "Categorical Hash Transform"; + /// <summary> + /// A helper method to create <see cref="CategoricalHashTransform"/> for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param> + /// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param> + /// <param name="outputKind">The type of output expected.</param> + public static IDataTransform Create(IHostEnvironment env, + IDataView input, + string name, + string source =null, + int hashBits = Defaults.HashBits, + int invertHash = Defaults.InvertHash, + CategoricalTransform.OutputKind outputKind = Defaults.OutputKind) + { + var args = new Arguments() + { + Column = new[] { new Column(){ + Source = source ?? 
name, + Name = name + } + }, + HashBits = hashBits, + InvertHash = invertHash, + OutputKind = outputKind + }; + return Create(env, args, input); + } + public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs index 621bbe6c1d..dfc6849427 100644 --- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs @@ -38,15 +38,27 @@ public static class CategoricalTransform { public enum OutputKind : byte { + /// <summary> + /// Output is a bag (multi-set) vector + /// </summary> [TGUI(Label = "Output is a bag (multi-set) vector")] Bag = 1, + /// <summary> + /// Output is an indicator vector + /// </summary> [TGUI(Label = "Output is an indicator vector")] Ind = 2, + /// <summary> + /// Output is a key value + /// </summary> [TGUI(Label = "Output is a key value")] Key = 3, + /// <summary> + /// Output is binary encoded + /// </summary> [TGUI(Label = "Output is binary encoded")] Bin = 4, } @@ -96,6 +108,11 @@ public bool TryUnparse(StringBuilder sb) } } + private static class Defaults + { + public const OutputKind OutKind = OutputKind.Ind; + } + public sealed class Arguments : TermTransform.ArgumentsBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] @@ -103,7 +120,7 @@ public sealed class Arguments : TermTransform.ArgumentsBase [Argument(ArgumentType.AtMostOnce, HelpText = "Output kind: Bag (multi-set vector), Ind (indicator vector), or Key (index)", ShortName = "kind", SortOrder = 102)] - public OutputKind OutputKind = OutputKind.Ind; + public OutputKind OutputKind = Defaults.OutKind; public Arguments() { @@ -118,6 +135,28 @@ public Arguments() public const string UserName = "Categorical Transform"; + /// <summary> + /// A helper method to create <see cref="CategoricalTransform"/> for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. 
This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="outputKind">The type of output expected.</param> + public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, OutputKind outputKind = Defaults.OutKind) + { + var args = new Arguments() + { + Column = new[] { new Column(){ + Source = source ?? name, + Name = name + } + }, + OutputKind = outputKind + }; + return Create(env, args, input); + } + public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index 72131902b5..79adda882e 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -28,17 +28,40 @@ public static class CountFeatureSelectionTransform public const string Summary = "Selects the slots for which the count of non-default values is greater than or equal to a threshold."; public const string UserName = "Count Feature Selection Transform"; + private static class Defaults + { + public const long Count = 1; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col", SortOrder = 1)] public string[] Column; [Argument(ArgumentType.Required, HelpText = "If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved", ShortName = "c", SortOrder = 1)] - public long Count = 1; + public long Count = Defaults.Count; } internal static string RegistrationName = "CountFeatureSelectionTransform"; + /// <summary> + /// A helper method to create CountFeatureSelection transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. 
This is the output from previous transform or loader.</param> + /// <param name="count">If the count of non-default values for a slot is greater than or equal to this threshold, the slot is preserved.</param> + /// <param name="columns">Columns to use for feature selection.</param> + /// <returns>The created transform.</returns> + public static IDataTransform Create(IHostEnvironment env, IDataView input, long count = Defaults.Count, params string[] columns) + { + var args = new Arguments() + { + Column = columns, + Count = count + }; + return Create(env, args, input); + } + /// <summary> /// Create method corresponding to SignatureDataTransform. /// </summary> diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 2fee0e8ef3..fdb1d26c25 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -53,16 +53,25 @@ public enum NormalizerKind : byte LInf = 3 } + private static class Defaults + { + public const NormalizerKind NormKind = NormalizerKind.L2Norm; + public const bool LpSubMean = false; + public const bool GcnSubMean = true; + public const bool UseStdDev = false; + public const Float Scale = 1; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public Column[] Column; [Argument(ArgumentType.AtMostOnce, HelpText = "The norm to use to normalize each sample", ShortName = "norm", SortOrder = 1)] - public NormalizerKind NormKind = NormalizerKind.L2Norm; + public NormalizerKind NormKind = Defaults.NormKind; [Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing", SortOrder = 2)] - public bool SubMean = false; + public bool SubMean = Defaults.LpSubMean; } public sealed class GcnArguments : TransformInputBase @@ -71,13 +80,13 @@ public sealed class GcnArguments : TransformInputBase public GcnColumn[] Column; [Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value 
before normalizing", SortOrder = 1)] - public bool SubMean = true; + public bool SubMean = Defaults.GcnSubMean; [Argument(ArgumentType.AtMostOnce, HelpText = "Normalize by standard deviation rather than L2 norm", ShortName = "useStd")] - public bool UseStdDev = false; + public bool UseStdDev = Defaults.UseStdDev; [Argument(ArgumentType.AtMostOnce, HelpText = "Scale features by this value")] - public Float Scale = 1; + public Float Scale = Defaults.Scale; } public abstract class ColumnBase : OneToOneColumn @@ -237,6 +246,38 @@ private static VersionInfo GetVersionInfo() private readonly ColInfoEx[] _exes; + /// <summary> + /// A helper method to create GlobalContrastNormalizer transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="subMean">Subtract mean from each value before normalizing.</param> + /// <param name="useStdDev">Normalize by standard deviation rather than L2 norm.</param> + /// <param name="scale">Scale features by this value.</param> + public static IDataTransform CreateGlobalContrastNormalizer(IHostEnvironment env, + IDataView input, + string name, + string source = null, + bool subMean = Defaults.GcnSubMean, + bool useStdDev = Defaults.UseStdDev, + Float scale = Defaults.Scale) + { + var args = new GcnArguments() + { + Column = new[] { new GcnColumn(){ + Source = source ?? name, + Name = name + } + }, + SubMean = subMean, + UseStdDev = useStdDev, + Scale = scale + }; + return new LpNormNormalizerTransform(env, args, input); + } + /// <summary> /// Public constructor corresponding to SignatureDataTransform. /// </summary> @@ -263,9 +304,38 @@ public LpNormNormalizerTransform(IHostEnvironment env, GcnArguments args, IDataV SetMetadata(); } + /// <summary> + /// A helper method to create LpNormNormalizer transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> 
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="normKind">The norm to use to normalize each sample.</param> + /// <param name="subMean">Subtract mean from each value before normalizing.</param> + public static IDataTransform CreateLpNormNormalizer(IHostEnvironment env, + IDataView input, + string name, + string source = null, + NormalizerKind normKind = Defaults.NormKind, + bool subMean = Defaults.LpSubMean) + { + var args = new Arguments() + { + Column = new[] { new Column(){ + Source = source ?? name, + Name = name + } + }, + SubMean = subMean, + NormKind = normKind + }; + return new LpNormNormalizerTransform(env, args, input); + } + public LpNormNormalizerTransform(IHostEnvironment env, Arguments args, IDataView input) - : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, - input, TestIsFloatVector) + : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, + input, TestIsFloatVector) { Host.AssertNonEmpty(Infos); Host.Assert(Infos.Length == Utils.Size(args.Column)); diff --git a/src/Microsoft.ML.Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Transforms/NormalizeColumn.cs index a5769ec90a..f6e8851f51 100644 --- a/src/Microsoft.ML.Transforms/NormalizeColumn.cs +++ b/src/Microsoft.ML.Transforms/NormalizeColumn.cs @@ -135,12 +135,21 @@ public bool TryUnparse(StringBuilder sb) } } + private static class Defaults + { + public const bool FixZero = true; + public const bool MeanVarCdf = false; + public const bool LogMeanVarCdf = true; + public const int NumBins = 1024; + public const int MinBinSize = 10; + } + public abstract class FixZeroArgumentsBase : ArgumentsBase { // REVIEW: This only allows mapping either zero or min to zero. It might make sense to allow also max, midpoint and mean to be mapped to zero. // REVIEW: Convert this to bool? or even an enum{Auto, No, Yes}, and automatically map zero to zero when it is null/Auto. 
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")] - public bool FixZero = true; + public bool FixZero = Defaults.FixZero; } public abstract class AffineArgumentsBase : FixZeroArgumentsBase @@ -158,13 +167,13 @@ public sealed class MinMaxArguments : AffineArgumentsBase public sealed class MeanVarArguments : AffineArgumentsBase { [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to use CDF as the output", ShortName = "cdf")] - public bool UseCdf; + public bool UseCdf = Defaults.MeanVarCdf; } public sealed class LogMeanVarArguments : ArgumentsBase { [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to use CDF as the output", ShortName = "cdf")] - public bool UseCdf = true; + public bool UseCdf = Defaults.LogMeanVarCdf; [Argument(ArgumentType.Multiple, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public LogNormalColumn[] Column; @@ -179,7 +188,7 @@ public abstract class BinArgumentsBase : FixZeroArgumentsBase [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins, power of 2 recommended", ShortName = "bins")] [TGUI(Label = "Max number of bins")] - public int NumBins = 1024; + public int NumBins = Defaults.NumBins; public override OneToOneColumn[] GetColumns() => Column; } @@ -196,7 +205,7 @@ public sealed class SupervisedBinArguments : BinArgumentsBase public string LabelColumn; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of examples per bin")] - public int MinBinSize = 10; + public int MinBinSize = Defaults.MinBinSize; } public const string MinMaxNormalizerSummary = "Normalizes the data based on the observed minimum and maximum values of the data."; @@ -218,6 +227,26 @@ public sealed class SupervisedBinArguments : BinArgumentsBase public const string BinNormalizerShortName = "Bin"; public const string SupervisedBinNormalizerShortName = "SupBin"; + /// <summary> + /// A helper method to create MinMaxNormalizer 
transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + public static NormalizeTransform CreateMinMaxNormalizer(IHostEnvironment env, IDataView input, string name, string source = null) + { + var args = new MinMaxArguments() + { + Column = new[] { new AffineColumn(){ + Source = source ?? name, + Name = name + } + } + }; + return Create(env, args, input); + } + /// <summary> /// Public create method corresponding to SignatureDataTransform. /// </summary> @@ -234,6 +263,32 @@ public static NormalizeTransform Create(IHostEnvironment env, MinMaxArguments ar return func; } + /// <summary> + /// A helper method to create MeanVarNormalizer transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="useCdf">Whether to use CDF as the output.</param> + public static NormalizeTransform CreateMeanVarNormalizer(IHostEnvironment env, + IDataView input, + string name, + string source=null, + bool useCdf = Defaults.MeanVarCdf) + { + var args = new MeanVarArguments() + { + Column = new[] { new AffineColumn(){ + Source = source ?? name, + Name = name + } + }, + UseCdf = useCdf + }; + return Create(env, args, input); + } + /// <summary> /// Public create method corresponding to SignatureDataTransform. /// </summary> @@ -250,6 +305,32 @@ public static NormalizeTransform Create(IHostEnvironment env, MeanVarArguments a return func; } + /// <summary> + /// A helper method to create LogMeanVarNormalizer transform for public facing API. + /// </summary> + /// <param name="env">Host Environment.</param> + /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param> + /// <param name="name">Name of the output column.</param> + /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param> + /// <param name="useCdf">Whether to use CDF as the output.</param> 
+ public static NormalizeTransform CreateLogMeanVarNormalizer(IHostEnvironment env, + IDataView input, + string name, + string source=null, + bool useCdf = Defaults.LogMeanVarCdf) + { + var args = new LogMeanVarArguments() + { + Column = new[] { new LogNormalColumn(){ + Source = source ?? name, + Name = name + } + }, + UseCdf = useCdf + }; + return Create(env, args, input); + } + /// /// Public create method corresponding to SignatureDataTransform. /// @@ -266,6 +347,24 @@ public static NormalizeTransform Create(IHostEnvironment env, LogMeanVarArgument return func; } + public static NormalizeTransform CreateBinningNormalizer(IHostEnvironment env, + IDataView input, + string name, + string source=null, + int numBins = Defaults.NumBins) + { + var args = new BinArguments() + { + Column = new[] { new BinColumn(){ + Source = source ?? name, + Name = name + } + }, + NumBins = numBins + }; + return Create(env, args, input); + } + /// /// Public create method corresponding to SignatureDataTransform. /// @@ -282,6 +381,28 @@ public static NormalizeTransform Create(IHostEnvironment env, BinArguments args, return func; } + public static NormalizeTransform CreateSupervisedBinningNormalizer(IHostEnvironment env, + IDataView input, + string labelColumn, + string name, + string source = null, + int numBins = Defaults.NumBins, + int minBinSize = Defaults.MinBinSize) + { + var args = new SupervisedBinArguments() + { + Column = new[] { new BinColumn(){ + Source = source ?? name, + Name = name + } + }, + LabelColumn = labelColumn, + NumBins = numBins, + MinBinSize = minBinSize + }; + return Create(env, args, input); + } + /// /// Public create method corresponding to SignatureDataTransform. ///