Skip to content

Commit

Permalink
Convert TextNormalizer to estimator (#1276)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivanidzo4ka authored Oct 18, 2018
1 parent 14dadfe commit a285f8d
Show file tree
Hide file tree
Showing 20 changed files with 761 additions and 603 deletions.
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/Transforms/HashTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1289,10 +1289,10 @@ public override void Process()
/// </summary>
public sealed class HashEstimator : IEstimator<HashTransformer>
{
public const int NumBitsMin = 1;
public const int NumBitsLim = 32;
internal const int NumBitsMin = 1;
internal const int NumBitsLim = 32;

public static class Defaults
internal static class Defaults
{
public const int HashBits = NumBitsLim - 1;
public const uint Seed = 314489979;
Expand Down
19 changes: 8 additions & 11 deletions src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,17 +208,14 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData

env.CheckValue(args.Column, nameof(args.Column));
var cols = new ColumnInfo[args.Column.Length];
using (var ch = env.Start("ValidateArgs"))
for (int i = 0; i < cols.Length; i++)
{
for (int i = 0; i < cols.Length; i++)
{
var item = args.Column[i];
var item = args.Column[i];

cols[i] = new ColumnInfo(item.Source ?? item.Name,
item.Name,
item.Bag ?? args.Bag);
};
}
cols[i] = new ColumnInfo(item.Source ?? item.Name,
item.Name,
item.Bag ?? args.Bag);
};
return new KeyToVectorTransform(env, cols).MakeDataTransform(input);
}

Expand Down Expand Up @@ -727,7 +724,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
// Note that one input feature got expanded to a one-hot vector.
opType = "ReduceSum";
var reduceNode = ctx.CreateNode(opType, encodedVariableName, dstVariableName, ctx.GetNodeName(opType), "");
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1});
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1 });
reduceNode.AddAttribute("keepdims", 0);
}
return true;
Expand All @@ -737,7 +734,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src

public sealed class KeyToVectorEstimator : TrivialEstimator<KeyToVectorTransform>
{
public static class Defaults
internal static class Defaults
{
public const bool Bag = false;
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Legacy/CSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16789,7 +16789,7 @@ public enum TextTransformLanguage
Japanese = 7
}

public enum TextNormalizerTransformCaseNormalizationMode
public enum TextNormalizerEstimatorCaseNormalizationMode
{
Lower = 0,
Upper = 1,
Expand Down Expand Up @@ -16877,7 +16877,7 @@ public void AddColumn(string name, params string[] source)
/// <summary>
/// Casing text using the rules of the invariant culture.
/// </summary>
public TextNormalizerTransformCaseNormalizationMode TextCase { get; set; } = TextNormalizerTransformCaseNormalizationMode.Lower;
public TextNormalizerEstimatorCaseNormalizationMode TextCase { get; set; } = TextNormalizerEstimatorCaseNormalizationMode.Lower;

/// <summary>
/// Whether to keep diacritical marks or remove them.
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/RffTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ private void TransformFeatures(ref VBuffer<float> src, ref VBuffer<float> dst, T
/// </summary>
public sealed class RffEstimator : IEstimator<RffTransform>
{
public static class Defaults
internal static class Defaults
{
public const int NewDim = 1000;
public const bool UseSin = false;
Expand Down
Loading

0 comments on commit a285f8d

Please sign in to comment.