Skip to content

Convert TextNormalizer to estimator #1276

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/Transforms/HashTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1289,10 +1289,10 @@ public override void Process()
/// </summary>
public sealed class HashEstimator : IEstimator<HashTransformer>
{
public const int NumBitsMin = 1;
public const int NumBitsLim = 32;
internal const int NumBitsMin = 1;
internal const int NumBitsLim = 32;

public static class Defaults
internal static class Defaults
{
public const int HashBits = NumBitsLim - 1;
public const uint Seed = 314489979;
Expand Down
19 changes: 8 additions & 11 deletions src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,17 +208,14 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData

env.CheckValue(args.Column, nameof(args.Column));
var cols = new ColumnInfo[args.Column.Length];
using (var ch = env.Start("ValidateArgs"))
for (int i = 0; i < cols.Length; i++)
{
for (int i = 0; i < cols.Length; i++)
{
var item = args.Column[i];
var item = args.Column[i];

cols[i] = new ColumnInfo(item.Source ?? item.Name,
item.Name,
item.Bag ?? args.Bag);
};
}
cols[i] = new ColumnInfo(item.Source ?? item.Name,
item.Name,
item.Bag ?? args.Bag);
};
return new KeyToVectorTransform(env, cols).MakeDataTransform(input);
}

Expand Down Expand Up @@ -727,7 +724,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
// Note that one input feature got expanded to a one-hot vector.
opType = "ReduceSum";
var reduceNode = ctx.CreateNode(opType, encodedVariableName, dstVariableName, ctx.GetNodeName(opType), "");
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1});
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1 });
reduceNode.AddAttribute("keepdims", 0);
}
return true;
Expand All @@ -737,7 +734,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src

public sealed class KeyToVectorEstimator : TrivialEstimator<KeyToVectorTransform>
{
public static class Defaults
internal static class Defaults
{
public const bool Bag = false;
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Legacy/CSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16789,7 +16789,7 @@ public enum TextTransformLanguage
Japanese = 7
}

public enum TextNormalizerTransformCaseNormalizationMode
public enum TextNormalizerEstimatorCaseNormalizationMode
{
Lower = 0,
Upper = 1,
Expand Down Expand Up @@ -16877,7 +16877,7 @@ public void AddColumn(string name, params string[] source)
/// <summary>
/// Casing text using the rules of the invariant culture.
/// </summary>
public TextNormalizerTransformCaseNormalizationMode TextCase { get; set; } = TextNormalizerTransformCaseNormalizationMode.Lower;
public TextNormalizerEstimatorCaseNormalizationMode TextCase { get; set; } = TextNormalizerEstimatorCaseNormalizationMode.Lower;

/// <summary>
/// Whether to keep diacritical marks or remove them.
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/RffTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ private void TransformFeatures(ref VBuffer<float> src, ref VBuffer<float> dst, T
/// </summary>
public sealed class RffEstimator : IEstimator<RffTransform>
{
public static class Defaults
internal static class Defaults
{
public const int NewDim = 1000;
public const bool UseSin = false;
Expand Down
Loading