Skip to content

Commit

Permalink
Fix Codegen for columnConvert and ValueToKeyMapping transform and add…
Browse files Browse the repository at this point in the history
… individual transform tests (dotnet#95)

* Added sequential grouping of columns

* reverted the file

* fix usings for type convert

* added transforms tests

* review comments
  • Loading branch information
srsaggam authored and Dmitry-A committed Aug 22, 2019
1 parent c7165e1 commit 0e5feee
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 18 deletions.
170 changes: 154 additions & 16 deletions src/mlnet.Test/CodeGenTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ public void ClassLabelGenerationBasicTest()
};

var result = (new TextLoader.Arguments()
{
Column = columns,
AllowQuoting = false,
AllowSparse = false,
Separators = new[] { ',' },
HasHeader = true,
TrimWhitespace = true
}, purposes);
{
Column = columns,
AllowQuoting = false,
AllowSparse = false,
Separators = new[] { ',' },
HasHeader = true,
TrimWhitespace = true
}, purposes);

CodeGenerator codeGenerator = new CodeGenerator(null, result);
var actual = codeGenerator.GenerateClassLabels();
Expand All @@ -128,14 +128,14 @@ public void ColumnGenerationTest()
};

var result = (new TextLoader.Arguments()
{
Column = columns,
AllowQuoting = false,
AllowSparse = false,
Separators = new[] { ',' },
HasHeader = true,
TrimWhitespace = true
}, purposes);
{
Column = columns,
AllowQuoting = false,
AllowSparse = false,
Separators = new[] { ',' },
HasHeader = true,
TrimWhitespace = true
}, purposes);

var context = new MLContext();
var elementProperties = new Dictionary<string, object>();
Expand Down Expand Up @@ -170,5 +170,143 @@ public void TrainerComplexParameterTest()

}

#region Transform Tests
/// <summary>
/// Checks the code generated for a pipeline holding a single "OneHotEncoding"
/// transform node: both the transform call text and its using directive.
/// </summary>
[TestMethod]
public void OneHotEncodingTest()
{
    // Arrange: one transform node whose input and output column share the same name.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("OneHotEncoding", PipelineNodeType.Transform, new string[] { "categorical_column_1" }, new string[] { "categorical_column_1" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: Item1 is compared against the transform text, Item2 against the using directive.
    string expectedTransform = "Categorical.OneHotEncoding(new []{new OneHotEncodingEstimator.ColumnInfo(\"categorical_column_1\",\"categorical_column_1\")})";
    var expectedUsings = "using Microsoft.ML.Transforms.Categorical;\r\n";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.AreEqual(expectedUsings, actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "Normalizing"
/// transform node, and that no using directive is produced for it.
/// </summary>
[TestMethod]
public void NormalizingTest()
{
    // Arrange: the output column name differs from the input column name.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("Normalizing", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_1_copy" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: the expected text lists the output column first, then the input column.
    string expectedTransform = "Normalize(\"numeric_column_1_copy\",\"numeric_column_1\")";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.IsNull(actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "ColumnConcatenating"
/// transform node with two input columns, and that no using directive is produced for it.
/// </summary>
[TestMethod]
public void ColumnConcatenatingTest()
{
    // Arrange: two input columns combined into a single "Features" output column.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("ColumnConcatenating", PipelineNodeType.Transform, new string[] { "numeric_column_1", "numeric_column_2" }, new string[] { "Features" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: the expected text names the output column first, followed by the input columns as an array.
    string expectedTransform = "Concatenate(\"Features\",new []{\"numeric_column_1\",\"numeric_column_2\"})";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.IsNull(actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "ColumnCopying"
/// transform node, and that no using directive is produced for it.
/// </summary>
[TestMethod]
public void ColumnCopyingTest()
{
    // Arrange: copy "numeric_column_1" (input) to "numeric_column_2" (output).
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("ColumnCopying", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_2" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: the expected text lists the output column first, then the input column.
    string expectedTransform = "CopyColumns(\"numeric_column_2\",\"numeric_column_1\")";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.IsNull(actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "MissingValueIndicating"
/// transform node, and that no using directive is produced for it.
/// </summary>
[TestMethod]
public void MissingValueIndicatingTest()
{
    // Arrange: a numeric column whose input and output names are identical.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("MissingValueIndicating", PipelineNodeType.Transform, new string[] { "numeric_column_1" }, new string[] { "numeric_column_1" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: the expected text wraps the column pair in an array of tuples.
    string expectedTransform = "IndicateMissingValues(new []{(\"numeric_column_1\",\"numeric_column_1\")})";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.IsNull(actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "OneHotHashEncoding"
/// transform node: both the transform call text and its using directive.
/// </summary>
[TestMethod]
public void OneHotHashEncodingTest()
{
    // Arrange: one transform node whose input and output column share the same name.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("OneHotHashEncoding", PipelineNodeType.Transform, new string[] { "Categorical_column_1" }, new string[] { "Categorical_column_1" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: Item1 is compared against the transform text, Item2 against the using directive.
    string expectedTransform = "Categorical.OneHotHashEncoding(new []{new OneHotHashEncodingEstimator.ColumnInfo(\"Categorical_column_1\",\"Categorical_column_1\")})";
    var expectedUsings = "using Microsoft.ML.Transforms.Categorical;\r\n";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.AreEqual(expectedUsings, actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "TextFeaturizing"
/// transform node, and that no using directive is produced for it.
/// </summary>
[TestMethod]
public void TextFeaturizingTest()
{
    // Arrange: a text column whose input and output names are identical.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("TextFeaturizing", PipelineNodeType.Transform, new string[] { "Text_column_1" }, new string[] { "Text_column_1" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert
    string expectedTransform = "Text.FeaturizeText(\"Text_column_1\",\"Text_column_1\")";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.IsNull(actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "TypeConverting"
/// transform node: both the transform call text and its using directive.
/// </summary>
[TestMethod]
public void TypeConvertingTest()
{
    // Arrange: input column "I4_column_1", output column "R4_column_1".
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("TypeConverting", PipelineNodeType.Transform, new string[] { "I4_column_1" }, new string[] { "R4_column_1" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: the expected text lists the output column, DataKind.R4, then the input column.
    // NOTE(review): how the generator picks DataKind.R4 is not visible from this test - confirm in TransformGenerators.
    string expectedTransform = "Conversion.ConvertType(new []{new TypeConvertingTransformer.ColumnInfo(\"R4_column_1\",DataKind.R4,\"I4_column_1\")})";
    string expectedUsings = "using Microsoft.ML.Transforms.Conversions;\r\n";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.AreEqual(expectedUsings, actual[0].Item2);
}

/// <summary>
/// Checks the code generated for a pipeline holding a single "ValueToKeyMapping"
/// transform node: both the transform call text and its using directive.
/// </summary>
[TestMethod]
public void ValueToKeyMappingTest()
{
    // Arrange: the "Label" column is both the input and the output of the node.
    var elementProperties = new Dictionary<string, object>();
    PipelineNode node = new PipelineNode("ValueToKeyMapping", PipelineNodeType.Transform, new string[] { "Label" }, new string[] { "Label" }, elementProperties);
    Pipeline pipeline = new Pipeline(new PipelineNode[] { node });
    CodeGenerator codeGenerator = new CodeGenerator(pipeline, (null, null));

    // Act
    var actual = codeGenerator.GenerateTransformsAndUsings();

    // Assert: Item1 is compared against the transform text, Item2 against the using directive.
    string expectedTransform = "Conversion.MapValueToKey(\"Label\",\"Label\")";
    var expectedUsings = "using Microsoft.ML.Transforms.Conversions;\r\n";
    Assert.AreEqual(expectedTransform, actual[0].Item1);
    Assert.AreEqual(expectedUsings, actual[0].Item2);
}

#endregion

}
}
4 changes: 2 additions & 2 deletions src/mlnet/CodeGenerator/TransformGenerators.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ public TypeConverting(PipelineNode node) : base(node)

internal override string MethodName => "Conversion.ConvertType";

internal override string Usings => null;
internal override string Usings => "using Microsoft.ML.Transforms.Conversions;\r\n";

private string ArgumentsName = "TypeConvertingTransformer.ColumnInfo";

Expand Down Expand Up @@ -271,7 +271,7 @@ public ValueToKeyMapping(PipelineNode node) : base(node)

internal override string MethodName => "Conversion.MapValueToKey";

internal override string Usings => null;
internal override string Usings => "using Microsoft.ML.Transforms.Conversions;\r\n";

public override string GenerateTransformer()
{
Expand Down

0 comments on commit 0e5feee

Please sign in to comment.