From 3b88d2f61fd3ba38f11af42b01f3b55c103f52a6 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sat, 5 May 2018 22:10:42 -0700 Subject: [PATCH 01/17] Code generated loader API and install it at the backend pipeline loader API. --- .../DataLoadSave/Text/TextLoader.cs | 2 +- .../EntryPoints/InputBase.cs | 1 + src/Microsoft.ML/CSharpApi.cs | 212 +++++++++++++++++- src/Microsoft.ML/Data/TextLoader.cs | 100 +++++++++ .../Runtime/EntryPoints/ImportTextData.cs | 23 +- .../Internal/Tools/CSharpApiGenerator.cs | 55 +++++ src/Microsoft.ML/TextLoader.cs | 125 ----------- .../UnitTests/TestCSharpApi.cs | 10 +- .../UnitTests/TestEntryPoints.cs | 38 ++-- .../Microsoft.ML.TestFramework/ModelHelper.cs | 4 +- test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- .../Scenario3_SentimentPrediction.cs | 59 ++++- .../Scenario_TrainPredictionModel.cs | 1 + .../Scenarios/TrainAndPredictIrisModelTest.cs | 1 + test/Microsoft.ML.Tests/TextLoaderTests.cs | 1 + 15 files changed, 476 insertions(+), 159 deletions(-) create mode 100644 src/Microsoft.ML/Data/TextLoader.cs delete mode 100644 src/Microsoft.ML/TextLoader.cs diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 3867b18f26..40519b39fd 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -317,7 +317,7 @@ public bool IsValid() } } - public sealed class Arguments : ArgumentsCore + public class Arguments : ArgumentsCore { [Argument(ArgumentType.AtMostOnce, HelpText = "Use separate parsing threads?", ShortName = "threads", Hide = true)] public bool UseThreads = true; diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs index 57a7c9120f..a787d43cb9 100644 --- a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs +++ b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs @@ -191,6 +191,7 @@ public static TOut Train(IHost host, TArg input, /// public static class CommonInputs { + /// /// Interface that all API transform input classes will implement. 
/// diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 46002c5abf..612fc94e36 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -22,6 +22,18 @@ namespace Runtime { public sealed partial class Experiment { + public Microsoft.ML.Data.CustomTextLoader.Output Add(Microsoft.ML.Data.CustomTextLoader input) + { + var output = new Microsoft.ML.Data.CustomTextLoader.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Data.CustomTextLoader input, Microsoft.ML.Data.CustomTextLoader.Output output) + { + _jsonNodes.Add(Serialize("Data.CustomTextLoader", input, output)); + } + public Microsoft.ML.Data.IDataViewArrayConverter.Output Add(Microsoft.ML.Data.IDataViewArrayConverter input) { var output = new Microsoft.ML.Data.IDataViewArrayConverter.Output(); @@ -1236,6 +1248,38 @@ public void Add(Microsoft.ML.Transforms.WordTokenizer input, Microsoft.ML.Transf } } + namespace Data + { + + /// + /// Import a dataset from a text file + /// + public sealed partial class CustomTextLoader + { + + + /// + /// Location of the input file + /// + public Var InputFile { get; set; } = new Var(); + + /// + /// Custom schema to use for parsing + /// + public string CustomSchema { get; set; } + + + public sealed class Output + { + /// + /// The resulting data view + /// + public Var Data { get; set; } = new Var(); + + } + } + } + namespace Data { @@ -1293,12 +1337,174 @@ public sealed class Output namespace Data { + public sealed class TextLoaderArguments + { + /// + /// Use separate parsing threads? + /// + public bool UseThreads { get; set; } = true; + + /// + /// File containing a header with feature names. If specified, header defined in the data file (header+) is ignored. + /// + public string HeaderFile { get; set; } + + /// + /// Maximum number of rows to produce + /// + public long? MaxRows { get; set; } + + /// + /// Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by "". When false, consecutive separators denote an empty value. + /// + public bool AllowQuoting { get; set; } = true; + + /// + /// Whether the input may include sparse representations + /// + public bool AllowSparse { get; set; } = true; + + /// + /// Number of source columns in the text data. Default is that sparse rows contain their size information. + /// + public int? InputSize { get; set; } + + /// + /// Source column separator. Options: tab, space, comma, single character + /// + public string Separator { get; set; } = "tab"; + + /// + /// Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 + /// + public TextLoaderColumn[] Column { get; set; } + + /// + /// Remove trailing whitespace from lines + /// + public bool TrimWhitespace { get; set; } = false; + + /// + /// Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified. + /// + public bool HasHeader { get; set; } = false; + + } + + public sealed class TextLoaderColumn + { + /// + /// Name of the column + /// + public string Name { get; set; } + + /// + /// Type of the items in the column + /// + public DataKind? 
Type { get; set; } + + /// + /// Source index range(s) of the column + /// + public TextLoaderRange[] Source { get; set; } + + /// + /// For a key column, this defines the range of values + /// + public KeyRange KeyRange { get; set; } + + } + + public sealed class TextLoaderRange + { + /// + /// First index in the range + /// + public int Min { get; set; } + + /// + /// Last index in the range + /// + public int? Max { get; set; } + + /// + /// This range extends to the end of the line, but should be a fixed number of items + /// + public bool AutoEnd { get; set; } = false; + + /// + /// This range extends to the end of the line, which can vary from line to line + /// + public bool VariableEnd { get; set; } = false; + + /// + /// This range includes only other indices not specified + /// + public bool AllOther { get; set; } = false; + + /// + /// Force scalar columns to be treated as vectors of length one + /// + public bool ForceVector { get; set; } = false; + + } + + public sealed class KeyRange + { + /// + /// First index in the range + /// + public ulong Min { get; set; } = 0; + + /// + /// Last index in the range + /// + public ulong? Max { get; set; } + + /// + /// Whether the key is contiguous + /// + public bool Contiguous { get; set; } = true; + + } + /// /// Import a dataset from a text file /// - public sealed partial class TextLoader + public partial class TextLoader : Microsoft.ML.ILearningPipelineLoader { + [JsonIgnore] + private string _inputFilePath = null; + public TextLoader(string filePath) + { + _inputFilePath = filePath; + } + + public void SetInput(IHostEnvironment env, Experiment experiment) + { + IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false); + experiment.SetInput(InputFile, inputFile); + } + + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + Contracts.Assert(previousStep == null); + + return new TextLoaderPipelineStep(experiment.Add(this)); + } + + private class TextLoaderPipelineStep : ILearningPipelineDataStep + { + public TextLoaderPipelineStep (Output output) + { + Data = output.Data; + Model = null; + } + + public Var Data { get; } + public Var Model { get; } + } /// /// Location of the input file @@ -1306,9 +1512,9 @@ public sealed partial class TextLoader public Var InputFile { get; set; } = new Var(); /// - /// Custom schema to use for parsing + /// Arguments /// - public string CustomSchema { get; set; } + public Data.TextLoaderArguments Arguments { get; set; } = new TextLoaderArguments(); public sealed class Output diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs new file mode 100644 index 0000000000..f447de1e11 --- /dev/null +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -0,0 +1,100 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text; + +namespace Microsoft.ML.Data +{ + public sealed class TextLoader : TextLoader + { + + /// + /// Construct a TextLoader object + /// + /// Data file path + /// Does the file contains header? + /// How the columns are seperated? + /// Options: separator="tab", separator="space", separator="comma" or separator=[single character]. 
+ /// By default separator=null means "tab" + /// Whether the input may include quoted values, + /// which can contain separator characters, colons, + /// and distinguish empty values from missing values. When true, consecutive separators + /// denote a missing value and an empty value is denoted by \"\". + /// When false, consecutive separators denote an empty value. + /// Whether the input may include sparse representations e.g. + /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero + /// except for 3rd and 5th columns which have values 6 and 3 + /// Remove trailing whitespace from lines + public TextLoader(string inputFilePath, bool useHeader = false, + string separator = null, bool allowQuotedStrings = true, + bool supportSparse = true, bool trimWhitespace = false) : base(inputFilePath) + { + var fields = typeof(TInput).GetFields(); + Arguments.Column = new TextLoaderColumn[fields.Length]; + for (int index = 0; index < fields.Length; index++) + { + var field = fields[index]; + var mappingAttr = field.GetCustomAttribute(); + if (mappingAttr == null) + throw Contracts.ExceptParam(nameof(field.Name), " is missing ColumnAttribute"); + + var col = Runtime.Data.TextLoader.Column.Parse( + $"{mappingAttr.Name ?? field.Name}:" + + $"{TypeToName(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType)}:" + + $"{mappingAttr.Ordinal}" + ); + + TextLoaderColumn tlc = new TextLoaderColumn(); + if (col.KeyRange != null) + { + tlc.KeyRange = new KeyRange(); + tlc.KeyRange.Min = col.KeyRange.Min; + tlc.KeyRange.Max = col.KeyRange.Max; + } + + tlc.Name = col.Name; + tlc.Source = new TextLoaderRange[col.Source.Length]; + for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++) + { + tlc.Source[indexLocal] = new TextLoaderRange + { + AllOther = col.Source[indexLocal].AllOther, + AutoEnd = col.Source[indexLocal].AutoEnd, + ForceVector = col.Source[indexLocal].ForceVector, + VariableEnd = col.Source[indexLocal].VariableEnd, + Max = col.Source[indexLocal].Max, + Min = col.Source[indexLocal].Min + }; + } + + tlc.Type = col.Type; + Arguments.Column[index] = tlc; + } + + Arguments.HasHeader = useHeader; + Arguments.Separator = separator; + Arguments.AllowQuoting = allowQuotedStrings; + Arguments.AllowSparse = supportSparse; + Arguments.TrimWhitespace = trimWhitespace; + } + + private string TypeToName(Type type) + { + if (type == typeof(string)) + return "TX"; + else if (type == typeof(float) || type == typeof(double)) + return "R4"; + else if (type == typeof(bool)) + return "BL"; + else + throw new Exception("Type not implemented or supported."); //Add more types. 
+ } + } +} diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index 8038294398..e9f9128d40 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -27,13 +27,23 @@ public sealed class Input public string CustomSchema = null; } + [TlcModule.EntryPointKind(typeof(ILearningPipelineLoader))] + public sealed class LoaderInput + { + [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] + public IFileHandle InputFile; + + [Argument(ArgumentType.AtMostOnce, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] + public TextLoader.Arguments Arguments; + } + public sealed class Output { [TlcModule.Output(Desc = "The resulting data view", SortOrder = 1)] public IDataView Data; } - [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] + [TlcModule.EntryPoint(Name = "Data.CustomTextLoader", Desc = "Import a dataset from a text file")] public static Output ImportText(IHostEnvironment env, Input input) { Contracts.CheckValue(env, nameof(env)); @@ -43,5 +53,16 @@ public static Output ImportText(IHostEnvironment env, Input input) var loader = host.CreateLoader(string.Format("Text{{{0}}}", input.CustomSchema), new FileHandleSource(input.InputFile)); return new Output { Data = loader }; } + + [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] + public static Output TextLoader(IHostEnvironment env, LoaderInput input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("ImportTextData"); + env.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + var loader = host.CreateLoader(input.Arguments, new FileHandleSource(input.InputFile)); + return new Output { Data = loader }; + } } } diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index 17643518ca..6234f70a86 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -640,6 +640,58 @@ private void GenerateStructs(IndentingTextWriter writer, } } + private void GenerateLoaderAddInputMethod(IndentingTextWriter writer, string className) + { + //Constructor. + writer.WriteLine("[JsonIgnore]"); + writer.WriteLine("private string _inputFilePath = null;"); + writer.WriteLine($"public {className}(string filePath)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("_inputFilePath = filePath;"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //SetInput. + writer.WriteLine($"public void SetInput(IHostEnvironment env, Experiment experiment)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false);"); + writer.WriteLine("experiment.SetInput(InputFile, inputFile);"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //Apply. 
+ writer.WriteLine($"public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("Contracts.Assert(previousStep == null);"); + writer.WriteLine(""); + writer.WriteLine($"return new {className}PipelineStep(experiment.Add(this));"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(""); + + //Pipelinestep class. + writer.WriteLine($"private class {className}PipelineStep : ILearningPipelineDataStep"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine($"public {className}PipelineStep (Output output)"); + writer.WriteLine("{"); + writer.Indent(); + writer.WriteLine("Data = output.Data;"); + writer.WriteLine("Model = null;"); + writer.Outdent(); + writer.WriteLine("}"); + writer.WriteLine(); + writer.WriteLine("public Var Data { get; }"); + writer.WriteLine("public Var Model { get; }"); + writer.Outdent(); + writer.WriteLine("}"); + } + private void GenerateColumnAddMethods(IndentingTextWriter writer, Type inputType, ModuleCatalog catalog, @@ -802,6 +854,9 @@ private void GenerateInput(IndentingTextWriter writer, writer.WriteLine("{"); writer.Indent(); writer.WriteLine(); + if (classBase.Contains("ILearningPipelineLoader")) + GenerateLoaderAddInputMethod(writer, classAndMethod.Item2); + GenerateColumnAddMethods(writer, entryPointInfo.InputType, catalog, classAndMethod.Item2, out Type transformType); writer.WriteLine(); GenerateInputFields(writer, entryPointInfo.InputType, catalog, _typesSymbolTable); diff --git a/src/Microsoft.ML/TextLoader.cs b/src/Microsoft.ML/TextLoader.cs deleted file mode 100644 index de592a4f69..0000000000 --- a/src/Microsoft.ML/TextLoader.cs +++ /dev/null @@ -1,125 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Api; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints; -using System; -using System.Linq; -using System.Reflection; -using System.Text; - -namespace Microsoft.ML -{ - public class TextLoader : ILearningPipelineLoader - { - private string _inputFilePath; - private string CustomSchema; - private Data.TextLoader ImportTextInput; - - /// - /// Construct a TextLoader object - /// - /// Data file path - /// Does the file contains header? - /// How the columns are seperated? - /// Options: separator="tab", separator="space", separator="comma" or separator=[single character]. - /// By default separator=null means "tab" - /// Whether the input may include quoted values, - /// which can contain separator characters, colons, - /// and distinguish empty values from missing values. When true, consecutive separators - /// denote a missing value and an empty value is denoted by \"\". - /// When false, consecutive separators denote an empty value. - /// Whether the input may include sparse representations e.g. 
- /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero - /// except for 3rd and 5th columns which have values 6 and 3 - /// Remove trailing whitespace from lines - public TextLoader(string inputFilePath, bool useHeader = false, - string separator = null, bool allowQuotedStrings = true, - bool supportSparse = true, bool trimWhitespace = false) - { - _inputFilePath = inputFilePath; - SetCustomStringFromType(useHeader, separator, allowQuotedStrings, supportSparse, trimWhitespace); - } - - private IFileHandle GetTextLoaderFileHandle(IHostEnvironment env, string trainFilePath) => - new SimpleFileHandle(env, trainFilePath, false, false); - - private void SetCustomStringFromType(bool useHeader, string separator, - bool allowQuotedStrings, bool supportSparse, bool trimWhitespace) - { - StringBuilder schemaBuilder = new StringBuilder(CustomSchema); - foreach (var field in typeof(TInput).GetFields()) - { - var mappingAttr = field.GetCustomAttribute(); - if(mappingAttr == null) - throw Contracts.ExceptParam(nameof(field.Name), " is missing ColumnAttribute"); - - schemaBuilder.AppendFormat("col={0}:{1}:{2} ", - mappingAttr.Name ?? field.Name, - TypeToName(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType), - mappingAttr.Ordinal); - } - - if (useHeader) - schemaBuilder.Append(nameof(TextLoader.Arguments.HasHeader)).Append("+ "); - - if (separator != null) - schemaBuilder.Append(nameof(TextLoader.Arguments.Separator)).Append("=").Append(separator).Append(" "); - - if (!allowQuotedStrings) - schemaBuilder.Append(nameof(TextLoader.Arguments.AllowQuoting)).Append("- "); - - if (!supportSparse) - schemaBuilder.Append(nameof(TextLoader.Arguments.AllowSparse)).Append("- "); - - if (trimWhitespace) - schemaBuilder.Append(nameof(TextLoader.Arguments.TrimWhitespace)).Append("+ "); - - schemaBuilder.Length--; - CustomSchema = schemaBuilder.ToString(); - } - - private string TypeToName(Type type) - { - if (type == typeof(string)) - return "TX"; - else if (type == typeof(float) || type == typeof(double)) - return "R4"; - else if (type == typeof(bool)) - return "BL"; - else - throw new Exception("Type not implemented or supported."); //Add more types. 
- } - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - Contracts.Assert(previousStep == null); - - ImportTextInput = new Data.TextLoader(); - ImportTextInput.CustomSchema = CustomSchema; - var importOutput = experiment.Add(ImportTextInput); - return new TextLoaderPipelineStep(importOutput.Data); - } - - public void SetInput(IHostEnvironment env, Experiment experiment) - { - IFileHandle inputFile = GetTextLoaderFileHandle(env, _inputFilePath); - experiment.SetInput(ImportTextInput.InputFile, inputFile); - } - - private class TextLoaderPipelineStep : ILearningPipelineDataStep - { - public TextLoaderPipelineStep(Var data) - { - Data = data; - Model = null; - } - - public Var Data { get; } - public Var Model { get; } - } - } -} diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index b66d61ae69..8bfa4e4e78 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -36,7 +36,7 @@ public void TestSimpleExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.CustomTextLoader(); var importOutput = experiment.Add(importInput); var normalizeInput = new ML.Transforms.MinMaxNormalizer @@ -67,7 +67,7 @@ public void TestSimpleTrainExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.CustomTextLoader(); var importOutput = experiment.Add(importInput); var catInput = new ML.Transforms.CategoricalOneHotVectorizer @@ -165,7 +165,7 @@ public void TestTrainTestMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.CustomTextLoader(); var importOutput = experiment.Add(importInput); var trainTestInput = new ML.Models.TrainTestBinaryEvaluator @@ -235,7 +235,7 @@ public void TestCrossValidationBinaryMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.CustomTextLoader(); var importOutput = experiment.Add(importInput); var crossValidateBinary = new ML.Models.BinaryCrossValidator @@ -295,7 +295,7 @@ public void TestCrossValidationMacro() var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(); + var importInput = new ML.Data.CustomTextLoader(); var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 4fe503b9b7..4a4032bf03 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -305,7 +305,7 @@ public void EntryPointOptionalParams() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file1' }, @@ -355,7 +355,7 @@ public void EntryPointExecGraphCommand() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -512,7 +512,7 @@ public void EntryPointParseColumns() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -562,7 +562,7 @@ public void EntryPointCountFeatures() {{ 'Nodes': [ {{ - 
'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -607,7 +607,7 @@ public void EntryPointMutualSelectFeatures() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' }}, @@ -653,7 +653,7 @@ public void EntryPointTextToKeyToText() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': 'sep=comma col=Cat:TX:4' @@ -735,7 +735,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file' }}, @@ -1214,7 +1214,7 @@ internal void TestEntryPointPipelineRoutine(string dataFile, string schema, stri {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': '{schema}' @@ -1287,7 +1287,7 @@ internal void TestEntryPointRoutine(string dataFile, string trainerName, string {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1' {3} @@ -1459,7 +1459,7 @@ public void EntryPointNormalizeIfNeeded() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1522,7 +1522,7 @@ public void EntryPointTrainTestBinaryMacro() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1630,7 +1630,7 @@ public void EntryPointTrainTestMacroNoTransformInput() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1744,7 +1744,7 @@ public void EntryPointTrainTestMacro() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -1843,7 +1843,7 @@ public void EntryPointChainedTrainTestMacros() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2019,7 +2019,7 @@ public void EntryPointChainedCrossValMacros() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2214,7 +2214,7 @@ public void EntryPointMacroEarlyExpansion() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2302,7 +2302,7 @@ public void EntryPointSerialization() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': { 'InputFile': '$file' }, @@ -2368,7 +2368,7 @@ public void EntryPointNodeSchedulingFields() { 'Nodes': [ { - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'StageId': '5063dee8f19c4dd89a1fc3a9da5351a7', 'Inputs': { 'InputFile': '$file' @@ -2437,7 +2437,7 @@ public void EntryPointPrepareLabelConvertPredictedLabel() {{ 'Nodes': [ {{ - 'Name': 'Data.TextLoader', + 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file1', 'CustomSchema': 'sep=comma col=Label:TX:4 col=Features:Num:0-3' diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index dca360c4e3..0e8a4000ac 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -54,9 +54,9 @@ private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) Experiment experiment = s_environment.CreateExperiment(); - var 
importData = new Data.TextLoader(); + var importData = new Data.CustomTextLoader(); importData.CustomSchema = dataSchema; - Data.TextLoader.Output imported = experiment.Add(importData); + Data.CustomTextLoader.Output imported = experiment.Add(importData); var numericalConcatenate = new Transforms.ColumnConcatenator(); numericalConcatenate.Data = imported.Data; diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index c647110702..316d7eab55 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,7 +15,8 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - [Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] + //[Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] + [Fact] public void GenerateCSharpAPI() { var cSharpAPIPath = Path.Combine(RootDir, @"src\\Microsoft.ML\\CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs index a0591d34b9..97a2e3de84 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; @@ -23,7 +24,35 @@ public void TrainAndPredictSentimentModelTest() { string dataPath = GetDataPath(SentimentDataPath); var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: true, separator: "tab")); + + pipeline.Add(new TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = "tab", + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] + { + new TextLoaderRange() { Min = 0, Max = 0} + }, + Type = Runtime.Data.DataKind.R4 + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.TX + } + } + } + }); + pipeline.Add(new TextFeaturizer("Features", "SentimentText") { KeepDiacritics = false, @@ -60,7 +89,33 @@ public void TrainAndPredictSentimentModelTest() Assert.True(predictions.ElementAt(1).Sentiment); string testDataPath = GetDataPath(SentimentTestPath); - var testData = new TextLoader(testDataPath, useHeader: true, separator: "tab"); + var testData = new TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = "tab", + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] + { + new TextLoaderRange() { Min = 0, Max = 0} + }, + Type = Runtime.Data.DataKind.R4 + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.TX + } + } + } + }; var evaluator = new BinaryClassificationEvaluator(); BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData); diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs b/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs index de7d2f6a00..162c6566c4 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs +++ 
b/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; diff --git a/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs b/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs index d897303e30..4fc2c99acc 100644 --- a/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs +++ b/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index ad85ae47f7..3610e6e7c6 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; From c0ff88121e8e0b813495a76222325cba84681756 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 6 May 2018 12:47:02 -0700 Subject: [PATCH 02/17] Update baselines and add checks for invalid loader arguments. --- .../Common/EntryPoints/core_ep-list.tsv | 3 +- .../Common/EntryPoints/core_manifest.json | 365 +++++++++++++++++- .../EntryPoints/ModuleArgs.cs | 6 + .../EntryPoints/ModuleCatalog.cs | 2 + src/Microsoft.ML/CSharpApi.cs | 2 +- src/Microsoft.ML/Data/TextLoader.cs | 22 +- src/Microsoft.ML/LearningPipeline.cs | 4 +- .../Runtime/EntryPoints/ImportTextData.cs | 6 +- .../Internal/Tools/CSharpApiGenerator.cs | 4 +- .../Scenario3_SentimentPrediction.cs | 56 ++- 10 files changed, 420 insertions(+), 50 deletions(-) diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 568a6066f9..0a5c423892 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,6 +1,7 @@ +Data.CustomTextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput -Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output +Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData TextLoader 
Microsoft.ML.Runtime.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Models.AnomalyDetectionEvaluator Evaluates an anomaly detection scored dataset. Microsoft.ML.Runtime.Data.Evaluate AnomalyDetection Microsoft.ML.Runtime.Data.AnomalyDetectionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.BinaryClassificationEvaluator Evaluates a binary classification scored dataset. Microsoft.ML.Runtime.Data.Evaluate Binary Microsoft.ML.Runtime.Data.BinaryClassifierMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+ClassificationEvaluateOutput Models.BinaryCrossValidator Cross validation for binary classification Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro CrossValidateBinary Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Output] diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index a3778a7f7f..1071227c0f 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -1,5 +1,43 @@ { "EntryPoints": [ + { + "Name": "Data.CustomTextLoader", + "Desc": "Import a dataset from a text file", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "InputFile", + "Type": "FileHandle", + "Desc": "Location of the input file", + "Aliases": [ + "data" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "CustomSchema", + "Type": "String", + "Desc": "Custom schema to use for parsing", + "Aliases": [ + "schema" + ], + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "The resulting data view" + } + ] + }, { "Name": "Data.IDataViewArrayConverter", "Desc": "Create and array variable", @@ -76,16 +114,320 @@ "IsNullable": false }, { - "Name": "CustomSchema", - "Type": "String", - "Desc": "Custom schema to use for parsing", + "Name": "Arguments", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Name", + "Type": "String", + "Desc": "Name of the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "Type", + "Type": { + "Kind": "Enum", + "Values": [ + "I1", + "U1", + "I2", + "U2", + "I4", + "U4", + "I8", + "U8", + "R4", + "Num", + "R8", + "TX", + "Text", + "TXT", + "BL", + "Bool", + "TimeSpan", + "TS", + "DT", + "DateTime", + "DZ", + "DateTimeZone", + "UG", + "U16" + ] + }, + "Desc": "Type of the items in the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Source", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "Int", + "Desc": "First index in the range", + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "Int", + "Desc": "Last index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AutoEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, but should be a fixed number of items", + "Aliases": [ + "auto" + ], + "Required": false, 
+ "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "VariableEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, which can vary from line to line", + "Aliases": [ + "var" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "AllOther", + "Type": "Bool", + "Desc": "This range includes only other indices not specified", + "Aliases": [ + "other" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "ForceVector", + "Type": "Bool", + "Desc": "Force scalar columns to be treated as vectors of length one", + "Aliases": [ + "vector" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + } + }, + "Desc": "Source index range(s) of the column", + "Aliases": [ + "src" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "KeyRange", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "UInt", + "Desc": "First index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "UInt", + "Desc": "Last index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Contiguous", + "Type": "Bool", + "Desc": "Whether the key is contiguous", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + } + ] + }, + "Desc": "For a key column, this defines the range of values", + "Aliases": [ + "key" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + } + ] + } + }, + "Desc": "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", + "Aliases": [ + "col" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "UseThreads", + "Type": "Bool", + "Desc": "Use separate parsing threads?", + "Aliases": [ + "threads" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "HeaderFile", + "Type": "String", + "Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.", + "Aliases": [ + "hf" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "MaxRows", + "Type": "Int", + "Desc": "Maximum number of rows to produce", + "Aliases": [ + "rows" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AllowQuoting", + "Type": "Bool", + "Desc": "Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by \"\". When false, consecutive separators denote an empty value.", + "Aliases": [ + "quote" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "AllowSparse", + "Type": "Bool", + "Desc": "Whether the input may include sparse representations", + "Aliases": [ + "sparse" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "InputSize", + "Type": "Int", + "Desc": "Number of source columns in the text data. 
Default is that sparse rows contain their size information.", + "Aliases": [ + "size" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Separator", + "Type": "String", + "Desc": "Source column separator. Options: tab, space, comma, single character", + "Aliases": [ + "sep" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "tab" + }, + { + "Name": "TrimWhitespace", + "Type": "Bool", + "Desc": "Remove trailing whitespace from lines", + "Aliases": [ + "trim" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "HasHeader", + "Type": "Bool", + "Desc": "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.", + "Aliases": [ + "header" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + }, + "Desc": "Arguments", "Aliases": [ - "schema" + "args" ], - "Required": false, - "SortOrder": 2.0, - "IsNullable": false, - "Default": null + "Required": true, + "SortOrder": 1.0, + "IsNullable": false } ], "Outputs": [ @@ -94,6 +436,9 @@ "Type": "DataView", "Desc": "The resulting data view" } + ], + "InputKind": [ + "ILearningPipelineLoader" ] }, { @@ -21578,6 +21923,10 @@ } ] }, + { + "Kind": "ILearningPipelineLoader", + "Settings": [] + }, { "Kind": "IMulticlassClassificationOutput", "Settings": [] diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs index 99cfec0dd9..9b778557eb 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs @@ -527,6 +527,12 @@ public sealed class EntryPointAttribute : Attribute /// Short name of the Entry Point /// public string ShortName { get; set; } + + /// + /// Indicates if the code generated should not be sealed. + /// By default all classes are sealed. + /// + public bool NoSeal { get; set; } } /// diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs index 3a468ad451..ac1698e520 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs @@ -40,6 +40,7 @@ public sealed class ModuleCatalog /// public sealed class EntryPointInfo { + public readonly bool NoSeal; public readonly string Name; public readonly string Description; public readonly string ShortName; @@ -57,6 +58,7 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, TlcModule.Ent ectx.AssertValue(attribute); Name = attribute.Name ?? 
string.Join(".", method.DeclaringType.Name, method.Name); + NoSeal = attribute.NoSeal; Description = attribute.Desc; Method = method; ShortName = attribute.ShortName; diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 612fc94e36..e9e527c7fd 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1514,7 +1514,7 @@ public TextLoaderPipelineStep (Output output) /// /// Arguments /// - public Data.TextLoaderArguments Arguments { get; set; } = new TextLoaderArguments(); + public Data.TextLoaderArguments Arguments { get; set; } = new Data.TextLoaderArguments(); public sealed class Output diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index f447de1e11..f9ed1db012 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -9,6 +9,7 @@ using System.Linq; using System.Reflection; using System.Text; +using System.Text.RegularExpressions; namespace Microsoft.ML.Data { @@ -43,14 +44,29 @@ public TextLoader(string inputFilePath, bool useHeader = false, var field = fields[index]; var mappingAttr = field.GetCustomAttribute(); if (mappingAttr == null) - throw Contracts.ExceptParam(nameof(field.Name), " is missing ColumnAttribute"); + throw Contracts.Except($"{field.Name} is missing ColumnAttribute"); + + if (Regex.Match(mappingAttr.Ordinal, @"[^(0-9,\*\-~)]+").Success) + throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " + + $"Valid characters are 0-9, *, - and ~"); + + var name = mappingAttr.Name ?? field.Name; + if (name.Any(c => !Char.IsLetterOrDigit(c))) + throw Contracts.Except($"{name} is not alphanumeric."); + + if(separator != null) + { + if(separator != "space" && separator != "tab" && separator != "comma" && separator.Length > 1) + throw Contracts.Except($"{nameof(separator)} can only be one of the following: space, tab, comma" + + $" or a single character."); + } var col = Runtime.Data.TextLoader.Column.Parse( - $"{mappingAttr.Name ?? field.Name}:" + + $"{name}:" + $"{TypeToName(field.FieldType.IsArray ? 
field.FieldType.GetElementType() : field.FieldType)}:" + $"{mappingAttr.Ordinal}" ); - + TextLoaderColumn tlc = new TextLoaderColumn(); if (col.KeyRange != null) { diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 0ece3697a9..aa490abc09 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -66,7 +66,7 @@ public PredictionModel Train() step = currentItem.ApplyStep(step, experiment); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) transformModels.Add(dataStep.Model); - + else if (step is ILearningPipelinePredictorStep predictorDataStep) { if (lastTransformModel != null) @@ -94,7 +94,7 @@ public PredictionModel Train() if (transformModels.Count > 0) { - transformModels.Insert(0,lastTransformModel); + transformModels.Insert(0, lastTransformModel); var modelInput = new Transforms.ModelCombiner { Models = new ArrayVar(transformModels.ToArray()) diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index e9f9128d40..77a54b2abb 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -33,8 +33,8 @@ public sealed class LoaderInput [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] public IFileHandle InputFile; - [Argument(ArgumentType.AtMostOnce, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] - public TextLoader.Arguments Arguments; + [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 1)] + public TextLoader.Arguments Arguments = new TextLoader.Arguments(); } public sealed class Output @@ -54,7 +54,7 @@ public static Output ImportText(IHostEnvironment env, Input input) return new Output { Data = loader }; } - [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] + [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file", NoSeal = true)] public static Output TextLoader(IHostEnvironment env, LoaderInput input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index 6234f70a86..ccd8bc47e4 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -850,7 +850,9 @@ private void GenerateInput(IndentingTextWriter writer, foreach (var line in entryPointInfo.Description.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)) writer.WriteLine($"/// {line}"); writer.WriteLine("/// "); - writer.WriteLine($"public sealed partial class {classAndMethod.Item2}{classBase}"); + + string seal = entryPointInfo.NoSeal ? 
"" : "sealed "; + writer.WriteLine($"public {seal}partial class {classAndMethod.Item2}{classBase}"); writer.WriteLine("{"); writer.Indent(); writer.WriteLine(); diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs index 97a2e3de84..216f36fbd2 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs @@ -36,18 +36,15 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] - { - new TextLoaderRange() { Min = 0, Max = 0} - }, - Type = Runtime.Data.DataKind.R4 + Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, - Type = Runtime.Data.DataKind.TX + Type = Runtime.Data.DataKind.Text } } } @@ -100,18 +97,15 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] - { - new TextLoaderRange() { Min = 0, Max = 0} - }, - Type = Runtime.Data.DataKind.R4 + Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, - Type = Runtime.Data.DataKind.TX + Type = Runtime.Data.DataKind.Text } } } @@ -120,17 +114,17 @@ public void TrainAndPredictSentimentModelTest() var evaluator = new BinaryClassificationEvaluator(); BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData); - Assert.Equal(.7222, metrics.Accuracy, 4); - Assert.Equal(.9643, metrics.Auc, 1); - Assert.Equal(.96, metrics.Auprc, 2); - Assert.Equal(1, metrics.Entropy, 3); - Assert.Equal(.7826, metrics.F1Score, 4); - Assert.Equal(.812, metrics.LogLoss, 3); - Assert.Equal(18.831, metrics.LogLossReduction, 3); - Assert.Equal(1, metrics.NegativePrecision, 3); - Assert.Equal(.444, metrics.NegativeRecall, 3); - Assert.Equal(.643, metrics.PositivePrecision, 3); - Assert.Equal(1, metrics.PositiveRecall); + Assert.Equal(0.872, metrics.Accuracy, 4); + Assert.Equal(0.9339, metrics.Auc, 1); + Assert.Equal(0.949, metrics.Auprc, 2); + Assert.Equal(0.9521, metrics.Entropy, 3); + Assert.Equal(0.9030, metrics.F1Score, 4); + Assert.Equal(0.6961, metrics.LogLoss, 3); + Assert.Equal(26.8935, metrics.LogLossReduction, 3); + Assert.Equal(0.8961, metrics.NegativePrecision, 3); + Assert.Equal(0.7419, metrics.NegativeRecall, 3); + Assert.Equal(0.8612, metrics.PositivePrecision, 3); + Assert.Equal(0.9490, metrics.PositiveRecall, 3); ConfusionMatrix matrix = metrics.ConfusionMatrix; Assert.Equal(2, matrix.Order); @@ -138,15 +132,15 @@ public void TrainAndPredictSentimentModelTest() Assert.Equal("positive", matrix.ClassNames[0]); Assert.Equal("negative", matrix.ClassNames[1]); - Assert.Equal(9, matrix[0, 0]); - Assert.Equal(9, matrix["positive", "positive"]); - Assert.Equal(0, matrix[0, 1]); - Assert.Equal(0, matrix["positive", "negative"]); + Assert.Equal(149, matrix[0, 0]); + Assert.Equal(149, matrix["positive", "positive"]); + Assert.Equal(8, matrix[0, 1]); + Assert.Equal(8, matrix["positive", "negative"]); - Assert.Equal(5, matrix[1, 0]); - Assert.Equal(5, matrix["negative", "positive"]); - Assert.Equal(4, matrix[1, 1]); - Assert.Equal(4, matrix["negative", "negative"]); + Assert.Equal(24, matrix[1, 0]); + Assert.Equal(24, matrix["negative", "positive"]); 
+ Assert.Equal(69, matrix[1, 1]); + Assert.Equal(69, matrix["negative", "negative"]); } public class SentimentData From 6d5f3512601f420971c855fb02743ec516e3c993 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 6 May 2018 14:51:41 -0700 Subject: [PATCH 03/17] Revert test metric changes and change test file for evaluation to be test data file instead of train file. --- .../Scenario3_SentimentPrediction.cs | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs index 216f36fbd2..37c841e6f0 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs @@ -86,7 +86,7 @@ public void TrainAndPredictSentimentModelTest() Assert.True(predictions.ElementAt(1).Sentiment); string testDataPath = GetDataPath(SentimentTestPath); - var testData = new TextLoader(dataPath) + var testData = new TextLoader(testDataPath) { Arguments = new TextLoaderArguments { @@ -110,21 +110,20 @@ public void TrainAndPredictSentimentModelTest() } } }; - var evaluator = new BinaryClassificationEvaluator(); BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData); - Assert.Equal(0.872, metrics.Accuracy, 4); - Assert.Equal(0.9339, metrics.Auc, 1); - Assert.Equal(0.949, metrics.Auprc, 2); - Assert.Equal(0.9521, metrics.Entropy, 3); - Assert.Equal(0.9030, metrics.F1Score, 4); - Assert.Equal(0.6961, metrics.LogLoss, 3); - Assert.Equal(26.8935, metrics.LogLossReduction, 3); - Assert.Equal(0.8961, metrics.NegativePrecision, 3); - Assert.Equal(0.7419, metrics.NegativeRecall, 3); - Assert.Equal(0.8612, metrics.PositivePrecision, 3); - Assert.Equal(0.9490, metrics.PositiveRecall, 3); + Assert.Equal(.7222, metrics.Accuracy, 4); + Assert.Equal(.9643, metrics.Auc, 1); + Assert.Equal(.96, metrics.Auprc, 2); + Assert.Equal(1, metrics.Entropy, 3); + Assert.Equal(.7826, metrics.F1Score, 4); + Assert.Equal(.812, metrics.LogLoss, 3); + Assert.Equal(18.831, metrics.LogLossReduction, 3); + Assert.Equal(1, metrics.NegativePrecision, 3); + Assert.Equal(.444, metrics.NegativeRecall, 3); + Assert.Equal(.643, metrics.PositivePrecision, 3); + Assert.Equal(1, metrics.PositiveRecall); ConfusionMatrix matrix = metrics.ConfusionMatrix; Assert.Equal(2, matrix.Order); @@ -132,15 +131,15 @@ public void TrainAndPredictSentimentModelTest() Assert.Equal("positive", matrix.ClassNames[0]); Assert.Equal("negative", matrix.ClassNames[1]); - Assert.Equal(149, matrix[0, 0]); - Assert.Equal(149, matrix["positive", "positive"]); - Assert.Equal(8, matrix[0, 1]); - Assert.Equal(8, matrix["positive", "negative"]); + Assert.Equal(9, matrix[0, 0]); + Assert.Equal(9, matrix["positive", "positive"]); + Assert.Equal(0, matrix[0, 1]); + Assert.Equal(0, matrix["positive", "negative"]); - Assert.Equal(24, matrix[1, 0]); - Assert.Equal(24, matrix["negative", "positive"]); - Assert.Equal(69, matrix[1, 1]); - Assert.Equal(69, matrix["negative", "negative"]); + Assert.Equal(5, matrix[1, 0]); + Assert.Equal(5, matrix["negative", "positive"]); + Assert.Equal(4, matrix[1, 1]); + Assert.Equal(4, matrix["negative", "negative"]); } public class SentimentData From 51d56589823f429ba46d18a99491a67f45df3a9d Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 6 May 2018 17:24:02 -0700 Subject: [PATCH 04/17] cleanup. 
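For context on the API these generated classes expose, a minimal usage sketch assembled from the Scenario3_SentimentPrediction test as it stands after patch 02 is included below. It is illustrative only: the generic argument TextLoader&lt;SentimentData&gt;, the dataPath variable, and the SentimentData input class (a [Column]-annotated type from the test) are assumptions here, since angle-bracketed text does not survive in the diffs above.

    // Illustrative sketch only; mirrors test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs
    // as of patch 02. "dataPath" and the <SentimentData> type argument are assumed, not taken verbatim.
    var pipeline = new LearningPipeline();
    pipeline.Add(new TextLoader<SentimentData>(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = "tab",
            HasHeader = true,
            Column = new[]
            {
                // Column 0 -> Label (numeric), column 1 -> SentimentText (text).
                new TextLoaderColumn
                {
                    Name = "Label",
                    Source = new[] { new TextLoaderRange { Min = 0, Max = 0 } },
                    Type = Microsoft.ML.Runtime.Data.DataKind.Num
                },
                new TextLoaderColumn
                {
                    Name = "SentimentText",
                    Source = new[] { new TextLoaderRange { Min = 1, Max = 1 } },
                    Type = Microsoft.ML.Runtime.Data.DataKind.Text
                }
            }
        }
    });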
--- src/Microsoft.ML/CSharpApi.cs | 416 +++++++++++------------ test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- 2 files changed, 209 insertions(+), 210 deletions(-) diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index e9e527c7fd..2402ecf95a 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1480,23 +1480,23 @@ public TextLoader(string filePath) { _inputFilePath = filePath; } - + public void SetInput(IHostEnvironment env, Experiment experiment) { IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false); experiment.SetInput(InputFile, inputFile); } - + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { Contracts.Assert(previousStep == null); - + return new TextLoaderPipelineStep(experiment.Add(this)); } - + private class TextLoaderPipelineStep : ILearningPipelineDataStep { - public TextLoaderPipelineStep (Output output) + public TextLoaderPipelineStep(Output output) { Data = output.Data; Model = null; @@ -3178,13 +3178,13 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[] { 0.01f, 0.1f, 0.5f, 1f })] public float LearningRate { get; set; } = 1f; /// /// Decrease learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[] { false, true })] public bool DecreaseLearningRate { get; set; } = false; /// @@ -3226,7 +3226,7 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumIterations { get; set; } = 1; /// @@ -3237,13 +3237,13 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -3327,25 +3327,25 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. 
Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] public int MemorySize { get; set; } = 20; /// @@ -3367,7 +3367,7 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// @@ -3383,7 +3383,7 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] public bool DenseOptimizer { get; set; } = false; /// @@ -3606,19 +3606,19 @@ public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.En /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -3883,19 +3883,19 @@ public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoin /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -4107,19 +4107,19 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, 
isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -4276,19 +4276,19 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -4528,19 +4528,19 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -4697,19 +4697,19 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + 
[TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -4909,19 +4909,19 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -5078,19 +5078,19 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -5295,19 +5295,19 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -5464,19 +5464,19 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new 
object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -5646,7 +5646,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// Total number of iterations over all features /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] + [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[] { 200, 1500, 9500 })] public int NumIterations { get; set; } = 9500; /// @@ -5657,7 +5657,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale: true)] public double LearningRates { get; set; } = 0.002d; /// @@ -5688,7 +5688,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// Minimum number of training instances required to form a partition /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[] { 1, 10, 50 })] public int MinDocuments { get; set; } = 10; /// @@ -5782,7 +5782,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// Total number of iterations over all features /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] + [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[] { 200, 1500, 9500 })] public int NumIterations { get; set; } = 9500; /// @@ -5793,7 +5793,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale: true)] public double LearningRates { get; set; } = 0.002d; /// @@ -5824,7 +5824,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// Minimum number of training instances required to form a partition /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[] { 1, 10, 50 })] public int MinDocuments { get; set; } = 10; /// @@ -5908,7 +5908,7 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Regularizer constant /// - [TlcModule.SweepableFloatParamAttribute("Lambda", 1E-05f, 0.1f, stepSize:10, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Lambda", 1E-05f, 0.1f, stepSize: 10, isLogScale: true)] public float Lambda { get; set; } = 0.001f; /// @@ -5919,13 +5919,13 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Perform projection to unit-ball? Typically used with batch size > 1. 
/// - [TlcModule.SweepableDiscreteParamAttribute("PerformProjection", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("PerformProjection", new object[] { false, true })] public bool PerformProjection { get; set; } = false; /// /// No bias /// - [TlcModule.SweepableDiscreteParamAttribute("NoBias", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("NoBias", new object[] { false, true })] public bool NoBias { get; set; } = false; /// @@ -5942,7 +5942,7 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumIterations { get; set; } = 1; /// @@ -5953,13 +5953,13 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -6043,25 +6043,25 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. 
Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] public int MemorySize { get; set; } = 20; /// @@ -6083,7 +6083,7 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// @@ -6099,7 +6099,7 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] public bool DenseOptimizer { get; set; } = false; /// @@ -6257,13 +6257,13 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[] { 0.01f, 0.1f, 0.5f, 1f })] public float LearningRate { get; set; } = 0.1f; /// /// Decrease learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[] { false, true })] public bool DecreaseLearningRate { get; set; } = true; /// @@ -6305,7 +6305,7 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumIterations { get; set; } = 1; /// @@ -6316,13 +6316,13 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -6401,7 +6401,7 @@ public sealed partial class OrdinaryLeastSquaresRegressor : Microsoft.ML.Runtime /// /// L2 regularization weight /// - [TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[]{1E-06f, 0.1f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[] { 1E-06f, 0.1f, 1f })] public float L2Weight { get; set; } = 1E-06f; /// @@ -6485,25 +6485,25 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. 
/// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] public int MemorySize { get; set; } = 20; /// @@ -6525,7 +6525,7 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] public float InitWtsDiameter { get; set; } /// @@ -6541,7 +6541,7 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] public bool DenseOptimizer { get; set; } = false; /// @@ -6647,13 +6647,13 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] public float? L1Threshold { get; set; } /// @@ -6664,19 +6664,19 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] public float ConvergenceTolerance { get; set; } = 0.1f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. 
/// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -6687,7 +6687,7 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] public float BiasLearningRate { get; set; } /// @@ -6767,13 +6767,13 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] public float? L1Threshold { get; set; } /// @@ -6784,19 +6784,19 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] public float ConvergenceTolerance { get; set; } = 0.1f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -6807,7 +6807,7 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] public float BiasLearningRate { get; set; } /// @@ -6887,13 +6887,13 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// L2 regularizer constant. 
By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] public float? L1Threshold { get; set; } /// @@ -6904,19 +6904,19 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] public float ConvergenceTolerance { get; set; } = 0.01f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -6927,7 +6927,7 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] public float BiasLearningRate { get; set; } = 1f; /// @@ -7007,7 +7007,7 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// L2 regularizer constant /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{1E-07f, 5E-07f, 1E-06f, 5E-06f, 1E-05f})] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { 1E-07f, 5E-07f, 1E-06f, 5E-06f, 1E-05f })] public float L2Const { get; set; } = 1E-06f; /// @@ -7018,13 +7018,13 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// Exponential moving averaged improvement tolerance for convergence /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.01f, 0.001f, 0.0001f, 1E-05f})] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.01f, 0.001f, 0.0001f, 1E-05f })] public double ConvergenceTolerance { get; set; } = 0.0001d; /// /// Maximum number of iterations; set to 1 to simulate online learning. 
/// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{1, 5, 10, 20})] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { 1, 5, 10, 20 })] public int MaxIterations { get; set; } = 20; /// @@ -7035,7 +7035,7 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle { get; set; } = true; /// @@ -7298,7 +7298,7 @@ public sealed partial class BinNormalizer : Microsoft.ML.Runtime.EntryPoints.Com public BinNormalizer() { } - + public BinNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -7309,7 +7309,7 @@ public BinNormalizer(params string[] inputColumns) } } } - + public BinNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7320,7 +7320,7 @@ public BinNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7460,7 +7460,7 @@ public sealed partial class CategoricalHashOneHotVectorizer : Microsoft.ML.Runti public CategoricalHashOneHotVectorizer() { } - + public CategoricalHashOneHotVectorizer(params string[] inputColumns) { if (inputColumns != null) @@ -7471,7 +7471,7 @@ public CategoricalHashOneHotVectorizer(params string[] inputColumns) } } } - + public CategoricalHashOneHotVectorizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7482,7 +7482,7 @@ public CategoricalHashOneHotVectorizer(params ValueTuple[] input } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7630,7 +7630,7 @@ public sealed partial class CategoricalOneHotVectorizer : Microsoft.ML.Runtime.E public CategoricalOneHotVectorizer() { } - + public CategoricalOneHotVectorizer(params string[] inputColumns) { if (inputColumns != null) @@ -7641,7 +7641,7 @@ public CategoricalOneHotVectorizer(params string[] inputColumns) } } } - + public CategoricalOneHotVectorizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7652,7 +7652,7 @@ public CategoricalOneHotVectorizer(params ValueTuple[] inputOutp } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7769,7 +7769,7 @@ public sealed partial class CharacterTokenizer : Microsoft.ML.Runtime.EntryPoint public CharacterTokenizer() { } - + public CharacterTokenizer(params string[] inputColumns) { if (inputColumns != null) @@ -7780,7 +7780,7 @@ public CharacterTokenizer(params string[] inputColumns) } } } - + public CharacterTokenizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7791,7 +7791,7 @@ public CharacterTokenizer(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7888,12 +7888,12 @@ public sealed partial class ColumnConcatenator : Microsoft.ML.Runtime.EntryPoint public ColumnConcatenator() { } - + public ColumnConcatenator(string outputColumn, params string[] inputColumns) { AddColumn(outputColumn, inputColumns); } - + public void AddColumn(string name, params string[] source) { var list = Column == null ? 
new List() : new List(Column); @@ -7978,7 +7978,7 @@ public sealed partial class ColumnCopier : Microsoft.ML.Runtime.EntryPoints.Comm public ColumnCopier() { } - + public ColumnCopier(params string[] inputColumns) { if (inputColumns != null) @@ -7989,7 +7989,7 @@ public ColumnCopier(params string[] inputColumns) } } } - + public ColumnCopier(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8000,7 +8000,7 @@ public ColumnCopier(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8250,7 +8250,7 @@ public sealed partial class ColumnTypeConverter : Microsoft.ML.Runtime.EntryPoin public ColumnTypeConverter() { } - + public ColumnTypeConverter(params string[] inputColumns) { if (inputColumns != null) @@ -8261,7 +8261,7 @@ public ColumnTypeConverter(params string[] inputColumns) } } } - + public ColumnTypeConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8272,7 +8272,7 @@ public ColumnTypeConverter(params ValueTuple[] inputOutputColumn } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8449,7 +8449,7 @@ public sealed partial class ConditionalNormalizer : Microsoft.ML.Runtime.EntryPo public ConditionalNormalizer() { } - + public ConditionalNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -8460,7 +8460,7 @@ public ConditionalNormalizer(params string[] inputColumns) } } } - + public ConditionalNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8471,7 +8471,7 @@ public ConditionalNormalizer(params ValueTuple[] inputOutputColu } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8732,7 +8732,7 @@ public sealed partial class Dictionarizer : Microsoft.ML.Runtime.EntryPoints.Com public Dictionarizer() { } - + public Dictionarizer(params string[] inputColumns) { if (inputColumns != null) @@ -8743,7 +8743,7 @@ public Dictionarizer(params string[] inputColumns) } } } - + public Dictionarizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8754,7 +8754,7 @@ public Dictionarizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9081,7 +9081,7 @@ public sealed partial class GlobalContrastNormalizer : Microsoft.ML.Runtime.Entr public GlobalContrastNormalizer() { } - + public GlobalContrastNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9092,7 +9092,7 @@ public GlobalContrastNormalizer(params string[] inputColumns) } } } - + public GlobalContrastNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9103,7 +9103,7 @@ public GlobalContrastNormalizer(params ValueTuple[] inputOutputC } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -9235,7 +9235,7 @@ public sealed partial class HashConverter : Microsoft.ML.Runtime.EntryPoints.Com public HashConverter() { } - + public HashConverter(params string[] inputColumns) { if (inputColumns != null) @@ -9246,7 +9246,7 @@ public HashConverter(params string[] inputColumns) } } } - + public HashConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9257,7 +9257,7 @@ public HashConverter(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9369,7 +9369,7 @@ public sealed partial class KeyToTextConverter : Microsoft.ML.Runtime.EntryPoint public KeyToTextConverter() { } - + public KeyToTextConverter(params string[] inputColumns) { if (inputColumns != null) @@ -9380,7 +9380,7 @@ public KeyToTextConverter(params string[] inputColumns) } } } - + public KeyToTextConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9391,7 +9391,7 @@ public KeyToTextConverter(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9553,7 +9553,7 @@ public sealed partial class LabelIndicator : Microsoft.ML.Runtime.EntryPoints.Co public LabelIndicator() { } - + public LabelIndicator(params string[] inputColumns) { if (inputColumns != null) @@ -9564,7 +9564,7 @@ public LabelIndicator(params string[] inputColumns) } } } - + public LabelIndicator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9575,7 +9575,7 @@ public LabelIndicator(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9737,7 +9737,7 @@ public sealed partial class LogMeanVarianceNormalizer : Microsoft.ML.Runtime.Ent public LogMeanVarianceNormalizer() { } - + public LogMeanVarianceNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9748,7 +9748,7 @@ public LogMeanVarianceNormalizer(params string[] inputColumns) } } } - + public LogMeanVarianceNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9759,7 +9759,7 @@ public LogMeanVarianceNormalizer(params ValueTuple[] inputOutput } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9879,7 +9879,7 @@ public sealed partial class LpNormalizer : Microsoft.ML.Runtime.EntryPoints.Comm public LpNormalizer() { } - + public LpNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9890,7 +9890,7 @@ public LpNormalizer(params string[] inputColumns) } } } - + public LpNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9901,7 +9901,7 @@ public LpNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -10021,7 +10021,7 @@ public sealed partial class MeanVarianceNormalizer : Microsoft.ML.Runtime.EntryP public MeanVarianceNormalizer() { } - + public MeanVarianceNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -10032,7 +10032,7 @@ public MeanVarianceNormalizer(params string[] inputColumns) } } } - + public MeanVarianceNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10043,7 +10043,7 @@ public MeanVarianceNormalizer(params ValueTuple[] inputOutputCol } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10136,7 +10136,7 @@ public sealed partial class MinMaxNormalizer : Microsoft.ML.Runtime.EntryPoints. public MinMaxNormalizer() { } - + public MinMaxNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -10147,7 +10147,7 @@ public MinMaxNormalizer(params string[] inputColumns) } } } - + public MinMaxNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10158,7 +10158,7 @@ public MinMaxNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10287,7 +10287,7 @@ public sealed partial class MissingValueHandler : Microsoft.ML.Runtime.EntryPoin public MissingValueHandler() { } - + public MissingValueHandler(params string[] inputColumns) { if (inputColumns != null) @@ -10298,7 +10298,7 @@ public MissingValueHandler(params string[] inputColumns) } } } - + public MissingValueHandler(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10309,7 +10309,7 @@ public MissingValueHandler(params ValueTuple[] inputOutputColumn } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10416,7 +10416,7 @@ public sealed partial class MissingValueIndicator : Microsoft.ML.Runtime.EntryPo public MissingValueIndicator() { } - + public MissingValueIndicator(params string[] inputColumns) { if (inputColumns != null) @@ -10427,7 +10427,7 @@ public MissingValueIndicator(params string[] inputColumns) } } } - + public MissingValueIndicator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10438,7 +10438,7 @@ public MissingValueIndicator(params ValueTuple[] inputOutputColu } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10530,7 +10530,7 @@ public sealed partial class MissingValuesDropper : Microsoft.ML.Runtime.EntryPoi public MissingValuesDropper() { } - + public MissingValuesDropper(params string[] inputColumns) { if (inputColumns != null) @@ -10541,7 +10541,7 @@ public MissingValuesDropper(params string[] inputColumns) } } } - + public MissingValuesDropper(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10552,7 +10552,7 @@ public MissingValuesDropper(params ValueTuple[] inputOutputColum } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -10739,7 +10739,7 @@ public sealed partial class MissingValueSubstitutor : Microsoft.ML.Runtime.Entry public MissingValueSubstitutor() { } - + public MissingValueSubstitutor(params string[] inputColumns) { if (inputColumns != null) @@ -10750,7 +10750,7 @@ public MissingValueSubstitutor(params string[] inputColumns) } } } - + public MissingValueSubstitutor(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10761,7 +10761,7 @@ public MissingValueSubstitutor(params ValueTuple[] inputOutputCo } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10922,7 +10922,7 @@ public sealed partial class NGramTranslator : Microsoft.ML.Runtime.EntryPoints.C public NGramTranslator() { } - + public NGramTranslator(params string[] inputColumns) { if (inputColumns != null) @@ -10933,7 +10933,7 @@ public NGramTranslator(params string[] inputColumns) } } } - + public NGramTranslator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10944,7 +10944,7 @@ public NGramTranslator(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -11810,7 +11810,7 @@ public sealed partial class SupervisedBinNormalizer : Microsoft.ML.Runtime.Entry public SupervisedBinNormalizer() { } - + public SupervisedBinNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -11821,7 +11821,7 @@ public SupervisedBinNormalizer(params string[] inputColumns) } } } - + public SupervisedBinNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -11832,7 +11832,7 @@ public SupervisedBinNormalizer(params ValueTuple[] inputOutputCo } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -11994,12 +11994,12 @@ public sealed partial class TextFeaturizer : Microsoft.ML.Runtime.EntryPoints.Co public TextFeaturizer() { } - + public TextFeaturizer(string outputColumn, params string[] inputColumns) { AddColumn(outputColumn, inputColumns); } - + public void AddColumn(string name, params string[] source) { Column = ManyToOneColumn.Create(name, source); @@ -12126,7 +12126,7 @@ public sealed partial class TextToKeyConverter : Microsoft.ML.Runtime.EntryPoint public TextToKeyConverter() { } - + public TextToKeyConverter(params string[] inputColumns) { if (inputColumns != null) @@ -12137,7 +12137,7 @@ public TextToKeyConverter(params string[] inputColumns) } } } - + public TextToKeyConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -12148,7 +12148,7 @@ public TextToKeyConverter(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -12409,7 +12409,7 @@ public sealed partial class WordTokenizer : Microsoft.ML.Runtime.EntryPoints.Com public WordTokenizer() { } - + public WordTokenizer(params string[] inputColumns) { if (inputColumns != null) @@ -12420,7 +12420,7 @@ public WordTokenizer(params string[] inputColumns) } } } - + public WordTokenizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -12431,7 +12431,7 @@ public WordTokenizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -12504,7 +12504,7 @@ public WordTokenizerPipelineStep(Output output) namespace Runtime { - public abstract class CalibratorTrainer : ComponentKind {} + public abstract class CalibratorTrainer : ComponentKind { } /// /// @@ -12556,7 +12556,7 @@ public sealed class PlattCalibratorCalibratorTrainer : CalibratorTrainer internal override string ComponentName => "PlattCalibrator"; } - public abstract class ClassificationLossFunction : ComponentKind {} + public abstract class ClassificationLossFunction : ComponentKind { } /// /// Exponential loss. @@ -12613,7 +12613,7 @@ public sealed class SmoothedHingeLossClassificationLossFunction : Classification internal override string ComponentName => "SmoothedHingeLoss"; } - public abstract class EarlyStoppingCriterion : ComponentKind {} + public abstract class EarlyStoppingCriterion : ComponentKind { } /// /// Stop in case of loss of generality. @@ -12707,7 +12707,7 @@ public sealed class UPEarlyStoppingCriterion : EarlyStoppingCriterion internal override string ComponentName => "UP"; } - public abstract class FastTreeTrainer : ComponentKind {} + public abstract class FastTreeTrainer : ComponentKind { } /// /// Uses a logit-boost boosted tree learner to perform binary classification. @@ -12780,19 +12780,19 @@ public sealed class FastTreeBinaryClassificationFastTreeTrainer : FastTreeTraine /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -12949,19 +12949,19 @@ public sealed class FastTreeBinaryClassificationFastTreeTrainer : FastTreeTraine /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -13168,19 +13168,19 @@ public sealed class FastTreeRankingFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; 
set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -13337,19 +13337,19 @@ public sealed class FastTreeRankingFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -13516,19 +13516,19 @@ public sealed class FastTreeRegressionFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -13685,19 +13685,19 @@ public sealed class FastTreeRegressionFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -13869,19 +13869,19 @@ public sealed class 
FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] public double DropoutRate { get; set; } /// @@ -14038,19 +14038,19 @@ public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] public int NumTrees { get; set; } = 100; /// @@ -14151,7 +14151,7 @@ public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer internal override string ComponentName => "FastTreeTweedieRegression"; } - public abstract class NgramExtractor : ComponentKind {} + public abstract class NgramExtractor : ComponentKind { } /// /// Extracts NGrams from text and convert them to vector using dictionary. @@ -14233,7 +14233,7 @@ public sealed class NGramHashNgramExtractor : NgramExtractor internal override string ComponentName => "NGramHash"; } - public abstract class ParallelTraining : ComponentKind {} + public abstract class ParallelTraining : ComponentKind { } /// /// Single node machine learning process. @@ -14245,7 +14245,7 @@ public sealed class SingleParallelTraining : ParallelTraining internal override string ComponentName => "Single"; } - public abstract class RegressionLossFunction : ComponentKind {} + public abstract class RegressionLossFunction : ComponentKind { } /// /// Poisson loss. @@ -14282,7 +14282,7 @@ public sealed class TweedieLossRegressionLossFunction : RegressionLossFunction internal override string ComponentName => "TweedieLoss"; } - public abstract class SDCAClassificationLossFunction : ComponentKind {} + public abstract class SDCAClassificationLossFunction : ComponentKind { } /// /// Hinge loss. @@ -14324,7 +14324,7 @@ public sealed class SmoothedHingeLossSDCAClassificationLossFunction : SDCAClassi internal override string ComponentName => "SmoothedHingeLoss"; } - public abstract class SDCARegressionLossFunction : ComponentKind {} + public abstract class SDCARegressionLossFunction : ComponentKind { } /// /// Squared loss. 
@@ -14336,7 +14336,7 @@ public sealed class SquaredLossSDCARegressionLossFunction : SDCARegressionLossFu internal override string ComponentName => "SquaredLoss"; } - public abstract class StopWordsRemover : ComponentKind {} + public abstract class StopWordsRemover : ComponentKind { } /// /// Remover with list of stopwords specified by the user. diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index 316d7eab55..c647110702 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,8 +15,7 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - //[Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] - [Fact] + [Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] public void GenerateCSharpAPI() { var cSharpAPIPath = Path.Combine(RootDir, @"src\\Microsoft.ML\\CSharpApi.cs"); From c5dd6140243a08d3cf1c339d5c37c4c147892663 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 13 May 2018 20:54:44 -0700 Subject: [PATCH 05/17] resolve conflits from upstream/master merge. --- src/Microsoft.ML/LearningPipeline.cs | 2 +- .../Scenarios/IrisPlantClassificationWithStringLabelTests.cs | 1 + test/Microsoft.ML.Tests/TextLoaderTests.cs | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 16599ff96e..ef3ab20c13 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -68,7 +68,7 @@ public LearningPipeline() /// Possible data loader(s), transforms and trainers options are /// /// Data Loader: - /// + /// /// etc. /// /// diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 79cc2fc137..e5ec5cea5d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 41bc656672..625e4d9a5d 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -224,7 +224,7 @@ public void CanSuccessfullyTrimSpaces() [Fact] public void ThrowsExceptionWithPropertyName() { - Exception ex = Assert.Throws( () => new TextLoader("fakefile.txt") ); + Exception ex = Assert.Throws( () => new TextLoader("fakefile.txt") ); Assert.StartsWith("String1 is missing ColumnAttribute", ex.Message); } From a61d244d885c8d7e756c0e0650e7016e0c126cf3 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 14 May 2018 01:39:46 -0700 Subject: [PATCH 06/17] Feedback from PR#38 --- .../Common/EntryPoints/core_manifest.json | 10 +- src/Microsoft.ML.Core/Data/DataKind.cs | 2 +- .../EntryPoints/ModuleCatalog.cs | 10 +- .../DataLoadSave/Text/TextLoader.cs | 41 +- .../EntryPoints/InputBase.cs | 1 - .../Microsoft.ML.PipelineInference.csproj | 4 + src/Microsoft.ML/CSharpApi.cs | 421 +++++++++--------- src/Microsoft.ML/Data/TextLoader.cs | 37 +- .../Runtime/EntryPoints/ImportTextData.cs | 2 + .../Internal/Tools/CSharpApiGenerator.cs | 12 +- .../UnitTests/TestCSharpApi.cs | 10 +- .../UnitTests/TestEntryPoints.cs | 191 +++++++- .../Microsoft.ML.Predictor.Tests.csproj | 1 + .../Microsoft.ML.TestFramework/ModelHelper.cs | 180 +++++++- .../HousePriceTrainAndPredictionTests.cs | 4 +- .../Scenarios/IrisPlantClassificationTests.cs | 4 +- ...PlantClassificationWithStringLabelTests.cs | 4 +- .../Scenarios/SentimentPredictionTests.cs | 4 +- test/Microsoft.ML.Tests/TextLoaderTests.cs | 16 +- 19 files changed, 655 insertions(+), 299 deletions(-) diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index 1071227c0f..d2bcf5c58e 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -384,16 +384,16 @@ "Default": null }, { - "Name": "Separator", - "Type": "String", - "Desc": "Source column separator. Options: tab, space, comma, single character", + "Name": "Delimiter", + "Type": "Char", + "Desc": "Source column separator. 
Option: single character ONLY", "Aliases": [ - "sep" + "del" ], "Required": false, "SortOrder": 150.0, "IsNullable": false, - "Default": "tab" + "Default": "\t" }, { "Name": "TrimWhitespace", diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs index 1c043b070e..557358eed7 100644 --- a/src/Microsoft.ML.Core/Data/DataKind.cs +++ b/src/Microsoft.ML.Core/Data/DataKind.cs @@ -205,7 +205,7 @@ public static bool TryGetDataKind(this Type type, out DataKind kind) kind = DataKind.R4; else if (type == typeof(Double)) kind = DataKind.R8; - else if (type == typeof(DvText)) + else if (type == typeof(DvText) || type == typeof(string)) kind = DataKind.TX; else if (type == typeof(DvBool) || type == typeof(bool)) kind = DataKind.BL; diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs index ac1698e520..75c4b59046 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs @@ -50,8 +50,10 @@ public sealed class EntryPointInfo public readonly Type OutputType; public readonly Type[] InputKinds; public readonly Type[] OutputKinds; + public readonly ObsoleteAttribute ObsoleteAttribute; - internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, TlcModule.EntryPointAttribute attribute) + internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, + TlcModule.EntryPointAttribute attribute, ObsoleteAttribute obsoleteAttribute) { Contracts.AssertValueOrNull(ectx); ectx.AssertValue(method); @@ -63,6 +65,7 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, TlcModule.Ent Method = method; ShortName = attribute.ShortName; FriendlyName = attribute.UserName; + ObsoleteAttribute = obsoleteAttribute; // There are supposed to be 2 parameters, env and input for non-macro nodes. // Macro nodes have a 3rd parameter, the entry point node. @@ -185,7 +188,10 @@ private ModuleCatalog(IExceptionContext ectx) var attr = methodInfo.GetCustomAttributes(typeof(TlcModule.EntryPointAttribute), false).FirstOrDefault() as TlcModule.EntryPointAttribute; if (attr == null) continue; - var info = new EntryPointInfo(ectx, methodInfo, attr); + + var info = new EntryPointInfo(ectx, methodInfo, attr, + methodInfo.GetCustomAttributes(typeof(ObsoleteAttribute), false).FirstOrDefault() as ObsoleteAttribute); + entryPoints.Add(info); if (_entryPointMap.ContainsKey(info.Name)) { diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 40519b39fd..cc27594a0f 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -294,9 +294,12 @@ public class ArgumentsCore ShortName = "size")] public int? InputSize; - [Argument(ArgumentType.AtMostOnce, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")] + [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")] public string Separator = "tab"; + [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator. Option: single character ONLY", ShortName = "del")] + public char Delimiter = '\t'; + [Argument(ArgumentType.Multiple, HelpText = "Column groups. 
Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", ShortName = "col", SortOrder = 1)] public Column[] Column; @@ -317,7 +320,7 @@ public bool IsValid() } } - public class Arguments : ArgumentsCore + public sealed class Arguments : ArgumentsCore { [Argument(ArgumentType.AtMostOnce, HelpText = "Use separate parsing threads?", ShortName = "threads", Hide = true)] public bool UseThreads = true; @@ -1005,26 +1008,32 @@ public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource files _inputSize = SrcLim - 1; _host.CheckNonEmpty(args.Separator, nameof(args.Separator), "Must specify a separator"); - string sep = args.Separator.ToLowerInvariant(); + _host.CheckNonEmpty(args.Delimiter.ToString(), nameof(args.Delimiter), "Must specify a delimeter"); - if (sep == ",") - _separators = new char[] { ',' }; + if (args.Delimiter != '\t') + _separators = new char[] { args.Delimiter }; else { - var separators = new HashSet(); - foreach (string s in sep.Split(',')) + string sep = args.Separator.ToLowerInvariant(); + if (sep == ",") + _separators = new char[] { ',' }; + else { - if (string.IsNullOrEmpty(s)) - continue; + var separators = new HashSet(); + foreach (string s in sep.Split(',')) + { + if (string.IsNullOrEmpty(s)) + continue; - char c = NormalizeSeparator(s); - separators.Add(c); - } - _separators = separators.ToArray(); + char c = NormalizeSeparator(s); + separators.Add(c); + } + _separators = separators.ToArray(); - // Handling ",,,," case, that .Split() returns empty strings. - if (_separators.Length == 0) - _separators = new char[] { ',' }; + // Handling ",,,," case, that .Split() returns empty strings. + if (_separators.Length == 0) + _separators = new char[] { ',' }; + } } _bindings = new Bindings(this, cols, headerFile); diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs index a787d43cb9..57a7c9120f 100644 --- a/src/Microsoft.ML.Data/EntryPoints/InputBase.cs +++ b/src/Microsoft.ML.Data/EntryPoints/InputBase.cs @@ -191,7 +191,6 @@ public static TOut Train(IHost host, TArg input, /// public static class CommonInputs { - /// /// Interface that all API transform input classes will implement. /// diff --git a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj index 7cf9585f3b..0aa71597e0 100644 --- a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj +++ b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj @@ -7,6 +7,10 @@ CORECLR + + ;1591;0618 + + diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 84bec9069e..f6f78ee897 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1254,6 +1254,7 @@ namespace Data /// /// Import a dataset from a text file /// + [Obsolete("Use TextLoader instead.")] public sealed partial class CustomTextLoader { @@ -1370,9 +1371,9 @@ public sealed class TextLoaderArguments public int? InputSize { get; set; } /// - /// Source column separator. Options: tab, space, comma, single character + /// Source column separator. Option: single character ONLY /// - public string Separator { get; set; } = "tab"; + public char Delimiter { get; set; } = ' '; /// /// Column groups. 
Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 @@ -1480,23 +1481,23 @@ public TextLoader(string filePath) { _inputFilePath = filePath; } - + public void SetInput(IHostEnvironment env, Experiment experiment) { IFileHandle inputFile = new SimpleFileHandle(env, _inputFilePath, false, false); experiment.SetInput(InputFile, inputFile); } - + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { Contracts.Assert(previousStep == null); - + return new TextLoaderPipelineStep(experiment.Add(this)); } - + private class TextLoaderPipelineStep : ILearningPipelineDataStep { - public TextLoaderPipelineStep(Output output) + public TextLoaderPipelineStep (Output output) { Data = output.Data; Model = null; @@ -3178,13 +3179,13 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[] { 0.01f, 0.1f, 0.5f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] public float LearningRate { get; set; } = 1f; /// /// Decrease learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] public bool DecreaseLearningRate { get; set; } = false; /// @@ -3226,7 +3227,7 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] public int NumIterations { get; set; } = 1; /// @@ -3237,13 +3238,13 @@ public sealed partial class AveragedPerceptronBinaryClassifier : Microsoft.ML.Ru /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -3327,25 +3328,25 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. 
Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] public int MemorySize { get; set; } = 20; /// @@ -3367,7 +3368,7 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// @@ -3383,7 +3384,7 @@ public sealed partial class BinaryLogisticRegressor : Microsoft.ML.Runtime.Entry /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] public bool DenseOptimizer { get; set; } = false; /// @@ -3606,19 +3607,19 @@ public sealed partial class FastForestBinaryClassifier : Microsoft.ML.Runtime.En /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -3883,19 +3884,19 @@ public sealed partial class FastForestRegressor : Microsoft.ML.Runtime.EntryPoin /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -4107,19 +4108,19 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + 
[TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -4276,19 +4277,19 @@ public sealed partial class FastTreeBinaryClassifier : Microsoft.ML.Runtime.Entr /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -4528,19 +4529,19 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -4697,19 +4698,19 @@ public sealed partial class FastTreeRanker : Microsoft.ML.Runtime.EntryPoints.Co /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -4909,19 +4910,19 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// The learning rate /// - 
[TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -5078,19 +5079,19 @@ public sealed partial class FastTreeRegressor : Microsoft.ML.Runtime.EntryPoints /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -5295,19 +5296,19 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -5464,19 +5465,19 @@ public sealed partial class FastTreeTweedieRegressor : Microsoft.ML.Runtime.Entr /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - 
[TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -5646,7 +5647,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// Total number of iterations over all features /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[] { 200, 1500, 9500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] public int NumIterations { get; set; } = 9500; /// @@ -5657,7 +5658,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] public double LearningRates { get; set; } = 0.002d; /// @@ -5688,7 +5689,7 @@ public sealed partial class GeneralizedAdditiveModelBinaryClassifier : Microsoft /// /// Minimum number of training instances required to form a partition /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] public int MinDocuments { get; set; } = 10; /// @@ -5782,7 +5783,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// Total number of iterations over all features /// - [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[] { 200, 1500, 9500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumIterations", new object[]{200, 1500, 9500})] public int NumIterations { get; set; } = 9500; /// @@ -5793,7 +5794,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.001f, 0.1f, isLogScale:true)] public double LearningRates { get; set; } = 0.002d; /// @@ -5824,7 +5825,7 @@ public sealed partial class GeneralizedAdditiveModelRegressor : Microsoft.ML.Run /// /// Minimum number of training instances required to form a partition /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocuments", new object[]{1, 10, 50})] public int MinDocuments { get; set; } = 10; /// @@ -5908,7 +5909,7 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Regularizer constant /// - [TlcModule.SweepableFloatParamAttribute("Lambda", 1E-05f, 0.1f, stepSize: 10, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Lambda", 1E-05f, 0.1f, stepSize:10, isLogScale:true)] public float Lambda { get; set; } = 0.001f; /// @@ -5919,13 +5920,13 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Perform projection to unit-ball? Typically used with batch size > 1. 
/// - [TlcModule.SweepableDiscreteParamAttribute("PerformProjection", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("PerformProjection", new object[]{false, true})] public bool PerformProjection { get; set; } = false; /// /// No bias /// - [TlcModule.SweepableDiscreteParamAttribute("NoBias", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("NoBias", new object[]{false, true})] public bool NoBias { get; set; } = false; /// @@ -5942,7 +5943,7 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] public int NumIterations { get; set; } = 1; /// @@ -5953,13 +5954,13 @@ public sealed partial class LinearSvmBinaryClassifier : Microsoft.ML.Runtime.Ent /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -6043,25 +6044,25 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. 
Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] public int MemorySize { get; set; } = 20; /// @@ -6083,7 +6084,7 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// @@ -6099,7 +6100,7 @@ public sealed partial class LogisticRegressor : Microsoft.ML.Runtime.EntryPoints /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] public bool DenseOptimizer { get; set; } = false; /// @@ -6257,13 +6258,13 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[] { 0.01f, 0.1f, 0.5f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{0.01f, 0.1f, 0.5f, 1f})] public float LearningRate { get; set; } = 0.1f; /// /// Decrease learning rate /// - [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("DecreaseLearningRate", new object[]{false, true})] public bool DecreaseLearningRate { get; set; } = true; /// @@ -6305,7 +6306,7 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Number of iterations /// - [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize:10, isLogScale:true)] public int NumIterations { get; set; } = 1; /// @@ -6316,13 +6317,13 @@ public sealed partial class OnlineGradientDescentRegressor : Microsoft.ML.Runtim /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// /// Whether to shuffle for each training iteration /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -6401,7 +6402,7 @@ public sealed partial class OrdinaryLeastSquaresRegressor : Microsoft.ML.Runtime /// /// L2 regularization weight /// - [TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[] { 1E-06f, 0.1f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[]{1E-06f, 0.1f, 1f})] public float L2Weight { get; set; } = 1E-06f; /// @@ -6485,25 +6486,25 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. 
/// /// L2 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L2Weight", 0f, 1f, numSteps:4)] public float L2Weight { get; set; } = 1f; /// /// L1 regularization weight /// - [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps: 4)] + [TlcModule.SweepableFloatParamAttribute("L1Weight", 0f, 1f, numSteps:4)] public float L1Weight { get; set; } = 1f; /// /// Tolerance parameter for optimization convergence. Lower = slower, more accurate /// - [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[] { 0.0001f, 1E-07f })] + [TlcModule.SweepableDiscreteParamAttribute("OptTol", new object[]{0.0001f, 1E-07f})] public float OptTol { get; set; } = 1E-07f; /// /// Memory size for L-BFGS. Lower=faster, less accurate /// - [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[] { 5, 20, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MemorySize", new object[]{5, 20, 50})] public int MemorySize { get; set; } = 20; /// @@ -6525,7 +6526,7 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. /// /// Init weights diameter /// - [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps: 5)] + [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0f, 1f, numSteps:5)] public float InitWtsDiameter { get; set; } /// @@ -6541,7 +6542,7 @@ public sealed partial class PoissonRegressor : Microsoft.ML.Runtime.EntryPoints. /// /// Force densification of the internal optimization vectors /// - [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("DenseOptimizer", new object[]{false, true})] public bool DenseOptimizer { get; set; } = false; /// @@ -6647,13 +6648,13 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] public float? L1Threshold { get; set; } /// @@ -6664,19 +6665,19 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] public float ConvergenceTolerance { get; set; } = 0.1f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. 
/// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -6687,7 +6688,7 @@ public sealed partial class StochasticDualCoordinateAscentBinaryClassifier : Mic /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] public float BiasLearningRate { get; set; } /// @@ -6767,13 +6768,13 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// L2 regularizer constant. By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] public float? L1Threshold { get; set; } /// @@ -6784,19 +6785,19 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] public float ConvergenceTolerance { get; set; } = 0.1f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -6807,7 +6808,7 @@ public sealed partial class StochasticDualCoordinateAscentClassifier : Microsoft /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] public float BiasLearningRate { get; set; } /// @@ -6887,13 +6888,13 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// L2 regularizer constant. 
By default the l2 constant is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { "", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f })] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{"", 1E-07f, 1E-06f, 1E-05f, 0.0001f, 0.001f, 0.01f})] public float? L2Const { get; set; } /// /// L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set. /// - [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[] { "", 0f, 0.25f, 0.5f, 0.75f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("L1Threshold", new object[]{"", 0f, 0.25f, 0.5f, 0.75f, 1f})] public float? L1Threshold { get; set; } /// @@ -6904,19 +6905,19 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// The tolerance for the ratio between duality gap and primal loss for convergence checking. /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.001f, 0.01f, 0.1f, 0.2f})] public float ConvergenceTolerance { get; set; } = 0.01f; /// /// Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic. /// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { "", 10, 20, 100 })] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{"", 10, 20, 100})] public int? MaxIterations { get; set; } /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -6927,7 +6928,7 @@ public sealed partial class StochasticDualCoordinateAscentRegressor : Microsoft. /// /// The learning rate for adjusting bias from being regularized. /// - [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[] { 0f, 0.01f, 0.1f, 1f })] + [TlcModule.SweepableDiscreteParamAttribute("BiasLearningRate", new object[]{0f, 0.01f, 0.1f, 1f})] public float BiasLearningRate { get; set; } = 1f; /// @@ -7007,7 +7008,7 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// L2 regularizer constant /// - [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[] { 1E-07f, 5E-07f, 1E-06f, 5E-06f, 1E-05f })] + [TlcModule.SweepableDiscreteParamAttribute("L2Const", new object[]{1E-07f, 5E-07f, 1E-06f, 5E-06f, 1E-05f})] public float L2Const { get; set; } = 1E-06f; /// @@ -7018,13 +7019,13 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// Exponential moving averaged improvement tolerance for convergence /// - [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[] { 0.01f, 0.001f, 0.0001f, 1E-05f })] + [TlcModule.SweepableDiscreteParamAttribute("ConvergenceTolerance", new object[]{0.01f, 0.001f, 0.0001f, 1E-05f})] public double ConvergenceTolerance { get; set; } = 0.0001d; /// /// Maximum number of iterations; set to 1 to simulate online learning. 
/// - [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[] { 1, 5, 10, 20 })] + [TlcModule.SweepableDiscreteParamAttribute("MaxIterations", new object[]{1, 5, 10, 20})] public int MaxIterations { get; set; } = 20; /// @@ -7035,7 +7036,7 @@ public sealed partial class StochasticGradientDescentBinaryClassifier : Microsof /// /// Shuffle data every epoch? /// - [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] + [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[]{false, true})] public bool Shuffle { get; set; } = true; /// @@ -7298,7 +7299,7 @@ public sealed partial class BinNormalizer : Microsoft.ML.Runtime.EntryPoints.Com public BinNormalizer() { } - + public BinNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -7309,7 +7310,7 @@ public BinNormalizer(params string[] inputColumns) } } } - + public BinNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7320,7 +7321,7 @@ public BinNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7460,7 +7461,7 @@ public sealed partial class CategoricalHashOneHotVectorizer : Microsoft.ML.Runti public CategoricalHashOneHotVectorizer() { } - + public CategoricalHashOneHotVectorizer(params string[] inputColumns) { if (inputColumns != null) @@ -7471,7 +7472,7 @@ public CategoricalHashOneHotVectorizer(params string[] inputColumns) } } } - + public CategoricalHashOneHotVectorizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7482,7 +7483,7 @@ public CategoricalHashOneHotVectorizer(params ValueTuple[] input } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7630,7 +7631,7 @@ public sealed partial class CategoricalOneHotVectorizer : Microsoft.ML.Runtime.E public CategoricalOneHotVectorizer() { } - + public CategoricalOneHotVectorizer(params string[] inputColumns) { if (inputColumns != null) @@ -7641,7 +7642,7 @@ public CategoricalOneHotVectorizer(params string[] inputColumns) } } } - + public CategoricalOneHotVectorizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7652,7 +7653,7 @@ public CategoricalOneHotVectorizer(params ValueTuple[] inputOutp } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7769,7 +7770,7 @@ public sealed partial class CharacterTokenizer : Microsoft.ML.Runtime.EntryPoint public CharacterTokenizer() { } - + public CharacterTokenizer(params string[] inputColumns) { if (inputColumns != null) @@ -7780,7 +7781,7 @@ public CharacterTokenizer(params string[] inputColumns) } } } - + public CharacterTokenizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -7791,7 +7792,7 @@ public CharacterTokenizer(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -7888,12 +7889,12 @@ public sealed partial class ColumnConcatenator : Microsoft.ML.Runtime.EntryPoint public ColumnConcatenator() { } - + public ColumnConcatenator(string outputColumn, params string[] inputColumns) { AddColumn(outputColumn, inputColumns); } - + public void AddColumn(string name, params string[] source) { var list = Column == null ? 
new List() : new List(Column); @@ -7978,7 +7979,7 @@ public sealed partial class ColumnCopier : Microsoft.ML.Runtime.EntryPoints.Comm public ColumnCopier() { } - + public ColumnCopier(params string[] inputColumns) { if (inputColumns != null) @@ -7989,7 +7990,7 @@ public ColumnCopier(params string[] inputColumns) } } } - + public ColumnCopier(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8000,7 +8001,7 @@ public ColumnCopier(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8250,7 +8251,7 @@ public sealed partial class ColumnTypeConverter : Microsoft.ML.Runtime.EntryPoin public ColumnTypeConverter() { } - + public ColumnTypeConverter(params string[] inputColumns) { if (inputColumns != null) @@ -8261,7 +8262,7 @@ public ColumnTypeConverter(params string[] inputColumns) } } } - + public ColumnTypeConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8272,7 +8273,7 @@ public ColumnTypeConverter(params ValueTuple[] inputOutputColumn } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8449,7 +8450,7 @@ public sealed partial class ConditionalNormalizer : Microsoft.ML.Runtime.EntryPo public ConditionalNormalizer() { } - + public ConditionalNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -8460,7 +8461,7 @@ public ConditionalNormalizer(params string[] inputColumns) } } } - + public ConditionalNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8471,7 +8472,7 @@ public ConditionalNormalizer(params ValueTuple[] inputOutputColu } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -8732,7 +8733,7 @@ public sealed partial class Dictionarizer : Microsoft.ML.Runtime.EntryPoints.Com public Dictionarizer() { } - + public Dictionarizer(params string[] inputColumns) { if (inputColumns != null) @@ -8743,7 +8744,7 @@ public Dictionarizer(params string[] inputColumns) } } } - + public Dictionarizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -8754,7 +8755,7 @@ public Dictionarizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9081,7 +9082,7 @@ public sealed partial class GlobalContrastNormalizer : Microsoft.ML.Runtime.Entr public GlobalContrastNormalizer() { } - + public GlobalContrastNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9092,7 +9093,7 @@ public GlobalContrastNormalizer(params string[] inputColumns) } } } - + public GlobalContrastNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9103,7 +9104,7 @@ public GlobalContrastNormalizer(params ValueTuple[] inputOutputC } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -9235,7 +9236,7 @@ public sealed partial class HashConverter : Microsoft.ML.Runtime.EntryPoints.Com public HashConverter() { } - + public HashConverter(params string[] inputColumns) { if (inputColumns != null) @@ -9246,7 +9247,7 @@ public HashConverter(params string[] inputColumns) } } } - + public HashConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9257,7 +9258,7 @@ public HashConverter(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9369,7 +9370,7 @@ public sealed partial class KeyToTextConverter : Microsoft.ML.Runtime.EntryPoint public KeyToTextConverter() { } - + public KeyToTextConverter(params string[] inputColumns) { if (inputColumns != null) @@ -9380,7 +9381,7 @@ public KeyToTextConverter(params string[] inputColumns) } } } - + public KeyToTextConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9391,7 +9392,7 @@ public KeyToTextConverter(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9553,7 +9554,7 @@ public sealed partial class LabelIndicator : Microsoft.ML.Runtime.EntryPoints.Co public LabelIndicator() { } - + public LabelIndicator(params string[] inputColumns) { if (inputColumns != null) @@ -9564,7 +9565,7 @@ public LabelIndicator(params string[] inputColumns) } } } - + public LabelIndicator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9575,7 +9576,7 @@ public LabelIndicator(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9737,7 +9738,7 @@ public sealed partial class LogMeanVarianceNormalizer : Microsoft.ML.Runtime.Ent public LogMeanVarianceNormalizer() { } - + public LogMeanVarianceNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9748,7 +9749,7 @@ public LogMeanVarianceNormalizer(params string[] inputColumns) } } } - + public LogMeanVarianceNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9759,7 +9760,7 @@ public LogMeanVarianceNormalizer(params ValueTuple[] inputOutput } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -9879,7 +9880,7 @@ public sealed partial class LpNormalizer : Microsoft.ML.Runtime.EntryPoints.Comm public LpNormalizer() { } - + public LpNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -9890,7 +9891,7 @@ public LpNormalizer(params string[] inputColumns) } } } - + public LpNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -9901,7 +9902,7 @@ public LpNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -10021,7 +10022,7 @@ public sealed partial class MeanVarianceNormalizer : Microsoft.ML.Runtime.EntryP public MeanVarianceNormalizer() { } - + public MeanVarianceNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -10032,7 +10033,7 @@ public MeanVarianceNormalizer(params string[] inputColumns) } } } - + public MeanVarianceNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10043,7 +10044,7 @@ public MeanVarianceNormalizer(params ValueTuple[] inputOutputCol } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10136,7 +10137,7 @@ public sealed partial class MinMaxNormalizer : Microsoft.ML.Runtime.EntryPoints. public MinMaxNormalizer() { } - + public MinMaxNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -10147,7 +10148,7 @@ public MinMaxNormalizer(params string[] inputColumns) } } } - + public MinMaxNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10158,7 +10159,7 @@ public MinMaxNormalizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10287,7 +10288,7 @@ public sealed partial class MissingValueHandler : Microsoft.ML.Runtime.EntryPoin public MissingValueHandler() { } - + public MissingValueHandler(params string[] inputColumns) { if (inputColumns != null) @@ -10298,7 +10299,7 @@ public MissingValueHandler(params string[] inputColumns) } } } - + public MissingValueHandler(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10309,7 +10310,7 @@ public MissingValueHandler(params ValueTuple[] inputOutputColumn } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10416,7 +10417,7 @@ public sealed partial class MissingValueIndicator : Microsoft.ML.Runtime.EntryPo public MissingValueIndicator() { } - + public MissingValueIndicator(params string[] inputColumns) { if (inputColumns != null) @@ -10427,7 +10428,7 @@ public MissingValueIndicator(params string[] inputColumns) } } } - + public MissingValueIndicator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10438,7 +10439,7 @@ public MissingValueIndicator(params ValueTuple[] inputOutputColu } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10530,7 +10531,7 @@ public sealed partial class MissingValuesDropper : Microsoft.ML.Runtime.EntryPoi public MissingValuesDropper() { } - + public MissingValuesDropper(params string[] inputColumns) { if (inputColumns != null) @@ -10541,7 +10542,7 @@ public MissingValuesDropper(params string[] inputColumns) } } } - + public MissingValuesDropper(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10552,7 +10553,7 @@ public MissingValuesDropper(params ValueTuple[] inputOutputColum } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -10739,7 +10740,7 @@ public sealed partial class MissingValueSubstitutor : Microsoft.ML.Runtime.Entry public MissingValueSubstitutor() { } - + public MissingValueSubstitutor(params string[] inputColumns) { if (inputColumns != null) @@ -10750,7 +10751,7 @@ public MissingValueSubstitutor(params string[] inputColumns) } } } - + public MissingValueSubstitutor(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10761,7 +10762,7 @@ public MissingValueSubstitutor(params ValueTuple[] inputOutputCo } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -10922,7 +10923,7 @@ public sealed partial class NGramTranslator : Microsoft.ML.Runtime.EntryPoints.C public NGramTranslator() { } - + public NGramTranslator(params string[] inputColumns) { if (inputColumns != null) @@ -10933,7 +10934,7 @@ public NGramTranslator(params string[] inputColumns) } } } - + public NGramTranslator(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -10944,7 +10945,7 @@ public NGramTranslator(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -11810,7 +11811,7 @@ public sealed partial class SupervisedBinNormalizer : Microsoft.ML.Runtime.Entry public SupervisedBinNormalizer() { } - + public SupervisedBinNormalizer(params string[] inputColumns) { if (inputColumns != null) @@ -11821,7 +11822,7 @@ public SupervisedBinNormalizer(params string[] inputColumns) } } } - + public SupervisedBinNormalizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -11832,7 +11833,7 @@ public SupervisedBinNormalizer(params ValueTuple[] inputOutputCo } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -11994,12 +11995,12 @@ public sealed partial class TextFeaturizer : Microsoft.ML.Runtime.EntryPoints.Co public TextFeaturizer() { } - + public TextFeaturizer(string outputColumn, params string[] inputColumns) { AddColumn(outputColumn, inputColumns); } - + public void AddColumn(string name, params string[] source) { Column = ManyToOneColumn.Create(name, source); @@ -12126,7 +12127,7 @@ public sealed partial class TextToKeyConverter : Microsoft.ML.Runtime.EntryPoint public TextToKeyConverter() { } - + public TextToKeyConverter(params string[] inputColumns) { if (inputColumns != null) @@ -12137,7 +12138,7 @@ public TextToKeyConverter(params string[] inputColumns) } } } - + public TextToKeyConverter(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -12148,7 +12149,7 @@ public TextToKeyConverter(params ValueTuple[] inputOutputColumns } } } - + public void AddColumn(string source) { var list = Column == null ? new List() : new List(Column); @@ -12409,7 +12410,7 @@ public sealed partial class WordTokenizer : Microsoft.ML.Runtime.EntryPoints.Com public WordTokenizer() { } - + public WordTokenizer(params string[] inputColumns) { if (inputColumns != null) @@ -12420,7 +12421,7 @@ public WordTokenizer(params string[] inputColumns) } } } - + public WordTokenizer(params ValueTuple[] inputOutputColumns) { if (inputOutputColumns != null) @@ -12431,7 +12432,7 @@ public WordTokenizer(params ValueTuple[] inputOutputColumns) } } } - + public void AddColumn(string source) { var list = Column == null ? 
new List() : new List(Column); @@ -12504,7 +12505,7 @@ public WordTokenizerPipelineStep(Output output) namespace Runtime { - public abstract class CalibratorTrainer : ComponentKind { } + public abstract class CalibratorTrainer : ComponentKind {} @@ -12556,7 +12557,7 @@ public sealed class PlattCalibratorCalibratorTrainer : CalibratorTrainer internal override string ComponentName => "PlattCalibrator"; } - public abstract class ClassificationLossFunction : ComponentKind { } + public abstract class ClassificationLossFunction : ComponentKind {} @@ -12613,7 +12614,7 @@ public sealed class SmoothedHingeLossClassificationLossFunction : Classification internal override string ComponentName => "SmoothedHingeLoss"; } - public abstract class EarlyStoppingCriterion : ComponentKind { } + public abstract class EarlyStoppingCriterion : ComponentKind {} @@ -12707,7 +12708,7 @@ public sealed class UPEarlyStoppingCriterion : EarlyStoppingCriterion internal override string ComponentName => "UP"; } - public abstract class FastTreeTrainer : ComponentKind { } + public abstract class FastTreeTrainer : ComponentKind {} @@ -12780,19 +12781,19 @@ public sealed class FastTreeBinaryClassificationFastTreeTrainer : FastTreeTraine /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -12949,19 +12950,19 @@ public sealed class FastTreeBinaryClassificationFastTreeTrainer : FastTreeTraine /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -13168,19 +13169,19 @@ public sealed class FastTreeRankingFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + 
[TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -13337,19 +13338,19 @@ public sealed class FastTreeRankingFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -13516,19 +13517,19 @@ public sealed class FastTreeRegressionFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -13685,19 +13686,19 @@ public sealed class FastTreeRegressionFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -13869,19 +13870,19 @@ public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer /// /// The learning rate /// - 
[TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("LearningRates", 0.025f, 0.4f, isLogScale:true)] public double LearningRates { get; set; } = 0.2d; /// /// Shrinkage /// - [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale: true)] + [TlcModule.SweepableFloatParamAttribute("Shrinkage", 0.025f, 4f, isLogScale:true)] public double Shrinkage { get; set; } = 1d; /// /// Dropout rate for tree regularization /// - [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[] { 0f, 1E-09f, 0.05f, 0.1f, 0.2f })] + [TlcModule.SweepableDiscreteParamAttribute("DropoutRate", new object[]{0f, 1E-09f, 0.05f, 0.1f, 0.2f})] public double DropoutRate { get; set; } /// @@ -14038,19 +14039,19 @@ public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer /// /// The max number of leaves in each regression tree /// - [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize: 4, isLogScale: true)] + [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, stepSize:4, isLogScale:true)] public int NumLeaves { get; set; } = 20; /// /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data /// - [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })] + [TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[]{1, 10, 50})] public int MinDocumentsInLeafs { get; set; } = 10; /// /// Number of weak hypotheses in the ensemble /// - [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[] { 20, 100, 500 })] + [TlcModule.SweepableDiscreteParamAttribute("NumTrees", new object[]{20, 100, 500})] public int NumTrees { get; set; } = 100; /// @@ -14151,7 +14152,7 @@ public sealed class FastTreeTweedieRegressionFastTreeTrainer : FastTreeTrainer internal override string ComponentName => "FastTreeTweedieRegression"; } - public abstract class NgramExtractor : ComponentKind { } + public abstract class NgramExtractor : ComponentKind {} @@ -14233,7 +14234,7 @@ public sealed class NGramHashNgramExtractor : NgramExtractor internal override string ComponentName => "NGramHash"; } - public abstract class ParallelTraining : ComponentKind { } + public abstract class ParallelTraining : ComponentKind {} @@ -14245,7 +14246,7 @@ public sealed class SingleParallelTraining : ParallelTraining internal override string ComponentName => "Single"; } - public abstract class RegressionLossFunction : ComponentKind { } + public abstract class RegressionLossFunction : ComponentKind {} @@ -14282,7 +14283,7 @@ public sealed class TweedieLossRegressionLossFunction : RegressionLossFunction internal override string ComponentName => "TweedieLoss"; } - public abstract class SDCAClassificationLossFunction : ComponentKind { } + public abstract class SDCAClassificationLossFunction : ComponentKind {} @@ -14324,7 +14325,7 @@ public sealed class SmoothedHingeLossSDCAClassificationLossFunction : SDCAClassi internal override string ComponentName => "SmoothedHingeLoss"; } - public abstract class SDCARegressionLossFunction : ComponentKind { } + public abstract class SDCARegressionLossFunction : ComponentKind {} @@ -14336,7 +14337,7 @@ public sealed class SquaredLossSDCARegressionLossFunction : SDCARegressionLossFu internal override string ComponentName => "SquaredLoss"; } - public abstract class StopWordsRemover : ComponentKind { } + public abstract class StopWordsRemover : ComponentKind {} diff --git 
a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index f9ed1db012..81bce67d2b 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -4,6 +4,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; using System; using System.Collections.Generic; using System.Linq; @@ -21,9 +22,7 @@ public sealed class TextLoader : TextLoader /// /// Data file path /// Does the file contains header? - /// How the columns are seperated? - /// Options: separator="tab", separator="space", separator="comma" or separator=[single character]. - /// By default separator=null means "tab" + /// Column delimiter. Default is '\t' (tab). /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines public TextLoader(string inputFilePath, bool useHeader = false, - string separator = null, bool allowQuotedStrings = true, + char delimeter = '\t', bool allowQuotedStrings = true, bool supportSparse = true, bool trimWhitespace = false) : base(inputFilePath) { var fields = typeof(TInput).GetFields(); @@ -54,19 +53,17 @@ public TextLoader(string inputFilePath, bool useHeader = false, if (name.Any(c => !Char.IsLetterOrDigit(c))) throw Contracts.Except($"{name} is not alphanumeric."); - if(separator != null) - { - if(separator != "space" && separator != "tab" && separator != "comma" && separator.Length > 1) - throw Contracts.Except($"{nameof(separator)} can only be one of the following: space, tab, comma" + - $" or a single character."); - } - + DataKind dk; + (field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType).TryGetDataKind(out dk); var col = Runtime.Data.TextLoader.Column.Parse( $"{name}:" + - $"{TypeToName(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType)}:" + + $"{dk.ToString()}:" + $"{mappingAttr.Ordinal}" ); - + + if(col == null) + throw Contracts.Except($"Could not generate column for {name}"); + TextLoaderColumn tlc = new TextLoaderColumn(); if (col.KeyRange != null) { @@ -95,22 +92,10 @@ public TextLoader(string inputFilePath, bool useHeader = false, } Arguments.HasHeader = useHeader; - Arguments.Separator = separator; + Arguments.Delimiter = delimeter; Arguments.AllowQuoting = allowQuotedStrings; Arguments.AllowSparse = supportSparse; Arguments.TrimWhitespace = trimWhitespace; } - - private string TypeToName(Type type) - { - if (type == typeof(string)) - return "TX"; - else if (type == typeof(float) || type == typeof(double)) - return "R4"; - else if (type == typeof(bool)) - return "BL"; - else - throw new Exception("Type not implemented or supported."); //Add more types.
- } } } diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index 77a54b2abb..a30342bb84 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -43,6 +43,8 @@ public sealed class Output public IDataView Data; } +#pragma warning disable 0618 + [Obsolete("Use TextLoader instead.", false)] [TlcModule.EntryPoint(Name = "Data.CustomTextLoader", Desc = "Import a dataset from a text file")] public static Output ImportText(IHostEnvironment env, Input input) { diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index 3b1a746391..c6c16df22e 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -894,10 +894,11 @@ private void GenerateInput(IndentingTextWriter writer, var classAndMethod = GeneratorUtils.GetClassAndMethodNames(entryPointInfo); string classBase = ""; if (entryPointInfo.InputKinds != null) + { classBase += $" : {string.Join(", ", entryPointInfo.InputKinds.Select(GeneratorUtils.GetCSharpTypeName))}"; - - if (classBase.Contains("ITransformInput") || classBase.Contains("ITrainerInput")) - classBase += ", Microsoft.ML.ILearningPipelineItem"; + if (entryPointInfo.InputKinds.Any(t => typeof(ITrainerInput).IsAssignableFrom(t) || typeof(ITransformInput).IsAssignableFrom(t))) + classBase += ", Microsoft.ML.ILearningPipelineItem"; + } GenerateEnums(writer, entryPointInfo.InputType, classAndMethod.Item1); writer.WriteLine(); @@ -906,13 +907,16 @@ private void GenerateInput(IndentingTextWriter writer, foreach (var line in entryPointInfo.Description.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)) writer.WriteLine($"/// {line}"); writer.WriteLine("/// "); + + if(entryPointInfo.ObsoleteAttribute != null) + writer.WriteLine($"[Obsolete(\"{entryPointInfo.ObsoleteAttribute.Message}\")]"); string seal = entryPointInfo.NoSeal ? 
"" : "sealed "; writer.WriteLine($"public {seal}partial class {classAndMethod.Item2}{classBase}"); writer.WriteLine("{"); writer.Indent(); writer.WriteLine(); - if (classBase.Contains("ILearningPipelineLoader")) + if (entryPointInfo.InputKinds != null && entryPointInfo.InputKinds.Any(t => typeof(ILearningPipelineLoader).IsAssignableFrom(t))) GenerateLoaderAddInputMethod(writer, classAndMethod.Item2); GenerateColumnAddMethods(writer, entryPointInfo.InputType, catalog, classAndMethod.Item2, out Type transformType); diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index 8bfa4e4e78..37ae5ee1e5 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -36,7 +36,7 @@ public void TestSimpleExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.CustomTextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var normalizeInput = new ML.Transforms.MinMaxNormalizer @@ -67,7 +67,7 @@ public void TestSimpleTrainExperiment() { var experiment = env.CreateExperiment(); - var importInput = new ML.Data.CustomTextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var catInput = new ML.Transforms.CategoricalOneHotVectorizer @@ -165,7 +165,7 @@ public void TestTrainTestMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.CustomTextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var trainTestInput = new ML.Models.TrainTestBinaryEvaluator @@ -235,7 +235,7 @@ public void TestCrossValidationBinaryMacro() var experiment = env.CreateExperiment(); - var importInput = new ML.Data.CustomTextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var crossValidateBinary = new ML.Models.BinaryCrossValidator @@ -295,7 +295,7 @@ public void TestCrossValidationMacro() var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); - var importInput = new ML.Data.CustomTextLoader(); + var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 4a4032bf03..f68535c94c 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -33,7 +33,35 @@ public void EntryPointTrainTestSplit() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=Features:TX:1-9" }).Data; + /*var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input + { InputFile = inputFile, CustomSchema = "col=Label:0 col=Features:TX:1-9" }).Data;*/ + + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + Delimiter = ',', + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { 
+ Name = "Features", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 9} }, + Type = Runtime.Data.DataKind.Text + } + } + }, + + InputFile = inputFile + }).Data; var splitOutput = TrainTestSplit.Split(Env, new TrainTestSplit.Input { Data = dataView, Fraction = 0.9f }); @@ -62,7 +90,44 @@ public void EntryPointFeatureCombiner() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var result = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }).OutputData; var expected = Env.CreateTransform("Convert{col=F2 type=R4}", dataView); @@ -82,7 +147,44 @@ public void EntryPointScoring() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var trainData = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }); @@ -105,7 +207,44 @@ public void EntryPointApplyModel() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max 
= 0} }, + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var data1 = FeatureCombiner.PrepareFeatures(Env, new FeatureCombiner.FeatureCombinerInput() { Data = dataView, Features = new[] { "F1", "F2", "Rest" } }); @@ -120,7 +259,49 @@ public void EntryPointCaching() { var dataPath = GetDataPath("breast-cancer.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); - var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + /*var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile, + CustomSchema = "col=Label:0 col=F1:TX:1 col=F2:I4:2 col=Rest:3-9" }).Data; + */ + + var dataView = ImportTextData.TextLoader(Env, new ImportTextData.LoaderInput() + { + Arguments = + { + Delimiter = ',', + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 0, Max = 0} } + }, + + new TextLoader.Column() + { + Name = "F1", + Source = new [] { new TextLoader.Range() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoader.Column() + { + Name = "F2", + Source = new [] { new TextLoader.Range() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.I4 + }, + + new TextLoader.Column() + { + Name = "Rest", + Source = new [] { new TextLoader.Range() { Min = 3, Max = 9} } + } + } + }, + + InputFile = inputFile + }).Data; + dataView = Env.CreateTransform("Term{col=F1}", dataView); var cached1 = Cache.CacheData(Env, new Cache.CacheInput() { Data = dataView, Caching = Cache.CachingType.Memory }); diff --git a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj index 30b51e8afb..a6f145214d 100644 --- a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj +++ b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj @@ -5,6 +5,7 @@ true AnyCPU + 1701;1702;1705;0618 diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index 0e8a4000ac..54a9c4a4d7 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; @@ -40,24 +41,187 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream) public static IDataView GetKcHouseDataView(string dataPath) { - var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; - var txtArgs = new TextLoader.Arguments(); + var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 " + + "col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 " + + "col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 " + + "col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 " + + "col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 " + + "col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; + + var txtArgs = new Runtime.Data.TextLoader.Arguments(); bool parsed = CmdParser.ParseArguments(s_environment, dataSchema, txtArgs); s_environment.Assert(parsed); - var txtLoader = new TextLoader(s_environment, txtArgs, new MultiFileSource(dataPath)); + var txtLoader = new Runtime.Data.TextLoader(s_environment, txtArgs, new MultiFileSource(dataPath)); return txtLoader; } private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) { - var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,"; - Experiment experiment = s_environment.CreateExperiment(); - var importData = new Data.CustomTextLoader(); - importData.CustomSchema = dataSchema; - Data.CustomTextLoader.Output imported = experiment.Add(importData); + var importData = new Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Delimiter = ',', + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Id", + Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoaderColumn() + { + Name = "Date", + Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Type = Runtime.Data.DataKind.Text + }, + + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange() { Min = 2, Max = 2} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Bedrooms", + Source = new [] { new TextLoaderRange() { Min = 3, Max = 3} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Bathrooms", + Source = new [] { new TextLoaderRange() { Min = 4, Max = 4} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLiving", + Source = new [] { new TextLoaderRange() { Min = 5, Max = 5} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLot", + Source = new [] { new TextLoaderRange() { Min = 6, Max = 6} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + 
{ + Name = "Floors", + Source = new [] { new TextLoaderRange() { Min = 7, Max = 7} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Waterfront", + Source = new [] { new TextLoaderRange() { Min = 8, Max = 8} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "View", + Source = new [] { new TextLoaderRange() { Min = 9, Max = 9} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Condition", + Source = new [] { new TextLoaderRange() { Min = 10, Max = 10} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Grade", + Source = new [] { new TextLoaderRange() { Min = 11, Max = 11} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftAbove", + Source = new [] { new TextLoaderRange() { Min = 12, Max = 12} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftBasement", + Source = new [] { new TextLoaderRange() { Min = 13, Max = 13} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "YearBuilt", + Source = new [] { new TextLoaderRange() { Min = 14, Max = 14} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "YearRenovated", + Source = new [] { new TextLoaderRange() { Min = 15, Max = 15} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Zipcode", + Source = new [] { new TextLoaderRange() { Min = 16, Max = 16} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Lat", + Source = new [] { new TextLoaderRange() { Min = 17, Max = 17} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Long", + Source = new [] { new TextLoaderRange() { Min = 18, Max = 18} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLiving15", + Source = new [] { new TextLoaderRange() { Min = 19, Max = 19} }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SqftLot15", + Source = new [] { new TextLoaderRange() { Min = 20, Max = 20} }, + Type = Runtime.Data.DataKind.Num + }, + } + } + + //new Data.CustomTextLoader(); + // importData.CustomSchema = dataSchema; + // + }; + Data.TextLoader.Output imported = experiment.Add(importData); var numericalConcatenate = new Transforms.ColumnConcatenator(); numericalConcatenate.Data = imported.Data; numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15"); diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs index 0b2887c618..bcc2f6a172 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs @@ -22,7 +22,7 @@ public void TrainAndPredictHousePriceModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: true, separator: ",")); + pipeline.Add(new TextLoader(dataPath, useHeader: true, delimeter: ',')); pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); @@ -62,7 +62,7 @@ public void TrainAndPredictHousePriceModelTest() Assert.InRange(prediction.Price, 260_000, 330_000); string testDataPath = 
GetDataPath("kc_house_test.csv"); - var testData = new TextLoader(testDataPath, useHeader: true, separator: ","); + var testData = new TextLoader(testDataPath, useHeader: true, delimeter: ','); var evaluator = new RegressionEvaluator(); RegressionMetrics metrics = evaluator.Evaluate(model, testData); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index cc58e8aa64..2aba0a375d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -20,7 +20,7 @@ public void TrainAndPredictIrisModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false, separator: "tab")); + pipeline.Add(new TextLoader(dataPath, useHeader: false)); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); @@ -67,7 +67,7 @@ public void TrainAndPredictIrisModelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. string testDataPath = GetDataPath("iris.txt"); - var testData = new TextLoader(testDataPath, useHeader: false, separator: "tab"); + var testData = new TextLoader(testDataPath, useHeader: false); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index e5ec5cea5d..38cc1cbf6c 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -20,7 +20,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false, separator: ",")); + pipeline.Add(new TextLoader(dataPath, useHeader: false, delimeter: ',')); pipeline.Add(new Dictionarizer("Label")); // "IrisPlantType" is used as "Label" because of column attribute name on the field. @@ -70,7 +70,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. 
string testDataPath = GetDataPath("iris.data"); - var testData = new TextLoader(testDataPath, useHeader: false, separator: ","); + var testData = new TextLoader(testDataPath, useHeader: false, delimeter: ','); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index a1fb28de71..0b44fab88f 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -29,7 +29,7 @@ public void TrainAndPredictSentimentModelTest() { Arguments = new TextLoaderArguments { - Separator = "tab", + Delimiter = '\t', HasHeader = true, Column = new[] { @@ -90,7 +90,7 @@ public void TrainAndPredictSentimentModelTest() { Arguments = new TextLoaderArguments { - Separator = "tab", + Delimiter = '\t', HasHeader = true, Column = new[] { diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 625e4d9a5d..c0278ed3f8 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -27,11 +27,11 @@ public void ConstructorDoesntThrow() { Assert.NotNull(new TextLoader("fakeFile.txt")); Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: true)); - Assert.NotNull(new TextLoader("fakeFile.txt", separator: "tab")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", false, false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", supportSparse: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, separator: "tab", allowQuotedStrings: false)); + Assert.NotNull(new TextLoader("fakeFile.txt", delimeter: ',')); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',')); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', false, false)); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', supportSparse: false)); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', allowQuotedStrings: false)); } [Fact] @@ -54,7 +54,7 @@ public void CanSuccessfullyApplyATransform() public void CanSuccessfullyRetrieveQuotedData() { string dataPath = GetDataPath("QuotingData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, separator: ",", allowQuotedStrings: true, supportSparse: false); + var loader = new TextLoader(dataPath, useHeader: true, delimeter: ',', allowQuotedStrings: true, supportSparse: false); using (var environment = new TlcEnvironment()) { @@ -112,7 +112,7 @@ public void CanSuccessfullyRetrieveQuotedData() public void CanSuccessfullyRetrieveSparseData() { string dataPath = GetDataPath("SparseData.txt"); - var loader = new TextLoader(dataPath, useHeader: true, separator: "tab", allowQuotedStrings: false, supportSparse: true); + var loader = new TextLoader(dataPath, useHeader: true, allowQuotedStrings: false, supportSparse: true); using (var environment = new TlcEnvironment()) { @@ -177,7 +177,7 @@ public void CanSuccessfullyRetrieveSparseData() public void CanSuccessfullyTrimSpaces() { string dataPath = GetDataPath("TrimData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, separator: ",", allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); + var loader = new 
TextLoader(dataPath, useHeader: true, delimeter: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); using (var environment = new TlcEnvironment()) { From 847f3f11fca9c9c86fe2b0722a43996e27368c03 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 14 May 2018 16:49:12 -0700 Subject: [PATCH 07/17] PR feedback. --- src/Microsoft.ML.Core/Data/DataKind.cs | 4 +- src/Microsoft.ML.Core/Utilities/Utils.cs | 50 +++++++++++++++++++ .../DataLoadSave/Text/TextLoader.cs | 18 +++++-- .../AutoInference.cs | 2 + .../Microsoft.ML.PipelineInference.csproj | 2 +- src/Microsoft.ML/CSharpApi.cs | 4 +- src/Microsoft.ML/Data/TextLoader.cs | 5 +- src/Microsoft.ML/LearningPipeline.cs | 1 + .../Runtime/EntryPoints/ImportTextData.cs | 4 +- .../UnitTests/TestEntryPoints.cs | 4 +- .../Microsoft.ML.Predictor.Tests.csproj | 2 +- .../TestAutoInference.cs | 5 +- .../Microsoft.ML.TestFramework/ModelHelper.cs | 2 +- test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- .../Scenarios/SentimentPredictionTests.cs | 4 +- test/Microsoft.ML.Tests/TextLoaderTests.cs | 10 ++-- 16 files changed, 93 insertions(+), 27 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs index 557358eed7..f50cd19339 100644 --- a/src/Microsoft.ML.Core/Data/DataKind.cs +++ b/src/Microsoft.ML.Core/Data/DataKind.cs @@ -205,9 +205,9 @@ public static bool TryGetDataKind(this Type type, out DataKind kind) kind = DataKind.R4; else if (type == typeof(Double)) kind = DataKind.R8; - else if (type == typeof(DvText) || type == typeof(string)) + else if (type == typeof(DvText)) kind = DataKind.TX; - else if (type == typeof(DvBool) || type == typeof(bool)) + else if (type == typeof(DvBool)) kind = DataKind.BL; else if (type == typeof(DvTimeSpan)) kind = DataKind.TS; diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index 48993de785..13534ba3cf 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -12,6 +12,7 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading; +using Microsoft.ML.Runtime.Data; namespace Microsoft.ML.Runtime.Internal.Utilities { @@ -1069,5 +1070,54 @@ public static string GetDescription(this Enum value) } return null; } + + /// + /// Try to map a System.Type to a corresponding DataKind value. + /// + public static bool TryGetDataKind(Type type, out DataKind kind) + { + Contracts.CheckValueOrNull(type); + + // REVIEW: Make this more efficient. Should we have a global dictionary? 
+ if (type == typeof(DvInt1)) + kind = DataKind.I1; + else if (type == typeof(byte)) + kind = DataKind.U1; + else if (type == typeof(DvInt2)) + kind = DataKind.I2; + else if (type == typeof(ushort)) + kind = DataKind.U2; + else if (type == typeof(DvInt4)) + kind = DataKind.I4; + else if (type == typeof(uint)) + kind = DataKind.U4; + else if (type == typeof(DvInt8)) + kind = DataKind.I8; + else if (type == typeof(ulong)) + kind = DataKind.U8; + else if (type == typeof(Single)) + kind = DataKind.R4; + else if (type == typeof(Double)) + kind = DataKind.R8; + else if (type == typeof(DvText) || type == typeof(string)) + kind = DataKind.TX; + else if (type == typeof(DvBool) || type == typeof(bool)) + kind = DataKind.BL; + else if (type == typeof(DvTimeSpan)) + kind = DataKind.TS; + else if (type == typeof(DvDateTime)) + kind = DataKind.DT; + else if (type == typeof(DvDateTimeZone)) + kind = DataKind.DZ; + else if (type == typeof(UInt128)) + kind = DataKind.UG; + else + { + kind = default(DataKind); + return false; + } + + return true; + } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index cc27594a0f..4c4e8d83a2 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -297,8 +297,8 @@ public class ArgumentsCore [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")] public string Separator = "tab"; - [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator. Option: single character ONLY", ShortName = "del")] - public char Delimiter = '\t'; + [Argument(ArgumentType.AtMostOnce, Name = nameof(Separator), Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator.", ShortName = "sep")] + public char[] SeparatorChars = new[] { '\t' }; [Argument(ArgumentType.Multiple, HelpText = "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", ShortName = "col", SortOrder = 1)] @@ -1008,10 +1008,18 @@ public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource files _inputSize = SrcLim - 1; _host.CheckNonEmpty(args.Separator, nameof(args.Separator), "Must specify a separator"); - _host.CheckNonEmpty(args.Delimiter.ToString(), nameof(args.Delimiter), "Must specify a delimeter"); - if (args.Delimiter != '\t') - _separators = new char[] { args.Delimiter }; + + //By default args.Separator is "tab" and args.SeparatorChars is '\t'. + //Only one of them can differ from its default at a time, and whichever one differs + //is the separator that is used.
+ if (args.SeparatorChars.Length > 1 || args.SeparatorChars[0] != '\t') + { + var separators = new HashSet(); + foreach (char c in args.SeparatorChars) + separators.Add(NormalizeSeparator(c.ToString())); + + _separators = separators.ToArray(); + } else { string sep = args.Separator.ToLowerInvariant(); diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index 894029460a..7a340e5957 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ -579,11 +579,13 @@ public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimiz RecipeInference.InferRecipesFromData(env, trainDataPath, schemaDefinitionFile, out var _, out schemaDefinition, out var _, true); +#pragma warning disable 0618 var data = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = new SimpleFileHandle(env, trainDataPath, false, false), CustomSchema = schemaDefinition }).Data; +#pragma warning restore 0618 var splitOutput = TrainTestSplit.Split(env, new TrainTestSplit.Input { Data = data, Fraction = 0.8f }); AutoMlMlState amls = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, splitOutput.TrainData.Take(numOfSampleRows), splitOutput.TestData.Take(numOfSampleRows)); diff --git a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj index 0aa71597e0..2fc84d92b0 100644 --- a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj +++ b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj @@ -8,7 +8,7 @@ - ;1591;0618 + ;1591 diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index f6f78ee897..e7f75b75ba 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1371,9 +1371,9 @@ public sealed class TextLoaderArguments public int? InputSize { get; set; } /// - /// Source column separator. Option: single character ONLY + /// Source column separator. /// - public char Delimiter { get; set; } = ' '; + public char[] Separator { get; set; } = { ' ' }; /// /// Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 81bce67d2b..192614d6ad 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -5,6 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Utilities; using System; using System.Collections.Generic; using System.Linq; @@ -54,7 +55,7 @@ public TextLoader(string inputFilePath, bool useHeader = false, throw Contracts.Except($"{name} is not alphanumeric."); DataKind dk; - (field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType).TryGetDataKind(out dk); + Utils.TryGetDataKind(field.FieldType.IsArray ? 
field.FieldType.GetElementType() : field.FieldType, out dk); var col = Runtime.Data.TextLoader.Column.Parse( $"{name}:" + $"{dk.ToString()}:" + @@ -92,7 +93,7 @@ public TextLoader(string inputFilePath, bool useHeader = false, } Arguments.HasHeader = useHeader; - Arguments.Delimiter = delimeter; + Arguments.Separator = new[] { delimeter }; Arguments.AllowQuoting = allowQuotedStrings; Arguments.AllowSparse = supportSparse; Arguments.TrimWhitespace = trimWhitespace; diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index ef3ab20c13..32024233f5 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -183,6 +183,7 @@ public PredictionModel Train() if (transformModels.Count > 0) { transformModels.Insert(0, lastTransformModel); + var modelInput = new Transforms.ModelCombiner { Models = new ArrayVar(transformModels.ToArray()) diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index a30342bb84..246cac7cda 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -33,7 +33,7 @@ public sealed class LoaderInput [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Location of the input file", SortOrder = 1)] public IFileHandle InputFile; - [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 1)] + [Argument(ArgumentType.Required, ShortName = "args", HelpText = "Arguments", SortOrder = 2)] public TextLoader.Arguments Arguments = new TextLoader.Arguments(); } @@ -44,7 +44,7 @@ public sealed class Output } #pragma warning disable 0618 - [Obsolete("Use TextLoader instead.", false)] + [Obsolete("Use TextLoader instead.")] [TlcModule.EntryPoint(Name = "Data.CustomTextLoader", Desc = "Import a dataset from a text file")] public static Output ImportText(IHostEnvironment env, Input input) { diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index f68535c94c..949945a644 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -40,7 +40,7 @@ public void EntryPointTrainTestSplit() { Arguments = { - Delimiter = ',', + SeparatorChars = new []{',' }, HasHeader = true, Column = new[] { @@ -267,7 +267,7 @@ public void EntryPointCaching() { Arguments = { - Delimiter = ',', + SeparatorChars = new []{',' }, HasHeader = true, Column = new[] { diff --git a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj index a6f145214d..da40ffe344 100644 --- a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj +++ b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj @@ -5,7 +5,7 @@ true AnyCPU - 1701;1702;1705;0618 + 1701;1702;1705 diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index a169816a79..9ffd7713e4 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -49,12 +49,13 @@ public void TestLearn() // Use best pipeline for another task var inputFileTrain = new SimpleFileHandle(env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = 
inputFileTrain, CustomSchema = schema }).Data; var inputFileTest = new SimpleFileHandle(env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data; - +#pragma warning restore 0618 // REVIEW: Theoretically, it could be the case that a new, very bad learner is introduced and // we get unlucky and only select it every time, such that this test fails. Not // likely at all, but a non-zero probability. Should be ok, since all current learners are returning d > .80. @@ -77,11 +78,13 @@ public void EntryPointPipelineSweepSerialization() "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); +#pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index 54a9c4a4d7..c4ac61ab8d 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -63,7 +63,7 @@ private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) { Arguments = new TextLoaderArguments { - Delimiter = ',', + Separator = new[] { ',' }, HasHeader = true, Column = new[] { diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index c647110702..316d7eab55 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,7 +15,8 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - [Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] + //[Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] + [Fact] public void GenerateCSharpAPI() { var cSharpAPIPath = Path.Combine(RootDir, @"src\\Microsoft.ML\\CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 0b44fab88f..422b621fa4 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -29,7 +29,7 @@ public void TrainAndPredictSentimentModelTest() { Arguments = new TextLoaderArguments { - Delimiter = '\t', + Separator = new[] { '\t' }, HasHeader = true, Column = new[] { @@ -90,7 +90,7 @@ public void TrainAndPredictSentimentModelTest() { Arguments = new TextLoaderArguments { - Delimiter = '\t', + Separator = new[] { '\t' }, HasHeader = true, Column = new[] { diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index c0278ed3f8..8602dd2a6e 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -27,11 +27,11 @@ public void ConstructorDoesntThrow() { Assert.NotNull(new 
TextLoader("fakeFile.txt")); Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: true)); - Assert.NotNull(new TextLoader("fakeFile.txt", delimeter: ',')); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',')); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', false, false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', supportSparse: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, delimeter: ',', allowQuotedStrings: false)); + Assert.NotNull(new TextLoader("fakeFile.txt")); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false)); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, supportSparse: false, trimWhitespace: false)); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, supportSparse: false)); + Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, allowQuotedStrings: false)); } [Fact] From 5f153fb2e051fb1607fa2d917361c2c498aba846 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 15 May 2018 15:28:15 -0700 Subject: [PATCH 08/17] PR feedback. --- .../Common/EntryPoints/core_ep-list.tsv | 3 +- .../Common/EntryPoints/core_manifest.json | 368 +++++++++++++++++- .../EntryPoints/ModuleArgs.cs | 6 - .../EntryPoints/ModuleCatalog.cs | 2 - .../DataLoadSave/Text/TextLoader.cs | 10 +- src/Microsoft.ML/CSharpApi.cs | 256 ++++++++---- src/Microsoft.ML/Data/TextLoader.cs | 60 ++- src/Microsoft.ML/LearningPipeline.cs | 2 +- .../Runtime/EntryPoints/ImportTextData.cs | 3 +- .../Internal/Tools/CSharpApiGenerator.cs | 7 +- ...sticDualCoordinateAscentClassifierBench.cs | 5 +- .../TestAutoInference.cs | 3 +- .../Microsoft.ML.TestFramework/ModelHelper.cs | 42 +- test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- .../LearningPipelineTests.cs | 3 +- .../HousePriceTrainAndPredictionTests.cs | 4 +- .../Scenarios/IrisPlantClassificationTests.cs | 4 +- ...PlantClassificationWithStringLabelTests.cs | 4 +- .../Scenarios/SentimentPredictionTests.cs | 19 +- test/Microsoft.ML.Tests/TextLoaderTests.cs | 24 +- 20 files changed, 652 insertions(+), 176 deletions(-) diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 47007edaa6..22c2767d7a 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,7 +1,8 @@ +Data.CustomTextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.DataViewReference ImportData Microsoft.ML.Runtime.EntryPoints.DataViewReference+Input Microsoft.ML.Runtime.EntryPoints.DataViewReference+Output Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput -Data.TextLoader Import a dataset from a text file 
Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output +Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData TextLoader Microsoft.ML.Runtime.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Models.AnomalyDetectionEvaluator Evaluates an anomaly detection scored dataset. Microsoft.ML.Runtime.Data.Evaluate AnomalyDetection Microsoft.ML.Runtime.Data.AnomalyDetectionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.BinaryClassificationEvaluator Evaluates a binary classification scored dataset. Microsoft.ML.Runtime.Data.Evaluate Binary Microsoft.ML.Runtime.Data.BinaryClassifierMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+ClassificationEvaluateOutput Models.BinaryCrossValidator Cross validation for binary classification Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro CrossValidateBinary Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Output] diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index a6309fe36a..6eeb1bf709 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -1,5 +1,43 @@ { "EntryPoints": [ + { + "Name": "Data.CustomTextLoader", + "Desc": "Import a dataset from a text file", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "InputFile", + "Type": "FileHandle", + "Desc": "Location of the input file", + "Aliases": [ + "data" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "CustomSchema", + "Type": "String", + "Desc": "Custom schema to use for parsing", + "Aliases": [ + "schema" + ], + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "The resulting data view" + } + ] + }, { "Name": "Data.DataViewReference", "Desc": "Pass dataview from memory to experiment", @@ -99,16 +137,325 @@ "IsNullable": false }, { - "Name": "CustomSchema", - "Type": "String", - "Desc": "Custom schema to use for parsing", + "Name": "Arguments", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Name", + "Type": "String", + "Desc": "Name of the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "Type", + "Type": { + "Kind": "Enum", + "Values": [ + "I1", + "U1", + "I2", + "U2", + "I4", + "U4", + "I8", + "U8", + "R4", + "Num", + "R8", + "TX", + "Text", + "TXT", + "BL", + "Bool", + "TimeSpan", + "TS", + "DT", + "DateTime", + "DZ", + "DateTimeZone", + "UG", + "U16" + ] + }, + "Desc": "Type of the items in the column", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Source", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "Int", + "Desc": "First index in the range", + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "Int", + "Desc": "Last index in 
the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AutoEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, but should be a fixed number of items", + "Aliases": [ + "auto" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "VariableEnd", + "Type": "Bool", + "Desc": "This range extends to the end of the line, which can vary from line to line", + "Aliases": [ + "var" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "AllOther", + "Type": "Bool", + "Desc": "This range includes only other indices not specified", + "Aliases": [ + "other" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "ForceVector", + "Type": "Bool", + "Desc": "Force scalar columns to be treated as vectors of length one", + "Aliases": [ + "vector" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + } + }, + "Desc": "Source index range(s) of the column", + "Aliases": [ + "src" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "KeyRange", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Min", + "Type": "UInt", + "Desc": "First index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Max", + "Type": "UInt", + "Desc": "Last index in the range", + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Contiguous", + "Type": "Bool", + "Desc": "Whether the key is contiguous", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + } + ] + }, + "Desc": "For a key column, this defines the range of values", + "Aliases": [ + "key" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + } + ] + } + }, + "Desc": "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", + "Aliases": [ + "col" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "UseThreads", + "Type": "Bool", + "Desc": "Use separate parsing threads?", + "Aliases": [ + "threads" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "HeaderFile", + "Type": "String", + "Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.", + "Aliases": [ + "hf" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "MaxRows", + "Type": "Int", + "Desc": "Maximum number of rows to produce", + "Aliases": [ + "rows" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "AllowQuoting", + "Type": "Bool", + "Desc": "Whether the input may include quoted values, which can contain separator characters, colons, and distinguish empty values from missing values. When true, consecutive separators denote a missing value and an empty value is denoted by \"\". 
When false, consecutive separators denote an empty value.", + "Aliases": [ + "quote" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "AllowSparse", + "Type": "Bool", + "Desc": "Whether the input may include sparse representations", + "Aliases": [ + "sparse" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "InputSize", + "Type": "Int", + "Desc": "Number of source columns in the text data. Default is that sparse rows contain their size information.", + "Aliases": [ + "size" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "Separator", + "Type": { + "Kind": "Array", + "ItemType": "Char" + }, + "Desc": "Source column separator.", + "Aliases": [ + "sep" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": [ + "\t" + ] + }, + { + "Name": "TrimWhitespace", + "Type": "Bool", + "Desc": "Remove trailing whitespace from lines", + "Aliases": [ + "trim" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "HasHeader", + "Type": "Bool", + "Desc": "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.", + "Aliases": [ + "header" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false + } + ] + }, + "Desc": "Arguments", "Aliases": [ - "schema" + "args" ], - "Required": false, + "Required": true, "SortOrder": 2.0, - "IsNullable": false, - "Default": null + "IsNullable": false } ], "Outputs": [ @@ -117,6 +464,9 @@ "Type": "DataView", "Desc": "The resulting data view" } + ], + "InputKind": [ + "ILearningPipelineLoader" ] }, { @@ -21959,6 +22309,10 @@ } ] }, + { + "Kind": "ILearningPipelineLoader", + "Settings": [] + }, { "Kind": "IMulticlassClassificationOutput", "Settings": [] diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs index 9b778557eb..99cfec0dd9 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs @@ -527,12 +527,6 @@ public sealed class EntryPointAttribute : Attribute /// Short name of the Entry Point /// public string ShortName { get; set; } - - /// - /// Indicates if the code generated should not be sealed. - /// By default all classes are sealed. - /// - public bool NoSeal { get; set; } } /// diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs index 75c4b59046..586f6a4b02 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs @@ -40,7 +40,6 @@ public sealed class ModuleCatalog /// public sealed class EntryPointInfo { - public readonly bool NoSeal; public readonly string Name; public readonly string Description; public readonly string ShortName; @@ -60,7 +59,6 @@ internal EntryPointInfo(IExceptionContext ectx, MethodInfo method, ectx.AssertValue(attribute); Name = attribute.Name ?? 
string.Join(".", method.DeclaringType.Name, method.Name); - NoSeal = attribute.NoSeal; Description = attribute.Desc; Method = method; ShortName = attribute.ShortName; diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 4c4e8d83a2..3678c749ba 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -85,15 +85,19 @@ private bool TryParse(string str) return TryParseSource(rgstr[istr++]); } - private bool TryParseSource(string str) + private bool TryParseSource(string str) => TryParseSourceEx(str, out Source); + + public static bool TryParseSourceEx(string str, out Range[] ranges) { + ranges = null; var strs = str.Split(','); if (str.Length == 0) return false; - Source = new Range[strs.Length]; + + ranges = new Range[strs.Length]; for (int i = 0; i < strs.Length; i++) { - if ((Source[i] = Range.Parse(strs[i])) == null) + if ((ranges[i] = Range.Parse(strs[i])) == null) return false; } return true; diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 642bebca75..0394ce112d 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -34,6 +34,18 @@ public void Add(Microsoft.ML.Data.CustomTextLoader input, Microsoft.ML.Data.Cust _jsonNodes.Add(Serialize("Data.CustomTextLoader", input, output)); } + public Microsoft.ML.Data.DataViewReference.Output Add(Microsoft.ML.Data.DataViewReference input) + { + var output = new Microsoft.ML.Data.DataViewReference.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Data.DataViewReference input, Microsoft.ML.Data.DataViewReference.Output output) + { + _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); + } + public Microsoft.ML.Data.IDataViewArrayConverter.Output Add(Microsoft.ML.Data.IDataViewArrayConverter input) { var output = new Microsoft.ML.Data.IDataViewArrayConverter.Output(); @@ -65,22 +77,11 @@ public Microsoft.ML.Data.TextLoader.Output Add(Microsoft.ML.Data.TextLoader inpu return output; } - public Microsoft.ML.Data.DataViewReference.Output Add(Microsoft.ML.Data.DataViewReference input) - { - var output = new Microsoft.ML.Data.DataViewReference.Output(); - Add(input, output); - return output; - } - public void Add(Microsoft.ML.Data.TextLoader input, Microsoft.ML.Data.TextLoader.Output output) { _jsonNodes.Add(Serialize("Data.TextLoader", input, output)); } - public void Add(Microsoft.ML.Data.DataViewReference input, Microsoft.ML.Data.DataViewReference.Output output) - { - _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); - } public Microsoft.ML.Models.AnomalyDetectionEvaluator.Output Add(Microsoft.ML.Models.AnomalyDetectionEvaluator input) { var output = new Microsoft.ML.Models.AnomalyDetectionEvaluator.Output(); @@ -465,6 +466,18 @@ public void Add(Microsoft.ML.Trainers.GeneralizedAdditiveModelRegressor input, M _jsonNodes.Add(Serialize("Trainers.GeneralizedAdditiveModelRegressor", input, output)); } + public Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output Add(Microsoft.ML.Trainers.KMeansPlusPlusClusterer input) + { + var output = new Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Trainers.KMeansPlusPlusClusterer input, Microsoft.ML.Trainers.KMeansPlusPlusClusterer.Output output) + { + _jsonNodes.Add(Serialize("Trainers.KMeansPlusPlusClusterer", input, output)); + } + public 
Microsoft.ML.Trainers.LinearSvmBinaryClassifier.Output Add(Microsoft.ML.Trainers.LinearSvmBinaryClassifier input) { var output = new Microsoft.ML.Trainers.LinearSvmBinaryClassifier.Output(); @@ -1316,6 +1329,33 @@ public sealed class Output } } + namespace Data + { + + /// + /// Pass dataview from memory to experiment + /// + public sealed partial class DataViewReference + { + + + /// + /// Pointer to IDataView in memory + /// + public Var Data { get; set; } = new Var(); + + + public sealed class Output + { + /// + /// The resulting data view + /// + public Var Data { get; set; } = new Var(); + + } + } + } + namespace Data { @@ -1373,7 +1413,7 @@ public sealed class Output namespace Data { - public sealed class TextLoaderArguments + public sealed partial class TextLoaderArguments { /// /// Use separate parsing threads? @@ -1427,7 +1467,7 @@ public sealed class TextLoaderArguments } - public sealed class TextLoaderColumn + public sealed partial class TextLoaderColumn { /// /// Name of the column @@ -1451,7 +1491,7 @@ public sealed class TextLoaderColumn } - public sealed class TextLoaderRange + public sealed partial class TextLoaderRange { /// /// First index in the range @@ -1485,7 +1525,7 @@ public sealed class TextLoaderRange } - public sealed class KeyRange + public sealed partial class KeyRange { /// /// First index in the range @@ -1507,7 +1547,7 @@ public sealed class KeyRange /// /// Import a dataset from a text file /// - public partial class TextLoader : Microsoft.ML.ILearningPipelineLoader + public sealed partial class TextLoader : Microsoft.ML.ILearningPipelineLoader { [JsonIgnore] @@ -1553,23 +1593,6 @@ public TextLoaderPipelineStep (Output output) public Data.TextLoaderArguments Arguments { get; set; } = new Data.TextLoaderArguments(); - public sealed class Output - { - /// - /// The resulting data view - /// - public Var Data { get; set; } = new Var(); - - } - } - - public sealed partial class DataViewReference - { - /// - /// Location of the input file - /// - public Var Data { get; set; } = new Var(); - public sealed class Output { /// @@ -1768,7 +1791,7 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICla namespace Models { - public sealed class CrossValidationBinaryMacroSubGraphInput + public sealed partial class CrossValidationBinaryMacroSubGraphInput { /// /// The data to be used for training @@ -1777,7 +1800,7 @@ public sealed class CrossValidationBinaryMacroSubGraphInput } - public sealed class CrossValidationBinaryMacroSubGraphOutput + public sealed partial class CrossValidationBinaryMacroSubGraphOutput { /// /// The model @@ -2033,7 +2056,7 @@ public enum MacroUtilsTrainerKinds } - public sealed class CrossValidationMacroSubGraphInput + public sealed partial class CrossValidationMacroSubGraphInput { /// /// The data to be used for training @@ -2042,7 +2065,7 @@ public sealed class CrossValidationMacroSubGraphInput } - public sealed class CrossValidationMacroSubGraphOutput + public sealed partial class CrossValidationMacroSubGraphOutput { /// /// The model @@ -2446,7 +2469,7 @@ public enum CachingOptions } - public sealed class OneVersusAllMacroSubGraphOutput + public sealed partial class OneVersusAllMacroSubGraphOutput { /// /// The predictor model for the subgraph exemplar. 
@@ -3084,7 +3107,7 @@ public sealed class Output namespace Models { - public sealed class TrainTestBinaryMacroSubGraphInput + public sealed partial class TrainTestBinaryMacroSubGraphInput { /// /// The data to be used for training @@ -3093,7 +3116,7 @@ public sealed class TrainTestBinaryMacroSubGraphInput } - public sealed class TrainTestBinaryMacroSubGraphOutput + public sealed partial class TrainTestBinaryMacroSubGraphOutput { /// /// The model @@ -3169,7 +3192,7 @@ public sealed class Output namespace Models { - public sealed class TrainTestMacroSubGraphInput + public sealed partial class TrainTestMacroSubGraphInput { /// /// The data to be used for training @@ -3178,7 +3201,7 @@ public sealed class TrainTestMacroSubGraphInput } - public sealed class TrainTestMacroSubGraphOutput + public sealed partial class TrainTestMacroSubGraphOutput { /// /// The model @@ -5893,6 +5916,107 @@ public GeneralizedAdditiveModelRegressorPipelineStep(Output output) } } + namespace Trainers + { + public enum KMeansPlusPlusTrainerInitAlgorithm + { + KMeansPlusPlus = 0, + Random = 1, + KMeansParallel = 2 + } + + + /// + /// K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better method for choosing the initial cluster centers. + /// + public sealed partial class KMeansPlusPlusClusterer : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem + { + + + /// + /// The number of clusters + /// + [TlcModule.SweepableDiscreteParamAttribute("K", new object[]{5, 10, 20, 40})] + public int K { get; set; } = 5; + + /// + /// Cluster initialization algorithm + /// + public Trainers.KMeansPlusPlusTrainerInitAlgorithm InitAlgorithm { get; set; } = Trainers.KMeansPlusPlusTrainerInitAlgorithm.KMeansParallel; + + /// + /// Tolerance parameter for trainer convergence. Lower = slower, more accurate + /// + public float OptTol { get; set; } = 1E-07f; + + /// + /// Maximum number of iterations. + /// + public int MaxIterations { get; set; } = 1000; + + /// + /// Memory budget (in MBs) to use for KMeans acceleration + /// + public int AccelMemBudgetMb { get; set; } = 4096; + + /// + /// Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed. + /// + public int? 
NumThreads { get; set; } + + /// + /// The data to be used for training + /// + public Var TrainingData { get; set; } = new Var(); + + /// + /// Column to use for features + /// + public string FeatureColumn { get; set; } = "Features"; + + /// + /// Normalize option for the feature column + /// + public Models.NormalizeOption NormalizeFeatures { get; set; } = Models.NormalizeOption.Auto; + + /// + /// Whether learner should cache input training data + /// + public Models.CachingOptions Caching { get; set; } = Models.CachingOptions.Auto; + + + public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IClusteringOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput + { + /// + /// The trained model + /// + public Var PredictorModel { get; set; } = new Var(); + + } + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(KMeansPlusPlusClusterer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } + + TrainingData = dataStep.Data; + Output output = experiment.Add(this); + return new KMeansPlusPlusClustererPipelineStep(output); + } + + private class KMeansPlusPlusClustererPipelineStep : ILearningPipelinePredictorStep + { + public KMeansPlusPlusClustererPipelineStep(Output output) + { + Model = output.PredictorModel; + } + + public Var Model { get; } + } + } + } + namespace Trainers { @@ -7403,7 +7527,7 @@ public BinaryPredictionScoreColumnsRenamerPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformBinColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformBinColumn : OneToOneColumn, IOneToOneColumn { /// /// Max number of bins, power of 2 recommended @@ -7555,7 +7679,7 @@ public enum CategoricalTransformOutputKind : byte } - public sealed class CategoricalHashTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CategoricalHashTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The number of bits to hash into. Must be between 1 and 30, inclusive. 
@@ -7725,7 +7849,7 @@ public enum TermTransformSortOrder : byte } - public sealed class CategoricalTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CategoricalTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector @@ -7889,7 +8013,7 @@ public CategoricalOneHotVectorizerPipelineStep(Output output) namespace Transforms { - public sealed class CharTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CharTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -8008,7 +8132,7 @@ public CharacterTokenizerPipelineStep(Output output) namespace Transforms { - public sealed class ConcatTransformColumn : ManyToOneColumn, IManyToOneColumn + public sealed partial class ConcatTransformColumn : ManyToOneColumn, IManyToOneColumn { /// /// Name of the new column @@ -8098,7 +8222,7 @@ public ColumnConcatenatorPipelineStep(Output output) namespace Transforms { - public sealed class CopyColumnsTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class CopyColumnsTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -8360,7 +8484,7 @@ public enum DataKind : byte } - public sealed class ConvertTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class ConvertTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The result type @@ -8559,7 +8683,7 @@ public CombinerByContiguousGroupIdPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformAffineColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformAffineColumn : OneToOneColumn, IOneToOneColumn { /// /// Whether to map zero to zero, preserving sparsity @@ -8832,7 +8956,7 @@ public sealed class Output namespace Transforms { - public sealed class TermTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class TermTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Maximum number of terms to keep when auto-training @@ -9186,7 +9310,7 @@ public FeatureSelectorByMutualInformationPipelineStep(Output output) namespace Transforms { - public sealed class LpNormNormalizerTransformGcnColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LpNormNormalizerTransformGcnColumn : OneToOneColumn, IOneToOneColumn { /// /// Normalize by standard deviation rather than L2 norm @@ -9330,7 +9454,7 @@ public GlobalContrastNormalizerPipelineStep(Output output) namespace Transforms { - public sealed class HashJoinTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class HashJoinTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Whether the values need to be combined for a single hash @@ -9489,7 +9613,7 @@ public HashConverterPipelineStep(Output output) namespace Transforms { - public sealed class KeyToValueTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class KeyToValueTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -9668,7 +9792,7 @@ public LabelColumnKeyBooleanConverterPipelineStep(Output output) namespace Transforms { - public sealed class LabelIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LabelIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The positive example class for binary classification. 
@@ -9852,7 +9976,7 @@ public LabelToFloatConverterPipelineStep(Output output) namespace Transforms { - public sealed class NormalizeTransformLogNormalColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NormalizeTransformLogNormalColumn : OneToOneColumn, IOneToOneColumn { /// /// Max number of examples used to train the normalizer @@ -9989,7 +10113,7 @@ public enum LpNormNormalizerTransformNormalizerKind : byte } - public sealed class LpNormNormalizerTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class LpNormNormalizerTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The norm to use to normalize each sample @@ -10392,7 +10516,7 @@ public enum NAHandleTransformReplacementKind } - public sealed class NAHandleTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAHandleTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// The replacement method to utilize @@ -10536,7 +10660,7 @@ public MissingValueHandlerPipelineStep(Output output) namespace Transforms { - public sealed class NAIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAIndicatorTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -10650,7 +10774,7 @@ public MissingValueIndicatorPipelineStep(Output output) namespace Transforms { - public sealed class NADropTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NADropTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Name of the new column @@ -10844,7 +10968,7 @@ public enum NAReplaceTransformReplacementKind } - public sealed class NAReplaceTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NAReplaceTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Replacement value for NAs (uses default value if not given) @@ -11017,7 +11141,7 @@ public enum NgramTransformWeightingCriteria } - public sealed class NgramTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class NgramTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Maximum ngram length @@ -11356,7 +11480,7 @@ public PredictedLabelColumnOriginalValueConverterPipelineStep(Output output) namespace Transforms { - public sealed class GenerateNumberTransformColumn + public sealed partial class GenerateNumberTransformColumn { /// /// Name of the new column @@ -12095,7 +12219,7 @@ public enum TextTransformTextNormKind } - public sealed class TextTransformColumn : ManyToOneColumn, IManyToOneColumn + public sealed partial class TextTransformColumn : ManyToOneColumn, IManyToOneColumn { /// /// Name of the new column @@ -12109,7 +12233,7 @@ public sealed class TextTransformColumn : ManyToOneColumn, } - public sealed class TermLoaderArguments + public sealed partial class TermLoaderArguments { /// /// List of terms @@ -12524,7 +12648,7 @@ public sealed class Output namespace Transforms { - public sealed class DelimitedTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn + public sealed partial class DelimitedTokenizeTransformColumn : OneToOneColumn, IOneToOneColumn { /// /// Comma separated set of term separator(s). Commonly: 'space', 'comma', 'semicolon' or other single character. 
diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 192614d6ad..bb706392b5 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.Utilities; +using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Linq; @@ -15,15 +16,20 @@ namespace Microsoft.ML.Data { - public sealed class TextLoader : TextLoader + public sealed partial class TextLoaderRange + { + [JsonIgnore] + public int Ordinal { get { return Ordinal; } set { Min = value; Max = value; } } + } + + public sealed partial class TextLoader { /// - /// Construct a TextLoader object + /// Construct a TextLoader object by inferencing the dataset schema from a type. /// - /// Data file path /// Does the file contains header? - /// Column delimter. Default is '\t' or tab. + /// Column separator character. Default is '\t' /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators @@ -33,9 +39,9 @@ public sealed class TextLoader : TextLoader /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines - public TextLoader(string inputFilePath, bool useHeader = false, - char delimeter = '\t', bool allowQuotedStrings = true, - bool supportSparse = true, bool trimWhitespace = false) : base(inputFilePath) + public TextLoader CreateFrom(bool useHeader = false, + char separator = '\t', bool allowQuotedStrings = true, + bool supportSparse = true, bool trimWhitespace = false) { var fields = typeof(TInput).GetFields(); Arguments.Column = new TextLoaderColumn[fields.Length]; @@ -56,47 +62,39 @@ public TextLoader(string inputFilePath, bool useHeader = false, DataKind dk; Utils.TryGetDataKind(field.FieldType.IsArray ? 
field.FieldType.GetElementType() : field.FieldType, out dk); - var col = Runtime.Data.TextLoader.Column.Parse( - $"{name}:" + - $"{dk.ToString()}:" + - $"{mappingAttr.Ordinal}" - ); + Runtime.Data.TextLoader.Range[] sources; + if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources)) + throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed."); - if(col == null) - throw Contracts.Except($"Could not generate column for {name}"); + Contracts.Assert(sources != null); TextLoaderColumn tlc = new TextLoaderColumn(); - if (col.KeyRange != null) - { - tlc.KeyRange = new KeyRange(); - tlc.KeyRange.Min = col.KeyRange.Min; - tlc.KeyRange.Max = col.KeyRange.Max; - } - - tlc.Name = col.Name; - tlc.Source = new TextLoaderRange[col.Source.Length]; + tlc.Name = name; + tlc.Source = new TextLoaderRange[sources.Length]; for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++) { tlc.Source[indexLocal] = new TextLoaderRange { - AllOther = col.Source[indexLocal].AllOther, - AutoEnd = col.Source[indexLocal].AutoEnd, - ForceVector = col.Source[indexLocal].ForceVector, - VariableEnd = col.Source[indexLocal].VariableEnd, - Max = col.Source[indexLocal].Max, - Min = col.Source[indexLocal].Min + AllOther = sources[indexLocal].AllOther, + AutoEnd = sources[indexLocal].AutoEnd, + ForceVector = sources[indexLocal].ForceVector, + VariableEnd = sources[indexLocal].VariableEnd, + Max = sources[indexLocal].Max, + Min = sources[indexLocal].Min }; } - tlc.Type = col.Type; + tlc.Type = dk; Arguments.Column[index] = tlc; } Arguments.HasHeader = useHeader; - Arguments.Separator = new[] { delimeter }; + Arguments.Separator = new[] { separator }; Arguments.AllowQuoting = allowQuotedStrings; Arguments.AllowSparse = supportSparse; Arguments.TrimWhitespace = trimWhitespace; + + return this; } } } diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index e068ad81ec..856a273e23 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -68,7 +68,7 @@ public LearningPipeline() /// Possible data loader(s), transforms and trainers options are /// /// Data Loader: - /// + /// /// etc. 
/// /// diff --git a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs index 246cac7cda..41048000d8 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/ImportTextData.cs @@ -55,8 +55,9 @@ public static Output ImportText(IHostEnvironment env, Input input) var loader = host.CreateLoader(string.Format("Text{{{0}}}", input.CustomSchema), new FileHandleSource(input.InputFile)); return new Output { Data = loader }; } +#pragma warning restore 0618 - [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file", NoSeal = true)] + [TlcModule.EntryPoint(Name = "Data.TextLoader", Desc = "Import a dataset from a text file")] public static Output TextLoader(IHostEnvironment env, LoaderInput input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index c6c16df22e..cec3fdf91f 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -685,7 +685,7 @@ private void GenerateStructs(IndentingTextWriter writer, classBase = $" : OneToOneColumn<{_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}>, IOneToOneColumn"; else if (type.IsSubclassOf(typeof(ManyToOneColumn))) classBase = $" : ManyToOneColumn<{_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}>, IManyToOneColumn"; - writer.WriteLine($"public sealed class {_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}{classBase}"); + writer.WriteLine($"public sealed partial class {_typesSymbolTable[type.FullName].Substring(_typesSymbolTable[type.FullName].LastIndexOf('.') + 1)}{classBase}"); writer.WriteLine("{"); writer.Indent(); GenerateInputFields(writer, type, catalog, _typesSymbolTable); @@ -910,9 +910,8 @@ private void GenerateInput(IndentingTextWriter writer, if(entryPointInfo.ObsoleteAttribute != null) writer.WriteLine($"[Obsolete(\"{entryPointInfo.ObsoleteAttribute.Message}\")]"); - - string seal = entryPointInfo.NoSeal ? 
"" : "sealed "; - writer.WriteLine($"public {seal}partial class {classAndMethod.Item2}{classBase}"); + + writer.WriteLine($"public sealed partial class {classAndMethod.Item2}{classBase}"); writer.WriteLine("{"); writer.Indent(); writer.WriteLine(); diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index e0583f58b7..adfa42e50d 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -4,6 +4,7 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Running; +using Microsoft.ML.Data; using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Trainers; @@ -50,7 +51,7 @@ public void Setup() s_trainedModel = TrainCore(); IrisPrediction prediction = s_trainedModel.Predict(s_example); - var testData = new TextLoader(s_dataPath, useHeader: true, separator: "tab"); + var testData = new TextLoader(s_dataPath).CreateFrom(useHeader: true); var evaluator = new ClassificationEvaluator(); s_metrics = evaluator.Evaluate(s_trainedModel, testData); @@ -70,7 +71,7 @@ private static PredictionModel TrainCore() { var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(s_dataPath, useHeader: true, separator: "tab")); + pipeline.Add(new TextLoader(s_dataPath).CreateFrom(useHeader: true)); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index eb62ae120d..f0e7d8ec73 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -146,12 +146,13 @@ public void EntryPointPipelineSweep() const int numOfSampleRows = 1000; int numIterations = 4; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); +#pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); - +#pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index c4ac61ab8d..19943c6887 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -70,147 +70,147 @@ private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) new TextLoaderColumn() { Name = "Id", - Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, Type = Runtime.Data.DataKind.Text }, new TextLoaderColumn() { Name = "Date", - Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, Type = Runtime.Data.DataKind.Text }, new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Min = 2, Max = 2} }, + Source = new [] { new TextLoaderRange() { Ordinal = 2 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Bedrooms", - Source = new [] { new 
TextLoaderRange() { Min = 3, Max = 3} }, + Source = new [] { new TextLoaderRange() { Ordinal = 3 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Bathrooms", - Source = new [] { new TextLoaderRange() { Min = 4, Max = 4} }, + Source = new [] { new TextLoaderRange() { Ordinal = 4 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLiving", - Source = new [] { new TextLoaderRange() { Min = 5, Max = 5} }, + Source = new [] { new TextLoaderRange() { Ordinal = 5 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLot", - Source = new [] { new TextLoaderRange() { Min = 6, Max = 6} }, + Source = new [] { new TextLoaderRange() { Ordinal = 6 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Floors", - Source = new [] { new TextLoaderRange() { Min = 7, Max = 7} }, + Source = new [] { new TextLoaderRange() { Ordinal = 7 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Waterfront", - Source = new [] { new TextLoaderRange() { Min = 8, Max = 8} }, + Source = new [] { new TextLoaderRange() { Ordinal = 8 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "View", - Source = new [] { new TextLoaderRange() { Min = 9, Max = 9} }, + Source = new [] { new TextLoaderRange() { Ordinal = 9 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Condition", - Source = new [] { new TextLoaderRange() { Min = 10, Max = 10} }, + Source = new [] { new TextLoaderRange() { Ordinal = 10 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Grade", - Source = new [] { new TextLoaderRange() { Min = 11, Max = 11} }, + Source = new [] { new TextLoaderRange() { Ordinal = 11 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftAbove", - Source = new [] { new TextLoaderRange() { Min = 12, Max = 12} }, + Source = new [] { new TextLoaderRange() { Ordinal = 12 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftBasement", - Source = new [] { new TextLoaderRange() { Min = 13, Max = 13} }, + Source = new [] { new TextLoaderRange() { Ordinal = 13 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "YearBuilt", - Source = new [] { new TextLoaderRange() { Min = 14, Max = 14} }, + Source = new [] { new TextLoaderRange() { Ordinal = 14 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "YearRenovated", - Source = new [] { new TextLoaderRange() { Min = 15, Max = 15} }, + Source = new [] { new TextLoaderRange() { Ordinal = 15 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Zipcode", - Source = new [] { new TextLoaderRange() { Min = 16, Max = 16} }, + Source = new [] { new TextLoaderRange() { Ordinal = 16 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Lat", - Source = new [] { new TextLoaderRange() { Min = 17, Max = 17} }, + Source = new [] { new TextLoaderRange() { Ordinal = 17 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Long", - Source = new [] { new TextLoaderRange() { Min = 18, Max = 18} }, + Source = new [] { new TextLoaderRange() { Ordinal = 18 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLiving15", - Source = new [] { new TextLoaderRange() { Min = 19, Max = 19} }, + Source = new [] { new TextLoaderRange() { Ordinal = 19 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLot15", - Source = new [] { new TextLoaderRange() { 
Min = 20, Max = 20} }, + Source = new [] { new TextLoaderRange() { Ordinal = 20 } }, Type = Runtime.Data.DataKind.Num }, } diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index 316d7eab55..c647110702 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,8 +15,7 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - //[Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] - [Fact] + [Fact(Skip = "Temporary solution(Windows ONLY) to regenerate codegenerated CSharpAPI.cs")] public void GenerateCSharpAPI() { var cSharpAPIPath = Path.Combine(RootDir, @"src\\Microsoft.ML\\CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs index 30dd844d58..259f9b3ace 100644 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ b/test/Microsoft.ML.Tests/LearningPipelineTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML; +using Microsoft.ML.Data; using Microsoft.ML.Runtime.Api; using Microsoft.ML.TestFramework; using Microsoft.ML.Transforms; @@ -64,7 +65,7 @@ public void TransformOnlyPipeline() { const string _dataPath = @"..\..\Data\breast-cancer.txt"; var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_dataPath, useHeader: false)); + pipeline.Add(new TextLoader(_dataPath).CreateFrom(useHeader: false)); pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag }); var model = pipeline.Train(); var predictionModel = model.Predict(new InputData() { F1 = "5" }); diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs index bcc2f6a172..31fc4fdd6d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs @@ -22,7 +22,7 @@ public void TrainAndPredictHousePriceModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: true, delimeter: ',')); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',')); pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); @@ -62,7 +62,7 @@ public void TrainAndPredictHousePriceModelTest() Assert.InRange(prediction.Price, 260_000, 330_000); string testDataPath = GetDataPath("kc_house_test.csv"); - var testData = new TextLoader(testDataPath, useHeader: true, delimeter: ','); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: true, separator: ','); var evaluator = new RegressionEvaluator(); RegressionMetrics metrics = evaluator.Evaluate(model, testData); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index 2aba0a375d..5dcbf3a588 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -20,7 +20,7 @@ public void TrainAndPredictIrisModelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false)); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false)); 
pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); @@ -67,7 +67,7 @@ public void TrainAndPredictIrisModelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. string testDataPath = GetDataPath("iris.txt"); - var testData = new TextLoader(testDataPath, useHeader: false); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 38cc1cbf6c..ebddc33b03 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -20,7 +20,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath, useHeader: false, delimeter: ',')); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, separator: ',')); pipeline.Add(new Dictionarizer("Label")); // "IrisPlantType" is used as "Label" because of column attribute name on the field. @@ -70,7 +70,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Note: Testing against the same data set as a simple way to test evaluation. // This isn't appropriate in real-world scenarios. string testDataPath = GetDataPath("iris.data"); - var testData = new TextLoader(testDataPath, useHeader: false, delimeter: ','); + var testData = new TextLoader(testDataPath).CreateFrom(useHeader: false, separator: ','); var evaluator = new ClassificationEvaluator(); evaluator.OutputTopKAcc = 3; diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 422b621fa4..4d6c440537 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Models; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System.Collections.Generic; @@ -25,7 +26,7 @@ public void TrainAndPredictSentimentModelTest() string dataPath = GetDataPath(SentimentDataPath); var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath) + pipeline.Add(new Data.TextLoader(dataPath) { Arguments = new TextLoaderArguments { @@ -36,14 +37,14 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", - Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, Type = Runtime.Data.DataKind.Text } } @@ -82,11 +83,11 @@ public void TrainAndPredictSentimentModelTest() IEnumerable predictions = model.Predict(sentiments); Assert.Equal(2, predictions.Count()); - Assert.False(predictions.ElementAt(0).Sentiment); - Assert.True(predictions.ElementAt(1).Sentiment); + Assert.True(predictions.ElementAt(0).Sentiment.IsFalse); + 
Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); string testDataPath = GetDataPath(SentimentTestPath); - var testData = new TextLoader(testDataPath) + var testData = new Data.TextLoader(testDataPath) { Arguments = new TextLoaderArguments { @@ -97,14 +98,14 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Min = 0, Max = 0} }, + Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", - Source = new [] { new TextLoaderRange() { Min = 1, Max = 1} }, + Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, Type = Runtime.Data.DataKind.Text } } @@ -153,7 +154,7 @@ public class SentimentData public class SentimentPrediction { [ColumnName("PredictedLabel")] - public bool Sentiment; + public DvBool Sentiment; } } } diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index 8602dd2a6e..40c0b6525f 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -25,19 +25,19 @@ public TextLoaderTests(ITestOutputHelper output) [Fact] public void ConstructorDoesntThrow() { - Assert.NotNull(new TextLoader("fakeFile.txt")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: true)); - Assert.NotNull(new TextLoader("fakeFile.txt")); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, supportSparse: false, trimWhitespace: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, supportSparse: false)); - Assert.NotNull(new TextLoader("fakeFile.txt", useHeader: false, allowQuotedStrings: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom()); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader:true)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom()); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false, trimWhitespace: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, supportSparse: false)); + Assert.NotNull(new Data.TextLoader("fakeFile.txt").CreateFrom(useHeader: false, allowQuotedStrings: false)); } [Fact] public void CanSuccessfullyApplyATransform() { - var loader = new TextLoader("fakeFile.txt"); + var loader = new Data.TextLoader("fakeFile.txt").CreateFrom(); using (var environment = new TlcEnvironment()) { @@ -54,7 +54,7 @@ public void CanSuccessfullyApplyATransform() public void CanSuccessfullyRetrieveQuotedData() { string dataPath = GetDataPath("QuotingData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, delimeter: ',', allowQuotedStrings: true, supportSparse: false); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: true, supportSparse: false); using (var environment = new TlcEnvironment()) { @@ -112,7 +112,7 @@ public void CanSuccessfullyRetrieveQuotedData() public void CanSuccessfullyRetrieveSparseData() { string dataPath = GetDataPath("SparseData.txt"); - var loader = new TextLoader(dataPath, useHeader: true, allowQuotedStrings: false, supportSparse: true); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, allowQuotedStrings: false, supportSparse: true); using (var environment = new 
TlcEnvironment()) { @@ -177,7 +177,7 @@ public void CanSuccessfullyRetrieveSparseData() public void CanSuccessfullyTrimSpaces() { string dataPath = GetDataPath("TrimData.csv"); - var loader = new TextLoader(dataPath, useHeader: true, delimeter: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); + var loader = new Data.TextLoader(dataPath).CreateFrom(useHeader: true, separator: ',', allowQuotedStrings: false, supportSparse: false, trimWhitespace: true); using (var environment = new TlcEnvironment()) { @@ -224,7 +224,7 @@ public void CanSuccessfullyTrimSpaces() [Fact] public void ThrowsExceptionWithPropertyName() { - Exception ex = Assert.Throws( () => new TextLoader("fakefile.txt") ); + Exception ex = Assert.Throws( () => new Data.TextLoader("fakefile.txt").CreateFrom() ); Assert.StartsWith("String1 is missing ColumnAttribute", ex.Message); } From 3b57ae6b3afa753a0d0222fdee0a3c0d8a7533c8 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Tue, 15 May 2018 20:20:13 -0700 Subject: [PATCH 09/17] PR feedback. --- src/Microsoft.ML.Core/Utilities/Utils.cs | 49 ---------------- src/Microsoft.ML/CSharpApi.cs | 2 +- src/Microsoft.ML/Data/TextLoader.cs | 58 ++++++++++++++++++- src/Microsoft.ML/LearningPipeline.cs | 3 +- .../Internal/Tools/CSharpApiGenerator.cs | 34 ++++++++++- 5 files changed, 90 insertions(+), 56 deletions(-) diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index 13534ba3cf..b453e6da29 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -1070,54 +1070,5 @@ public static string GetDescription(this Enum value) } return null; } - - /// - /// Try to map a System.Type to a corresponding DataKind value. - /// - public static bool TryGetDataKind(Type type, out DataKind kind) - { - Contracts.CheckValueOrNull(type); - - // REVIEW: Make this more efficient. Should we have a global dictionary? - if (type == typeof(DvInt1)) - kind = DataKind.I1; - else if (type == typeof(byte)) - kind = DataKind.U1; - else if (type == typeof(DvInt2)) - kind = DataKind.I2; - else if (type == typeof(ushort)) - kind = DataKind.U2; - else if (type == typeof(DvInt4)) - kind = DataKind.I4; - else if (type == typeof(uint)) - kind = DataKind.U4; - else if (type == typeof(DvInt8)) - kind = DataKind.I8; - else if (type == typeof(ulong)) - kind = DataKind.U8; - else if (type == typeof(Single)) - kind = DataKind.R4; - else if (type == typeof(Double)) - kind = DataKind.R8; - else if (type == typeof(DvText) || type == typeof(string)) - kind = DataKind.TX; - else if (type == typeof(DvBool) || type == typeof(bool)) - kind = DataKind.BL; - else if (type == typeof(DvTimeSpan)) - kind = DataKind.TS; - else if (type == typeof(DvDateTime)) - kind = DataKind.DT; - else if (type == typeof(DvDateTimeZone)) - kind = DataKind.DZ; - else if (type == typeof(UInt128)) - kind = DataKind.UG; - else - { - kind = default(DataKind); - return false; - } - - return true; - } } } diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 0394ce112d..317ee98db0 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -1448,7 +1448,7 @@ public sealed partial class TextLoaderArguments /// /// Source column separator. /// - public char[] Separator { get; set; } = { ' ' }; + public char[] Separator { get; set; } = { '\t' }; /// /// Column groups. 
Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40 diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index bb706392b5..e43df96d4b 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -60,8 +60,6 @@ public TextLoader CreateFrom(bool useHeader = false, if (name.Any(c => !Char.IsLetterOrDigit(c))) throw Contracts.Except($"{name} is not alphanumeric."); - DataKind dk; - Utils.TryGetDataKind(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType, out dk); Runtime.Data.TextLoader.Range[] sources; if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources)) throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed."); @@ -71,6 +69,12 @@ public TextLoader CreateFrom(bool useHeader = false, TextLoaderColumn tlc = new TextLoaderColumn(); tlc.Name = name; tlc.Source = new TextLoaderRange[sources.Length]; + DataKind dk; + if (!TryGetDataKind(field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType, out dk)) + throw Contracts.Except($"{name} is of unsupported type."); + + tlc.Type = dk; + for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++) { tlc.Source[indexLocal] = new TextLoaderRange @@ -84,7 +88,6 @@ public TextLoader CreateFrom(bool useHeader = false, }; } - tlc.Type = dk; Arguments.Column[index] = tlc; } @@ -96,5 +99,54 @@ public TextLoader CreateFrom(bool useHeader = false, return this; } + + /// + /// Try to map a System.Type to a corresponding DataKind value. + /// + public static bool TryGetDataKind(Type type, out DataKind kind) + { + Contracts.CheckValueOrNull(type); + + // REVIEW: Make this more efficient. Should we have a global dictionary? + if (type == typeof(DvInt1) || type == typeof(sbyte)) + kind = DataKind.I1; + else if (type == typeof(byte) || type == typeof(char)) + kind = DataKind.U1; + else if (type == typeof(DvInt2) || type == typeof(Int16)) + kind = DataKind.I2; + else if (type == typeof(ushort)) + kind = DataKind.U2; + else if (type == typeof(DvInt4) || type == typeof(int)) + kind = DataKind.I4; + else if (type == typeof(uint)) + kind = DataKind.U4; + else if (type == typeof(DvInt8) || type == typeof(Int16) || type == typeof(long)) + kind = DataKind.I8; + else if (type == typeof(ulong)) + kind = DataKind.U8; + else if (type == typeof(Single)) + kind = DataKind.R4; + else if (type == typeof(Double)) + kind = DataKind.R8; + else if (type == typeof(DvText) || type == typeof(string)) + kind = DataKind.TX; + else if (type == typeof(DvBool) || type == typeof(bool)) + kind = DataKind.BL; + else if (type == typeof(DvTimeSpan)) + kind = DataKind.TS; + else if (type == typeof(DvDateTime)) + kind = DataKind.DT; + else if (type == typeof(DvDateTimeZone)) + kind = DataKind.DZ; + else if (type == typeof(UInt128)) + kind = DataKind.UG; + else + { + kind = default(DataKind); + return false; + } + + return true; + } } } diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 856a273e23..8de79ecae3 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -154,7 +154,6 @@ public PredictionModel Train() step = currentItem.ApplyStep(step, experiment); if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) transformModels.Add(dataStep.Model); - else if (step is ILearningPipelinePredictorStep predictorDataStep) { if (lastTransformModel != null) @@ -184,7 +183,7 @@ public PredictionModel Train() { 
if (lastTransformModel != null) transformModels.Insert(0, lastTransformModel); - + var modelInput = new Transforms.ModelCombiner { Models = new ArrayVar(transformModels.ToArray()) diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index cec3fdf91f..db378b45bd 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -177,6 +177,38 @@ public static string Capitalize(string s) return char.ToUpperInvariant(s[0]) + s.Substring(1); } + public static string GetCharValue(char value) + { + switch (value) + { + case '\t': + return "\\t"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\\': + return "\\"; + case '\"': + return "\""; + case '\'': + return "\\'"; + case '\0': + return "\\0"; + case '\a': + return "\\a"; + case '\b': + return "\\b"; + case '\f': + return "\\f"; + case '\v': + return "\\v"; + default: + Contracts.Assert(!Char.IsWhiteSpace(value)); + return value.ToString(); + } + } + public static string GetValue(ModuleCatalog catalog, Type fieldType, object fieldValue, Dictionary typesSymbolTable, string rootNameSpace = "") { @@ -264,7 +296,7 @@ public static string GetValue(ModuleCatalog catalog, Type fieldType, object fiel case TlcModule.DataKind.Enum: return GetEnumName(fieldType, typesSymbolTable, rootNameSpace) + "." + fieldValue; case TlcModule.DataKind.Char: - return $"'{(char)fieldValue}'"; + return $"'{GetCharValue((char)fieldValue)}'"; case TlcModule.DataKind.Component: var type = fieldValue.GetType(); ModuleCatalog.ComponentInfo componentInfo; From 4119b43098aa4bffc564f618873fe0b2556c693d Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 16 May 2018 09:30:50 -0700 Subject: [PATCH 10/17] PR feedback. --- src/Microsoft.ML.Core/Utilities/Utils.cs | 1 - .../Microsoft.ML.PipelineInference.csproj | 4 ---- src/Microsoft.ML/Data/TextLoader.cs | 12 ++++++------ .../Runtime/Internal/Tools/CSharpApiGenerator.cs | 3 +-- .../Microsoft.ML.Predictor.Tests.csproj | 1 - 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index b453e6da29..48993de785 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -12,7 +12,6 @@ using System.Text; using System.Text.RegularExpressions; using System.Threading; -using Microsoft.ML.Runtime.Data; namespace Microsoft.ML.Runtime.Internal.Utilities { diff --git a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj index 2fc84d92b0..7cf9585f3b 100644 --- a/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj +++ b/src/Microsoft.ML.PipelineInference/Microsoft.ML.PipelineInference.csproj @@ -7,10 +7,6 @@ CORECLR - - ;1591 - - diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index e43df96d4b..676720619a 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -103,7 +103,7 @@ public TextLoader CreateFrom(bool useHeader = false, /// /// Try to map a System.Type to a corresponding DataKind value. 
/// - public static bool TryGetDataKind(Type type, out DataKind kind) + private static bool TryGetDataKind(Type type, out DataKind kind) { Contracts.CheckValueOrNull(type); @@ -112,7 +112,7 @@ public static bool TryGetDataKind(Type type, out DataKind kind) kind = DataKind.I1; else if (type == typeof(byte) || type == typeof(char)) kind = DataKind.U1; - else if (type == typeof(DvInt2) || type == typeof(Int16)) + else if (type == typeof(DvInt2) || type == typeof(short)) kind = DataKind.I2; else if (type == typeof(ushort)) kind = DataKind.U2; @@ -120,7 +120,7 @@ public static bool TryGetDataKind(Type type, out DataKind kind) kind = DataKind.I4; else if (type == typeof(uint)) kind = DataKind.U4; - else if (type == typeof(DvInt8) || type == typeof(Int16) || type == typeof(long)) + else if (type == typeof(DvInt8) || type == typeof(long)) kind = DataKind.I8; else if (type == typeof(ulong)) kind = DataKind.U8; @@ -132,11 +132,11 @@ public static bool TryGetDataKind(Type type, out DataKind kind) kind = DataKind.TX; else if (type == typeof(DvBool) || type == typeof(bool)) kind = DataKind.BL; - else if (type == typeof(DvTimeSpan)) + else if (type == typeof(DvTimeSpan) || type == typeof(TimeSpan)) kind = DataKind.TS; - else if (type == typeof(DvDateTime)) + else if (type == typeof(DvDateTime) || type == typeof(DateTime)) kind = DataKind.DT; - else if (type == typeof(DvDateTimeZone)) + else if (type == typeof(DvDateTimeZone) || type == typeof(TimeZoneInfo)) kind = DataKind.DZ; else if (type == typeof(UInt128)) kind = DataKind.UG; diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index db378b45bd..92b2bf69a2 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -177,7 +177,7 @@ public static string Capitalize(string s) return char.ToUpperInvariant(s[0]) + s.Substring(1); } - public static string GetCharValue(char value) + private static string GetCharValue(char value) { switch (value) { @@ -204,7 +204,6 @@ public static string GetCharValue(char value) case '\v': return "\\v"; default: - Contracts.Assert(!Char.IsWhiteSpace(value)); return value.ToString(); } } diff --git a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj index da40ffe344..30b51e8afb 100644 --- a/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj +++ b/test/Microsoft.ML.Predictor.Tests/Microsoft.ML.Predictor.Tests.csproj @@ -5,7 +5,6 @@ true AnyCPU - 1701;1702;1705 From 755eb3433579c6c087dbe47e2b770b32b79f0921 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 16 May 2018 10:47:30 -0700 Subject: [PATCH 11/17] PR feedback. 
--- src/Microsoft.ML/Data/TextLoader.cs | 24 +++++++---- .../Microsoft.ML.TestFramework/ModelHelper.cs | 42 +++++++++---------- .../Scenarios/SentimentPredictionTests.cs | 8 ++-- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 676720619a..7358a0789c 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -5,26 +5,34 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Internal.Utilities; -using Newtonsoft.Json; using System; -using System.Collections.Generic; using System.Linq; using System.Reflection; -using System.Text; using System.Text.RegularExpressions; namespace Microsoft.ML.Data { public sealed partial class TextLoaderRange { - [JsonIgnore] - public int Ordinal { get { return Ordinal; } set { Min = value; Max = value; } } + public TextLoaderRange() + { + } + + public TextLoaderRange(int ordinal) + { + Min = ordinal; + Max = ordinal; + } + + public TextLoaderRange(int min, int max) + { + Min = min; + Max = max; + } } public sealed partial class TextLoader { - /// /// Construct a TextLoader object by inferencing the dataset schema from a type. /// @@ -105,7 +113,7 @@ public TextLoader CreateFrom(bool useHeader = false, /// private static bool TryGetDataKind(Type type, out DataKind kind) { - Contracts.CheckValueOrNull(type); + Contracts.Assert(type != null); // REVIEW: Make this more efficient. Should we have a global dictionary? if (type == typeof(DvInt1) || type == typeof(sbyte)) diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index 19943c6887..1b0ab4eb8e 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -70,147 +70,147 @@ private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) new TextLoaderColumn() { Name = "Id", - Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, + Source = new [] { new TextLoaderRange(0) }, Type = Runtime.Data.DataKind.Text }, new TextLoaderColumn() { Name = "Date", - Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, + Source = new [] { new TextLoaderRange(1) }, Type = Runtime.Data.DataKind.Text }, new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Ordinal = 2 } }, + Source = new [] { new TextLoaderRange(2) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Bedrooms", - Source = new [] { new TextLoaderRange() { Ordinal = 3 } }, + Source = new [] { new TextLoaderRange(3) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Bathrooms", - Source = new [] { new TextLoaderRange() { Ordinal = 4 } }, + Source = new [] { new TextLoaderRange(4) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLiving", - Source = new [] { new TextLoaderRange() { Ordinal = 5 } }, + Source = new [] { new TextLoaderRange(5) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLot", - Source = new [] { new TextLoaderRange() { Ordinal = 6 } }, + Source = new [] { new TextLoaderRange(6) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Floors", - Source = new [] { new TextLoaderRange() { Ordinal = 7 } }, + Source = new [] { new TextLoaderRange(7) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Waterfront", - Source = new [] { new TextLoaderRange() { Ordinal = 
8 } }, + Source = new [] { new TextLoaderRange(8) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "View", - Source = new [] { new TextLoaderRange() { Ordinal = 9 } }, + Source = new [] { new TextLoaderRange(9) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Condition", - Source = new [] { new TextLoaderRange() { Ordinal = 10 } }, + Source = new [] { new TextLoaderRange(10) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Grade", - Source = new [] { new TextLoaderRange() { Ordinal = 11 } }, + Source = new [] { new TextLoaderRange(11) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftAbove", - Source = new [] { new TextLoaderRange() { Ordinal = 12 } }, + Source = new [] { new TextLoaderRange(12) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftBasement", - Source = new [] { new TextLoaderRange() { Ordinal = 13 } }, + Source = new [] { new TextLoaderRange(13) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "YearBuilt", - Source = new [] { new TextLoaderRange() { Ordinal = 14 } }, + Source = new [] { new TextLoaderRange(14) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "YearRenovated", - Source = new [] { new TextLoaderRange() { Ordinal = 15 } }, + Source = new [] { new TextLoaderRange(15) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Zipcode", - Source = new [] { new TextLoaderRange() { Ordinal = 16 } }, + Source = new [] { new TextLoaderRange(16) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Lat", - Source = new [] { new TextLoaderRange() { Ordinal = 17 } }, + Source = new [] { new TextLoaderRange(17) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "Long", - Source = new [] { new TextLoaderRange() { Ordinal = 18 } }, + Source = new [] { new TextLoaderRange(18) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLiving15", - Source = new [] { new TextLoaderRange() { Ordinal = 19 } }, + Source = new [] { new TextLoaderRange(19) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SqftLot15", - Source = new [] { new TextLoaderRange() { Ordinal = 20 } }, + Source = new [] { new TextLoaderRange(20) }, Type = Runtime.Data.DataKind.Num }, } diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 4d6c440537..80947644e9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -37,14 +37,14 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, + Source = new [] { new TextLoaderRange(0) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", - Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, + Source = new [] { new TextLoaderRange(1) }, Type = Runtime.Data.DataKind.Text } } @@ -98,14 +98,14 @@ public void TrainAndPredictSentimentModelTest() new TextLoaderColumn() { Name = "Label", - Source = new [] { new TextLoaderRange() { Ordinal = 0 } }, + Source = new [] { new TextLoaderRange(0) }, Type = Runtime.Data.DataKind.Num }, new TextLoaderColumn() { Name = "SentimentText", - Source = new [] { new TextLoaderRange() { Ordinal = 1 } }, + Source = new [] { new TextLoaderRange(1) }, Type = 
Runtime.Data.DataKind.Text } } From 743fa6f09081cba3570b4c924b396fe5cf40d939 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 16 May 2018 22:51:36 -0700 Subject: [PATCH 12/17] PR feedback. --- src/Microsoft.ML/Data/TextLoader.cs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 7358a0789c..71c8273c41 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -18,14 +18,34 @@ public TextLoaderRange() { } + /// + /// Convenience constructor for the scalar case. + /// Min and Max are set to the single value ordinal. + /// When a given column spans only a single column in the + /// dataset. + /// + /// public TextLoaderRange(int ordinal) { + + Contracts.Assert(ordinal >= 0); + Min = ordinal; Max = ordinal; } + /// + /// Convenience constructor for the vector case. + /// When a given column spans multiple contiguous + /// column in the dataset. + /// + /// public TextLoaderRange(int min, int max) { + + Contracts.Assert(min <= max); + Contracts.Assert(min >= 0); + Min = min; Max = max; } @@ -113,7 +133,7 @@ public TextLoader CreateFrom(bool useHeader = false, /// private static bool TryGetDataKind(Type type, out DataKind kind) { - Contracts.Assert(type != null); + Contracts.AssertValue(type); // REVIEW: Make this more efficient. Should we have a global dictionary? if (type == typeof(DvInt1) || type == typeof(sbyte)) From bb1afd461b7e1369840a7866bdeba54467907e70 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 16 May 2018 23:07:40 -0700 Subject: [PATCH 13/17] PR feedback. --- test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index e100794459..24e8374b4c 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -2708,7 +2708,9 @@ public void EntryPointTreeLeafFeaturizer() { var dataPath = GetDataPath(@"adult.tiny.with-schema.txt"); var inputFile = new SimpleFileHandle(Env, dataPath, false, false); +#pragma warning disable 0618 var dataView = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFile }).Data; +#pragma warning restore 0618 var cat = Categorical.CatTransformDict(Env, new CategoricalTransform.Arguments() { Data = dataView, From 596079f259faff4a25a339d3ffd0eb631700563a Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 16 May 2018 23:29:03 -0700 Subject: [PATCH 14/17] PR feedback. 
--- test/Microsoft.ML.Tests/LearningPipelineTests.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs index eb4c33add8..d115e332cc 100644 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ b/test/Microsoft.ML.Tests/LearningPipelineTests.cs @@ -5,6 +5,7 @@ using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; @@ -66,7 +67,7 @@ public void TransformOnlyPipeline() { const string _dataPath = @"..\..\Data\breast-cancer.txt"; var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(_dataPath).CreateFrom(useHeader: false)); + pipeline.Add(new ML.Data.TextLoader(_dataPath).CreateFrom(useHeader: false)); pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag }); var model = pipeline.Train(); var predictionModel = model.Predict(new InputData() { F1 = "5" }); @@ -95,7 +96,7 @@ public class Data public class Prediction { [ColumnName("PredictedLabel")] - public bool PredictedLabel; + public DvBool PredictedLabel; } [Fact] From 6233b761cfee106db7f3ad710fd320d142666089 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 17 May 2018 21:23:46 -0700 Subject: [PATCH 15/17] PR feedback. --- src/Microsoft.ML/Data/TextLoader.cs | 14 ++++++-------- .../Runtime/Internal/Tools/CSharpApiGenerator.cs | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 71c8273c41..3dd54aeb88 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -20,15 +20,14 @@ public TextLoaderRange() /// /// Convenience constructor for the scalar case. - /// Min and Max are set to the single value ordinal. - /// When a given column spans only a single column in the - /// dataset. + /// Min and Max are set to the single value . + /// When a given column in the schema spans only a single column in the dataset, /// /// public TextLoaderRange(int ordinal) { - Contracts.Assert(ordinal >= 0); + Contracts.CheckParam(ordinal >= 0, nameof(ordinal)); Min = ordinal; Max = ordinal; @@ -36,15 +35,14 @@ public TextLoaderRange(int ordinal) /// /// Convenience constructor for the vector case. - /// When a given column spans multiple contiguous - /// column in the dataset. 
+ /// When a given column in the schema spans contiguous columns in the dataset, /// /// public TextLoaderRange(int min, int max) { - Contracts.Assert(min <= max); - Contracts.Assert(min >= 0); + Contracts.CheckParam(min >= 0, nameof(min)); + Contracts.CheckParam(max >= 0, nameof(max)); Min = min; Max = max; diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index 92b2bf69a2..5fabb15840 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -177,7 +177,7 @@ public static string Capitalize(string s) return char.ToUpperInvariant(s[0]) + s.Substring(1); } - private static string GetCharValue(char value) + private static string GetCharAsString(char value) { switch (value) { @@ -295,7 +295,7 @@ public static string GetValue(ModuleCatalog catalog, Type fieldType, object fiel case TlcModule.DataKind.Enum: return GetEnumName(fieldType, typesSymbolTable, rootNameSpace) + "." + fieldValue; case TlcModule.DataKind.Char: - return $"'{GetCharValue((char)fieldValue)}'"; + return $"'{GetCharAsString((char)fieldValue)}'"; case TlcModule.DataKind.Component: var type = fieldValue.GetType(); ModuleCatalog.ComponentInfo componentInfo; From a6805c92648af47a0e3307342a42ec3bfc50c938 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 18 May 2018 17:36:59 -0700 Subject: [PATCH 16/17] PR feedback. --- src/Microsoft.ML/Data/TextLoader.cs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 3dd54aeb88..1f094d2487 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -19,30 +19,31 @@ public TextLoaderRange() } /// - /// Convenience constructor for the scalar case. - /// Min and Max are set to the single value . - /// When a given column in the schema spans only a single column in the dataset, - /// + /// Convenience constructor for the scalar case, when a given column + /// in the schema spans only a single column in the dataset. + /// and are set to the single value . /// + /// Column index in the dataset. public TextLoaderRange(int ordinal) { - Contracts.CheckParam(ordinal >= 0, nameof(ordinal)); + Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Cannot be a negative number"); Min = ordinal; Max = ordinal; } /// - /// Convenience constructor for the vector case. - /// When a given column in the schema spans contiguous columns in the dataset, - /// + /// Convenience constructor for the vector case, when a given column + /// in the schema spans contiguous columns in the dataset. /// + /// Starting column index in the dataset. + /// Ending column index in the dataset. public TextLoaderRange(int min, int max) { - Contracts.CheckParam(min >= 0, nameof(min)); - Contracts.CheckParam(max >= 0, nameof(max)); + Contracts.CheckParam(min >= 0, nameof(min), "Cannot be negative number."); + Contracts.CheckParam(max >= min, nameof(max), $"Cannot be less than {nameof(min)}."); Min = min; Max = max; From 78e89c3fb51e2bf67fab649e98ae4855ca08acae Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Mon, 21 May 2018 19:19:06 -0700 Subject: [PATCH 17/17] PR feedback. 
--- src/Microsoft.ML/Data/TextLoader.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 1f094d2487..3c8550ef09 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -42,8 +42,8 @@ public TextLoaderRange(int ordinal) public TextLoaderRange(int min, int max) { - Contracts.CheckParam(min >= 0, nameof(min), "Cannot be negative number."); - Contracts.CheckParam(max >= min, nameof(max), $"Cannot be less than {nameof(min)}."); + Contracts.CheckParam(min >= 0, nameof(min), "Cannot be a negative number."); + Contracts.CheckParam(max >= min, nameof(max), "Cannot be less than " + nameof(min) +"."); Min = min; Max = max;
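
For context when reviewing the series, here is a minimal sketch of how the reworked loader API is exercised once these patches land. It is illustrative only and not part of any patch: the generic CreateFrom<TInput> form and the Column attribute shape are assumptions inferred from the test changes and the schema-inference code in src/Microsoft.ML/Data/TextLoader.cs; the explicit TextLoaderColumn mapping mirrors ModelHelper.cs and SentimentPredictionTests.cs.

// Illustrative sketch only (not part of the patch series).
// Assumes CreateFrom is generic over the row type and that row fields carry a
// Column attribute with a string ordinal, as CreateFrom's use of mappingAttr.Ordinal implies.
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Api;

public class SentimentRow
{
    [Column("0")] public float Label;          // Single -> DataKind.R4 via TryGetDataKind
    [Column("1")] public string SentimentText; // string -> DataKind.TX via TryGetDataKind
}

public static class LoaderUsageSketch
{
    public static void Build(string dataPath)
    {
        // Schema inferred from SentimentRow; the options mirror those exercised
        // in TextLoaderTests.CanSuccessfullyTrimSpaces.
        var pipeline = new LearningPipeline();
        pipeline.Add(new TextLoader(dataPath).CreateFrom<SentimentRow>(
            useHeader: true, separator: ',', allowQuotedStrings: false,
            supportSparse: false, trimWhitespace: true));

        // Equivalent explicit mapping using the TextLoaderRange convenience
        // constructors added in the later patches.
        var columns = new[]
        {
            new TextLoaderColumn
            {
                Name = "Label",
                Source = new[] { new TextLoaderRange(0) },    // scalar: Min == Max == 0
                Type = Microsoft.ML.Runtime.Data.DataKind.Num
            },
            new TextLoaderColumn
            {
                Name = "Features",
                Source = new[] { new TextLoaderRange(1, 9) }, // vector spanning dataset columns 1-9
                Type = Microsoft.ML.Runtime.Data.DataKind.Num
            }
        };
    }
}

The explicit form is what the entry-point graph serializes; the CreateFrom form simply builds the same TextLoaderColumn/TextLoaderRange arguments from the annotated row type.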