diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index c5421ac305..55f3c89845 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -58,7 +58,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") }); + var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance) + .Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance)); // The transformed data. var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index 30310b755b..3fe3f169ed 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -69,10 +69,11 @@ public static void Example() }; var model = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") })) + .Append(mlContext.Transforms.Conversion.MapValue("VariableLenghtFeatures", lookupMap, + lookupMap.Schema["Words"], lookupMap.Schema["Ids"], "TokenizedWords")) .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize")) 
.Append(tensorFlowModel.ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })) - .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax"))) + .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax")) .Fit(dataView); var engine = mlContext.Model.CreatePredictionEngine(model); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs index 1a1dcb7a54..d5cba7120b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs @@ -36,8 +36,8 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) - .Append(mlContext.Transforms.ConvertToGrayscale(("Grayscale", "ImageObject"))); + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") + .Append(mlContext.Transforms.ConvertToGrayscale("Grayscale", "ImageObject")); var transformedData = pipeline.Fit(data).Transform(data); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs index 7f3e5d3c62..af69a3578c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs @@ -40,7 +40,7 @@ public static void Example() // Installing the Microsoft.ML.DNNImageFeaturizer packages copies the models in the // `DnnImageModels` folder. // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: 224, imageHeight: 224)) .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")) .Append(mlContext.Transforms.DnnFeaturizeImage("FeaturizedImage", m => m.ModelSelector.ResNet18(mlContext, m.OutputColumn, m.InputColumn), "Pixels")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs index da6c583e13..188e36ca15 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs @@ -37,7 +37,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageObject", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageObject", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: 100, imageHeight: 100 )) .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs index 80404e3ae7..f6fb4cae29 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs @@ -36,7 +36,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageReal", "ImagePath")); + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageReal", "ImagePath"); var transformedData = pipeline.Fit(data).Transform(data); // The transformedData IDataView contains the loaded images now diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs index b792aa9a8e..ca0d642e14 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs @@ -36,7 +36,7 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages(imagesFolder, ("ImageReal", "ImagePath")) + var pipeline = mlContext.Transforms.LoadImages(imagesFolder, "ImageReal", "ImagePath") .Append(mlContext.Transforms.ResizeImages("ImageReal", imageWidth: 100, imageHeight: 100)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs similarity index 90% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs index eaec189bc4..bf314064e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic { - public sealed class VectorWhitenWithColumnOptions + public sealed class VectorWhitenWithOptions { /// This example requires installation of additional nuget 
package Microsoft.ML.Mkl.Components. public static void Example() @@ -39,8 +39,7 @@ public static void Example() // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(new Transforms.VectorWhiteningEstimator.ColumnOptions( - nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4)); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4); // The transformed (projected) data. var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs index 1bcc4ef5f5..de241dceda 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Linq; using Microsoft.ML.Data; -using static Microsoft.ML.Transforms.MissingValueReplacingEstimator.ColumnOptions; +using Microsoft.ML.Transforms; namespace Microsoft.ML.Samples.Dynamic { @@ -25,7 +25,7 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(samples); // ReplaceMissingValues is used to create a column where missing values are replaced according to the ReplacementMode. 
- var meanPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", ReplacementMode.Mean); + var meanPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", MissingValueReplacingEstimator.ReplacementMode.Mean); // Now we can transform the data and look at the output to confirm the behavior of the estimator. // This operation doesn't actually evaluate data until we read the data below. @@ -36,7 +36,7 @@ public static void Example() var meanRowEnumerable = mlContext.Data.CreateEnumerable(meanTransformedData, reuseRowObject: false); // ReplaceMissingValues is used to create a column where missing values are replaced according to the ReplacementMode. - var defaultPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", ReplacementMode.DefaultValue); + var defaultPipeline = mlContext.Transforms.ReplaceMissingValues("MissingReplaced", "Features", MissingValueReplacingEstimator.ReplacementMode.DefaultValue); // Now we can transform the data and look at the output to confirm the behavior of the estimator. // This operation doesn't actually evaluate data until we read the data below. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index d84d69db6f..2df356760a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -37,24 +37,14 @@ public static void Example() // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the Education values from the dataset. 
- var educationKeys = new List() - { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Creating a list of associated values that will map respectively to each educationKey - var educationValues = new List() - { - "Undergraduate", - "Postgraduate", - "Postgraduate" - }; - + // Creating a list of key-value pairs based on the Education values from the dataset. + var educationMap = new Dictionary (); + educationMap["0-5yrs"] = "Undergraduate"; + educationMap["6-11yrs"] = "Postgraduate"; + educationMap["12+yrs"] = "Postgraduate"; + // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, ("EducationCategory", "Education")); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationCategory", educationMap, "Education"); // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index c32d7efdd5..5cf34572ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -28,24 +28,14 @@ public static void Example() IDataView trainData = mlContext.Data.LoadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the induced value from the dataset - var temperatureKeys = new List() - { - 36.0f, - 35.0f, - 34.0f - }; - - // Creating a list of values, these strings will map accordingly to each key. 
- var classificationValues = new List() - { - "T1", - "T2", - "T3" - }; + // Creating a list of key-value pairs based on the induced value from the dataset + var temperatureMap = new Dictionary(); + temperatureMap[36.0f] = "T1"; + temperatureMap[35.0f] = "T2"; + temperatureMap[34.0f] = "T3"; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature")); + var pipeline = mlContext.Transforms.Conversion.MapValue("TemperatureCategory", temperatureMap, "Temperature"); // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index cfafe4c336..f008d559d8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -31,24 +31,14 @@ public static void Example() IDataView trainData = mlContext.Data.LoadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the Education values from the dataset - var educationKeys = new List() - { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Sample list of associated array values - var educationValues = new List() - { - new int[] { 1,2,3 }, - new int[] { 5,6,7 }, - new int[] { 42,32,64 } - }; + // Creating a list of key-value pairs based on the Education values from the dataset + var educationMap = new Dictionary(); + educationMap["0-5yrs"] = new int[] { 1, 2, 3 }; + educationMap["6-11yrs"] = new int[] { 5, 6, 7 }; + educationMap["12+yrs"] = new int[] { 42, 32, 64 }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, ("EducationFeature", "Education")); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationFeature", educationMap, "Education"); // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 11cffba54c..8c01d35e78 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -34,28 +34,18 @@ public static void Example() IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - // Creating a list of keys based on the Education values from the dataset + // Creating a list of key-value pairs based on the Education values from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. 
- var educationKeys = new List() - { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Creating a list of values that are sample strings. These will be converted to KeyTypes - var educationValues = new List() - { - "Undergraduate", - "Postgraduate", - "Postgraduate" - }; + var educationMap = new Dictionary(); + educationMap["0-5yrs"] = "Undergraduate"; + educationMap["6-11yrs"] = "Postgraduate"; + educationMap["12+yrs"] = "Postgraduate"; // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back // to the original value. - var pipeline = mlContext.Transforms.Conversion.MapValue(educationKeys, educationValues, true, ("EducationKeyType", "Education")) - .Append(mlContext.Transforms.Conversion.MapKeyToValue(("EducationCategory", "EducationKeyType"))); + var pipeline = mlContext.Transforms.Conversion.MapValue("EducationKeyType", educationMap, "Education", true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("EducationCategory", "EducationKeyType")); // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 2aa61a1b14..80f24389fb 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; +using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.Transforms; @@ -36,7 +37,8 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms /// /// The conversion transform's catalog. 
/// Description of dataset columns and how to process them. - public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnOptions[] columns) => new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -54,14 +56,15 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms /// public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null, DataKind outputKind = ConvertDefaults.DefaultOutputKind) - => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputKind); + => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName) }); /// /// Changes column type of the input column. /// /// The conversion transform's catalog. /// Description of dataset columns and how to process them. 
- public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns) => new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -91,7 +94,8 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)] /// ]]> /// - public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) + [BestFriend] + internal static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// @@ -99,7 +103,8 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// /// The conversion transform's catalog. /// The input column to map back to vectors. - public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog, params KeyToVectorMappingEstimator.ColumnOptions[] columns) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -124,6 +129,10 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// Maximum number of keys to keep per column when auto-training. /// How items should be ordered when vectorized. If choosen they will be in the order encountered. 
/// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). + /// Whether key value annotations should be text, regardless of the actual input type. + /// The data view containing the terms. If specified, this should be a single column data + /// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined + /// from the input data upon fitting. /// /// /// new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality); + ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality, + bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText, + IDataView keyData = null) + => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText) }, keyData); /// /// Converts value types into , optionally loading the keys to use from . @@ -153,7 +165,8 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// ]]> /// /// - public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); @@ -163,10 +176,10 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// The key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . 
The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . - /// The columns to apply this transform on. + /// Name of the column resulting from the transformation of . + /// Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// Whether to treat the values as a . /// An instance of the /// /// @@ -179,10 +192,45 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, + string outputColumnName, + IEnumerable> keyValuePairs, + string inputColumnName = null, + bool treatValuesAsKeyType = false) + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, + new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + } + + /// + /// + /// + /// The key type. + /// The value type. + /// The conversion transform's catalog + /// Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the . + /// The columns to apply this transform on. 
+ /// An instance of the + /// + /// + /// + /// + [BestFriend] + internal static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IEnumerable> keyValuePairs, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns)); + } /// /// @@ -190,9 +238,7 @@ public static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping. The length of this list must be equal to the same length as . + /// Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the . /// Whether to treat the values as a . /// The columns to apply this transform on. 
/// An instance of the @@ -202,14 +248,18 @@ public static ValueMappingEstimator MapValue /// - public static ValueMappingEstimator MapValue( + [BestFriend] + internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, + IEnumerable> keyValuePairs, bool treatValuesAsKeyType, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, - ColumnOptions.ConvertToValueTuples(columns)); + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType, + ColumnOptions.ConvertToValueTuples(columns)); + } /// /// @@ -217,10 +267,9 @@ public static ValueMappingEstimator MapValueThe key type. /// The value type. /// The conversion transform's catalog - /// The list of keys to use for the mapping. The mapping is 1-1 with . The length of this list must be the same length as and - /// cannot contain duplicate keys. - /// The list of values to pair with the keys for the mapping of TOutputType[]. The length of this list must be equal to the same length as . - /// The columns to apply this transform on. + /// Name of the column resulting from the transformation of . + /// Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the . + /// Name of the column to transform. If set to , the value of the will be used as source. 
/// An instance of the /// /// @@ -233,20 +282,55 @@ public static ValueMappingEstimator MapValue public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IEnumerable keys, - IEnumerable values, - params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, - ColumnOptions.ConvertToValueTuples(columns)); + string outputColumnName, + IEnumerable> keyValuePairs, + string inputColumnName = null) + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, + new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + } /// /// /// + /// The key type. + /// The value type. /// The conversion transform's catalog - /// An instance of that contains the key and value columns. - /// Name of the key column in . - /// Name of the value column in . + /// Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the . /// The columns to apply this transform on. + /// An instance of the + /// + /// + /// + /// + [BestFriend] + internal static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IEnumerable> keyValuePairs, + params ColumnOptions[] columns) + { + var keys = keyValuePairs.Select(pair => pair.Key); + var values = keyValuePairs.Select(pair => pair.Value); + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, + ColumnOptions.ConvertToValueTuples(columns)); + } + + /// + /// + /// + /// The conversion transform's catalog + /// Name of the column resulting from the transformation of . + /// An instance of that contains the and columns. + /// The key column in . + /// The value column in . + /// Name of the column to transform. If set to , the value of the will be used as source. 
/// A instance of the ValueMappingEstimator /// /// @@ -259,8 +343,35 @@ public static ValueMappingEstimator MapValue public static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, - IDataView lookupMap, string keyColumnName, string valueColumnName, params ColumnOptions[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumnName, valueColumnName, + string outputColumnName, IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, string inputColumnName = null) + { + return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name, + new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + } + + /// + /// + /// + /// The conversion transform's catalog + /// An instance of that contains the and columns. + /// The key column in . + /// The value column in . + /// The columns to apply this transform on. + /// A instance of the ValueMappingEstimator + /// + /// + /// + /// + [BestFriend] + internal static ValueMappingEstimator MapValue( + this TransformsCatalog.ConversionTransforms catalog, + IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params ColumnOptions[] columns) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name, ColumnOptions.ConvertToValueTuples(columns)); } } diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 1972eb9bb2..c4b97d0dea 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -11,7 +11,8 @@ namespace Microsoft.ML /// /// Specifies input and output column names for a transformation. 
/// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { private readonly string _outputColumnName; private readonly string _inputColumnName; @@ -19,12 +20,12 @@ public sealed class ColumnOptions /// /// Specifies input and output column names for a transformation. /// - /// Name of output column resulting from the transformation of . - /// Name of input column. - public ColumnOptions(string outputColumnName, string inputColumnName) + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + public ColumnOptions(string outputColumnName, string inputColumnName = null) { _outputColumnName = outputColumnName; - _inputColumnName = inputColumnName; + _inputColumnName = inputColumnName ?? outputColumnName; } /// @@ -76,7 +77,8 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, /// ]]> /// /// - public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns) + [BestFriend] + internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns) => new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); /// diff --git a/src/Microsoft.ML.Data/Transforms/Hashing.cs b/src/Microsoft.ML.Data/Transforms/Hashing.cs index 0a53c8d6fd..93a6207609 100644 --- a/src/Microsoft.ML.Data/Transforms/Hashing.cs +++ b/src/Microsoft.ML.Data/Transforms/Hashing.cs @@ -1123,7 +1123,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . 
diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs index dfa8607160..18ff259b4b 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVector.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVector.cs @@ -735,7 +735,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index 27d6700341..100a65609d 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -63,7 +63,8 @@ public enum NormalizationMode SupervisedBinning = 4 } - public abstract class ColumnOptionsBase + [BestFriend] + internal abstract class ColumnOptionsBase { public readonly string Name; public readonly string InputColumnName; @@ -102,7 +103,7 @@ internal static ColumnOptionsBase Create(string outputColumnName, string inputCo } } - public abstract class ControlZeroColumnOptionsBase : ColumnOptionsBase + internal abstract class ControlZeroColumnOptionsBase : ColumnOptionsBase { public readonly bool EnsureZeroUntouched; @@ -113,7 +114,8 @@ private protected ControlZeroColumnOptionsBase(string outputColumnName, string i } } - public sealed class MinMaxColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class MinMaxColumnOptions : ControlZeroColumnOptionsBase { public MinMaxColumnOptions(string outputColumnName, string inputColumnName = null, long maximumExampleCount = Defaults.MaximumExampleCount, bool ensureZeroUntouched = Defaults.EnsureZeroUntouched) : base(outputColumnName, inputColumnName ?? 
outputColumnName, maximumExampleCount, ensureZeroUntouched) @@ -124,7 +126,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MinMaxUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class MeanVarianceColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class MeanVarianceColumnOptions : ControlZeroColumnOptionsBase { public readonly bool UseCdf; @@ -139,7 +142,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class LogMeanVarianceColumnOptions : ColumnOptionsBase + [BestFriend] + internal sealed class LogMeanVarianceColumnOptions : ColumnOptionsBase { public readonly bool UseCdf; @@ -154,7 +158,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.LogMeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class BinningColumnOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class BinningColumnOptions : ControlZeroColumnOptionsBase { public readonly int MaximumBinCount; @@ -169,7 +174,8 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.BinUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class SupervisedBinningColumOptions : ControlZeroColumnOptionsBase + [BestFriend] + internal sealed class SupervisedBinningColumOptions : ControlZeroColumnOptionsBase { public readonly int MaximumBinCount; public readonly string LabelColumnName; @@ -308,7 +314,8 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(NormalizingTransformer).Assembly.FullName); } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { public readonly string Name; public readonly string InputColumnName; diff --git 
a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs index 75df8a6475..95e08aa23d 100644 --- a/src/Microsoft.ML.Data/Transforms/TypeConverting.cs +++ b/src/Microsoft.ML.Data/Transforms/TypeConverting.cs @@ -172,7 +172,7 @@ private static VersionInfo GetVersionInfo() /// /// A collection of describing the settings of the transformation. /// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); + internal IReadOnlyCollection Columns => _columns.AsReadOnly(); private readonly TypeConvertingEstimator.ColumnOptions[] _columns; @@ -526,7 +526,8 @@ internal sealed class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index cf2946e1e5..4775f2fe1f 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -17,6 +17,7 @@ internal static class Defaults { public const int MaximumNumberOfKeys = 1000000; public const KeyOrdinality Ordinality = KeyOrdinality.ByOccurrence; + public const bool AddKeyValueAnnotationsAsText = false; } /// @@ -40,7 +41,8 @@ public enum KeyOrdinality : byte /// /// Describes how the transformer handles one column pair. /// - public abstract class ColumnOptionsBase + [BestFriend] + internal abstract class ColumnOptionsBase { public readonly string OutputColumnName; public readonly string InputColumnName; @@ -70,7 +72,8 @@ private protected ColumnOptionsBase(string outputColumnName, string inputColumnN /// /// Describes how the transformer handles one column pair. 
/// - public sealed class ColumnOptions : ColumnOptionsBase + [BestFriend] + internal sealed class ColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one column pair. diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 6868e82c63..dfcac547fb 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -9,6 +9,19 @@ namespace Microsoft.ML { public static class ImageEstimatorsCatalog { + /// + /// The transform's catalog. + /// Name of the column resulting from the transformation of . + /// Name of the column to transform. If set to , the value of the will be used as source. + /// + /// + /// + /// + public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null) + => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// The transform's catalog. /// Specifies the names of the input columns for the transformation, and their respective output column names. @@ -18,9 +31,34 @@ public static class ImageEstimatorsCatalog /// [!code-csharp[ConvertToGrayscale](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayscale.cs)] /// ]]> /// - public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns) + [BestFriend] + internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns) => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); + /// + /// Loads the images from the into memory. + /// + /// + /// The image get loaded in memory as a type. 
+ /// Loading is the first step of almost every pipeline that does image processing, and further analysis on images. + /// The images to load need to be in the formats supported by . + /// For end-to-end image processing pipelines, and scenarios in your applications, see the + /// examples in the machinelearning-samples github repository. + /// + /// + /// The transform's catalog. + /// Name of the column resulting from the transformation of . + /// The images folder. + /// Name of the column to transform. If set to , the value of the will be used as source. + /// + /// + /// + /// + public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, string inputColumnName = null) + => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// Loads the images from the into memory. /// @@ -41,7 +79,8 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo /// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)] /// ]]> /// - public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns) + [BestFriend] + internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns) => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columns)); /// @@ -75,7 +114,8 @@ public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog /// /// The transform's catalog. /// The describing how the transform handles each image pixel extraction output input column pair. 
- public static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static ImagePixelExtractingEstimator ExtractPixels(this TransformsCatalog catalog, params ImagePixelExtractingEstimator.ColumnOptions[] columnOptions) => new ImagePixelExtractingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// @@ -133,7 +173,8 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// [!code-csharp[ResizeImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs)] /// ]]> /// - public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog, params ImageResizingEstimator.ColumnOptions[] columnOptions) => new ImageResizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// @@ -141,7 +182,8 @@ public static ImageResizingEstimator ResizeImages(this TransformsCatalog catalog /// /// The transform's catalog. /// The describing how the transform handles each vector to image conversion column pair. 
- public static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnOptions[] columnOptions) + [BestFriend] + internal static VectorToImageConvertingEstimator ConvertToImage(this TransformsCatalog catalog, params VectorToImageConvertingEstimator.ColumnOptions[] columnOptions) => new VectorToImageConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); /// diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs index 2fa255bc1e..533918b336 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractor.cs @@ -562,7 +562,8 @@ internal static void GetOrder(ColorsOrder order, ColorBits colors, out int a, ou /// /// Describes how the transformer handles one image pixel extraction column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs index f73f541b50..01d897093a 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs @@ -460,7 +460,8 @@ public enum Anchor : byte /// /// Describes how the transformer handles one image resize column. 
/// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of public readonly string Name; diff --git a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs index 8ee8d6bc17..5e33c0176c 100644 --- a/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/VectorToImageTransform.cs @@ -449,7 +449,8 @@ internal static class Defaults /// /// Describes how the transformer handles one vector to image conversion column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs index 7ba5c81031..10e144907b 100644 --- a/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs +++ b/src/Microsoft.ML.Mkl.Components/MklComponentsCatalog.cs @@ -142,6 +142,7 @@ public static SymbolicSgdTrainer SymbolicSgd( /// /// /// /// @@ -161,12 +162,12 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catal /// /// /// /// /// - public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) + [BestFriend] + internal static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog catalog, params VectorWhiteningEstimator.ColumnOptions[] columns) => new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns); - } } diff --git a/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs b/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs index 862031bdbc..abafdfba4e 100644 --- a/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs +++ b/src/Microsoft.ML.Mkl.Components/VectorWhitening.cs @@ -682,7 +682,8 @@ internal static class 
Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs index 2cf6f5e1bb..3c1aafe6f7 100644 --- a/src/Microsoft.ML.PCA/PCACatalog.cs +++ b/src/Microsoft.ML.PCA/PCACatalog.cs @@ -35,7 +35,8 @@ public static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this Trans /// Initializes a new instance of . /// The transform's catalog. /// Input columns to apply PrincipalComponentAnalysis on. - public static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this TransformsCatalog catalog, params PrincipalComponentAnalyzer.ColumnOptions[] columns) + [BestFriend] + internal static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this TransformsCatalog catalog, params PrincipalComponentAnalyzer.ColumnOptions[] columns) => new PrincipalComponentAnalyzer(CatalogUtils.GetEnvironment(catalog), columns); /// diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs index 57ebd98892..1860ebaa17 100644 --- a/src/Microsoft.ML.PCA/PcaTransformer.cs +++ b/src/Microsoft.ML.PCA/PcaTransformer.cs @@ -630,7 +630,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . 
diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 607a3439fa..b2f9596cd0 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -731,9 +731,9 @@ public static class NAReplacerStaticExtensions private readonly struct Config { public readonly bool ImputeBySlot; - public readonly MissingValueReplacingEstimator.ColumnOptions.ReplacementMode ReplacementMode; + public readonly MissingValueReplacingEstimator.ReplacementMode ReplacementMode; - public Config(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, + public Config(MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { ImputeBySlot = imputeBySlot; @@ -814,7 +814,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// /// Incoming data. /// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -825,7 +825,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, MissingVa /// /// Incoming data. 
/// How NaN should be replaced - public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static Scalar ReplaceNaNValues(this Scalar input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutScalar(input, new Config(replacementMode, false)); @@ -838,7 +838,7 @@ public static Scalar ReplaceNaNValues(this Scalar input, Missing /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -852,7 +852,7 @@ public static Vector ReplaceNaNValues(this Vector input, MissingVa /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. 
- public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + public static Vector ReplaceNaNValues(this Vector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) { Contracts.CheckValue(input, nameof(input)); return new OutVectorColumn(input, new Config(replacementMode, imputeBySlot)); @@ -863,7 +863,7 @@ public static Vector ReplaceNaNValues(this Vector input, Missing /// /// Incoming data. /// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); @@ -873,7 +873,7 @@ public static VarVector ReplaceNaNValues(this VarVector input, Mis /// /// Incoming data. 
/// How NaN should be replaced - public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ColumnOptions.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.ReplacementMode) + public static VarVector ReplaceNaNValues(this VarVector input, MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode) { Contracts.CheckValue(input, nameof(input)); return new OutVarVectorColumn(input, new Config(replacementMode, false)); diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index 85175e5268..22ebfe7890 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -13,12 +13,17 @@ namespace Microsoft.ML public static class CategoricalCatalog { /// - /// Convert a text column into one-hot encoded vector. + /// Convert text columns into one-hot encoded vectors. /// /// The transform catalog /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. - /// The conversion mode. + /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector. + /// Maximum number of terms to keep per column when auto-training. + /// How items should be ordered when vectorized. If choosen they will be in the order encountered. + /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a'). + /// Specifies an ordering for the encoding. If specified, this should be a single column data view, + /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting. 
/// /// /// new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputKind); + OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind, + int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys, + ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality, + IDataView keyData = null) + => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new OneHotEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality) }, keyData); /// /// Convert several text column into one-hot encoded vectors. /// /// The transform catalog /// The column settings. - public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, params OneHotEncodingEstimator.ColumnOptions[] columns) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -47,7 +57,8 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// The column settings. /// Specifies an ordering for the encoding. If specified, this should be a single column data view, /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting. 
- public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog, OneHotEncodingEstimator.ColumnOptions[] columns, IDataView keyData = null) => new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); @@ -58,26 +69,32 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate /// The transform catalog /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. + /// The conversion mode. /// Number of bits to hash into. Must be between 1 and 30, inclusive. + /// Hashing seed. + /// Whether the position of each term should be included in the hash. /// During hashing we constuct mappings between original values and the produced hash values. /// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. - /// The conversion mode. public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, string outputColumnName, string inputColumnName = null, + OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator, int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits, - int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts, - OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator) - => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? 
outputColumnName, numberOfBits, maximumNumberOfInverts, outputKind); + uint seed = OneHotHashEncodingEstimator.Defaults.Seed, + bool useOrderedHashing = OneHotHashEncodingEstimator.Defaults.UseOrderedHashing, + int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts) + => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new OneHotHashEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts) }); /// /// Convert several text column into hash-based one-hot encoded vectors. /// /// The transform catalog /// The column settings. - public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, + [BestFriend] + internal static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog, params OneHotHashEncodingEstimator.ColumnOptions[] columns) => new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } diff --git a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs index 6b4de2fbba..406cef8d2d 100644 --- a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs @@ -18,7 +18,8 @@ public static class ConversionsCatalog /// /// The categorical transform's catalog. /// Specifies the output and input columns on which the transformation should be applied. 
- public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, + [BestFriend] + internal static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns) => new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index 197f9b9568..4623b04406 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -47,7 +47,8 @@ internal sealed class Options : TransformInputBase /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs index 2e283ad89f..55659fbcb9 100644 --- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs @@ -15,7 +15,8 @@ public static class ExtensionsCatalog /// /// The transform extensions' catalog. /// The names of the input columns of the transformation and the corresponding names for the output columns. 
- public static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, + [BestFriend] + internal static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, params ColumnOptions[] columns) => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns)); @@ -45,13 +46,16 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor /// (depending on whether the is given a value, or left to null) /// identical to the input column for everything but the missing values. The missing values of the input column, in this new column are replaced with /// one of the values specifid in the . The default for the is - /// . + /// . /// /// The transform extensions' catalog. /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. /// If not provided, the will be replaced with the results of the transforms. - /// The type of replacement to use as specified in + /// The type of replacement to use as specified in + /// If true, per-slot imputation of replacement is performed. + /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, + /// where imputation is always for the entire column. 
/// /// /// new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, replacementMode); + MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode, + bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot) + => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new MissingValueReplacingEstimator.ColumnOptions(outputColumnName, inputColumnName, replacementMode, imputeBySlot) }); /// /// Creates a new output column, identical to the input column for everything but the missing values. - /// The missing values of the input column, in this new column are replaced with . + /// The missing values of the input column, in this new column are replaced with . /// /// The transform extensions' catalog. /// The name of the columns to use, and per-column transformation configuraiton. - public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog, params MissingValueReplacingEstimator.ColumnOptions[] columns) => new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs index 6e15b9fa0b..3c59b738a4 100644 --- a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs +++ b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs @@ -25,7 +25,8 @@ public static class FeatureSelectionCatalog /// ]]> /// /// - public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog, + [BestFriend] + internal static MutualInformationFeatureSelectingEstimator 
SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog, string labelColumnName = MutualInfoSelectDefaults.LabelColumn, int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput, int numberOfBins = MutualInfoSelectDefaults.NumBins, @@ -64,7 +65,8 @@ public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMu /// ]]> /// /// - public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog, + [BestFriend] + internal static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog, params CountFeatureSelectingEstimator.ColumnOptions[] columns) => new CountFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), columns); diff --git a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs index f5b63d99e4..651a758c2c 100644 --- a/src/Microsoft.ML.Transforms/HashJoiningTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoiningTransform.cs @@ -105,7 +105,8 @@ internal bool TryUnparse(StringBuilder sb) } } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { // Either VBuffer> or a single Key. // Note that if CustomSlotMap contains only one array, the output type of the transform will a single Key. diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index 52c2d2d072..e399038833 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -21,6 +21,8 @@ public static class KernelExpansionCatalog /// The number of random Fourier features to create. /// If , use both of cos and sin basis functions to create two features for every random Fourier frequency. /// Otherwise, only cos bases would be used. + /// Which fourier generator to use. 
+ /// The seed of the random number generator for generating the new features (if unspecified, the global random is used). /// /// /// new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, rank, useCosAndSinBases); + bool useCosAndSinBases = ApproximatedKernelMappingEstimator.Defaults.UseCosAndSinBases, + KernelBase generator = null, + int? seed = null) + => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), + new[] { new ApproximatedKernelMappingEstimator.ColumnOptions(outputColumnName, rank, useCosAndSinBases, inputColumnName, generator, seed) }); /// /// Takes columns filled with a vector of floats and maps its to a random low-dimensional feature space. /// /// The transform's catalog. /// The input columns to use for the transformation. - public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, params ApproximatedKernelMappingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, params ApproximatedKernelMappingEstimator.ColumnOptions[] columns) => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs index 8dd9cc1733..820ebf0c2f 100644 --- a/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueHandlingTransformer.cs @@ -153,7 +153,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa if (!addInd) { replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(column.Name, column.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? 
options.ImputeBySlot)); + (MissingValueReplacingEstimator.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); continue; } @@ -188,7 +188,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa // Add the NAReplaceTransform column. replaceCols.Add(new MissingValueReplacingEstimator.ColumnOptions(tmpReplacementColName, column.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); + (MissingValueReplacingEstimator.ReplacementMode)(column.Kind ?? options.ReplaceWith), column.ImputeBySlot ?? options.ImputeBySlot)); // Add the ConcatTransform column. if (replaceType is VectorType) diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index ea8341430e..6c10792562 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -117,7 +117,7 @@ internal sealed class Options : TransformInputBase public Column[] Columns; [Argument(ArgumentType.AtMostOnce, HelpText = "The replacement method to utilize", ShortName = "kind")] - public ReplacementKind ReplacementKind = (ReplacementKind)MissingValueReplacingEstimator.Defaults.ReplacementMode; + public ReplacementKind ReplacementKind = (ReplacementKind)MissingValueReplacingEstimator.Defaults.Mode; // Specifying by-slot imputation for vectors of unknown size will cause a warning, and the imputation will be global. [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to impute values by slot", ShortName = "slot")] @@ -441,7 +441,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa cols[i] = new MissingValueReplacingEstimator.ColumnOptions( item.Name, item.Source, - (MissingValueReplacingEstimator.ColumnOptions.ReplacementMode)(item.Kind ?? 
options.ReplacementKind), + (MissingValueReplacingEstimator.ReplacementMode)(item.Kind ?? options.ReplacementKind), item.Slot ?? options.ImputeBySlot, item.ReplacementString); }; @@ -890,41 +890,42 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src public sealed class MissingValueReplacingEstimator : IEstimator { + /// + /// The possible ways to replace missing values. + /// + public enum ReplacementMode : byte + { + /// + /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns. + /// + DefaultValue = 0, + /// + /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns. + /// + Mean = 1, + /// + /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns. + /// + Minimum = 2, + /// + /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns. + /// + Maximum = 3, + } + [BestFriend] internal static class Defaults { - public const ColumnOptions.ReplacementMode ReplacementMode = ColumnOptions.ReplacementMode.DefaultValue; + public const ReplacementMode Mode = ReplacementMode.DefaultValue; public const bool ImputeBySlot = true; } /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { - /// - /// The possible ways to replace missing values. - /// - public enum ReplacementMode : byte - { - /// - /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns. - /// - DefaultValue = 0, - /// - /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns. - /// - Mean = 1, - /// - /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns. 
- /// - Minimum = 2, - /// - /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns. - /// - Maximum = 3, - } - /// Name of the column resulting from the transformation of . public readonly string Name; /// Name of column to transform. @@ -949,7 +950,7 @@ public enum ReplacementMode : byte /// If true, per-slot imputation of replacement is performed. /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors, /// where imputation is always for the entire column. - public ColumnOptions(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.ReplacementMode, + public ColumnOptions(string name, string inputColumnName = null, ReplacementMode replacementMode = Defaults.Mode, bool imputeBySlot = Defaults.ImputeBySlot) { Contracts.CheckNonWhiteSpace(name, nameof(name)); @@ -973,7 +974,7 @@ internal ColumnOptions(string name, string inputColumnName, ReplacementMode repl private readonly IHost _host; private readonly ColumnOptions[] _columns; - internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ColumnOptions.ReplacementMode replacementKind = Defaults.ReplacementMode) + internal MissingValueReplacingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, ReplacementMode replacementKind = Defaults.Mode) : this(env, new ColumnOptions(outputColumnName, inputColumnName ?? 
outputColumnName, replacementKind)) { diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 04020355be..657e58bf32 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -40,7 +40,8 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// ]]> /// /// - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, + [BestFriend] + internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, NormalizingEstimator.NormalizationMode mode, params ColumnOptions[] columns) => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, ColumnOptions.ConvertToValueTuples(columns)); @@ -50,7 +51,8 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// /// The transform catalog /// The normalization settings for all the columns - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, + [BestFriend] + internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, params NormalizingEstimator.ColumnOptionsBase[] columns) => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -79,7 +81,8 @@ public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog /// /// The transform's catalog. /// Describes the parameters of the lp-normalization process for each column pair. 
- public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, params LpNormNormalizingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, params LpNormNormalizingEstimator.ColumnOptions[] columns) => new LpNormNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// @@ -110,7 +113,8 @@ public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this Tr /// /// The transform's catalog. /// Describes the parameters of the gcn-normaliztion process for each column pair. - public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.ColumnOptions[] columns) + [BestFriend] + internal static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.ColumnOptions[] columns) => new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/OneHotEncoding.cs b/src/Microsoft.ML.Transforms/OneHotEncoding.cs index a79294c613..a7b6373c51 100644 --- a/src/Microsoft.ML.Transforms/OneHotEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotEncoding.cs @@ -181,7 +181,8 @@ public enum OutputKind : byte /// /// Describes how the transformer handles one column pair. 
/// - public sealed class ColumnOptions : ValueToKeyMappingEstimator.ColumnOptionsBase + [BestFriend] + internal sealed class ColumnOptions : ValueToKeyMappingEstimator.ColumnOptionsBase { public readonly OutputKind OutputKind; /// diff --git a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs index 8a543619dc..957433a8d9 100644 --- a/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs +++ b/src/Microsoft.ML.Transforms/OneHotHashEncoding.cs @@ -216,7 +216,8 @@ internal static class Defaults /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { public readonly HashingEstimator.ColumnOptions HashingOptions; public readonly OneHotEncodingEstimator.OutputKind OutputKind; diff --git a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs index b9317da64c..3ef4273ed9 100644 --- a/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Transforms/Properties/AssemblyInfo.cs @@ -17,5 +17,6 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.TextAnalytics" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TestFramework" + PublicKey.TestValue)] [assembly: WantsToBeBestFriends] diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 0d83665562..d4f80b26e4 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -617,7 +617,8 @@ internal static class Defaults /// /// Describes how the transformer handles one Gcn column pair. 
/// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 66ce9278e2..81b581b12e 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -1004,7 +1004,8 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnO /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index a3c7c3194d..d8945fdf3d 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -872,7 +872,8 @@ public sealed class NgramHashingEstimator : IEstimator /// /// Describes how the transformer handles one pair of mulitple inputs - singular output columns. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 0f22e279bd..44b3c98bb3 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -793,7 +793,8 @@ internal static bool IsSchemaColumnValid(SchemaShape.Column col) /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . 
public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs index 22c0ab8b14..415b18077b 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs @@ -508,7 +508,8 @@ public Options() /// /// Describes how the transformer handles one column pair. /// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// Name of the column resulting from the transformation of . public readonly string Name; diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs index ef56dbf065..4aa28da763 100644 --- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs +++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs @@ -72,7 +72,8 @@ public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this /// and append another marker character, , to the end of the output vector of characters. /// Pairs of columns to run the tokenization on. - public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, params ColumnOptions[] columns) => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns)); @@ -118,8 +119,8 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// /// The text-related transform's catalog. - /// Name of the column resulting from the transformation of . /// The path of the pre-trained embeedings model to use. + /// Name of the column resulting from the transformation of . 
/// Name of the column to transform. /// /// @@ -146,7 +147,8 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T /// ]]> /// /// - public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.TextTransforms catalog, WordEmbeddingEstimator.PretrainedModelKind modelKind = WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding, params WordEmbeddingEstimator.ColumnOptions[] columns) => new WordEmbeddingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), modelKind, columns); @@ -170,7 +172,8 @@ public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to run the tokenization on. - public static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog.TextTransforms catalog, params WordTokenizingEstimator.ColumnOptions[] columns) => new WordTokenizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -210,7 +213,8 @@ public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.Text /// /// The text-related transform's catalog. /// Pairs of columns to run the ngram process on. - public static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static NgramExtractingEstimator ProduceNgrams(this TransformsCatalog.TextTransforms catalog, params NgramExtractingEstimator.ColumnOptions[] columns) => new NgramExtractingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -384,6 +388,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog. 
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one. /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. /// 0 does not retain any input values. -1 retains all input values mapping to each hash. + /// Whether to rehash unigrams. public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = null, @@ -393,10 +398,47 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, uint seed = NgramHashingEstimator.Defaults.Seed, bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, - int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts) + int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts, + bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams) + => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), + new[] {new NgramHashingEstimator.ColumnOptions(outputColumnName, new[] { inputColumnName }, ngramLength: ngramLength, skipLength: skipLength, + useAllLengths: useAllLengths, numberOfBits: numberOfBits, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts, rehashUnigrams) }); + + /// + /// Produces a bag of counts of hashed ngrams in + /// and outputs ngram vector as + /// + /// is different from in a way that + /// takes tokenized text as input while tokenizes text internally. + /// + /// The text-related transform's catalog. + /// Name of the column resulting from the transformation of . + /// Names of the columns to transform. If set to , the value of the will be used as source. + /// Number of bits to hash into. Must be between 1 and 30, inclusive. 
+ /// Ngram length. + /// Maximum number of tokens to skip when constructing an ngram. + /// Whether to include all ngram lengths up to or only . + /// Hashing seed. + /// Whether the position of each source column should be included in the hash (when there are multiple source columns). + /// During hashing we construct mappings between original values and the produced hash values. + /// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one. + /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained. + /// 0 does not retain any input values. -1 retains all input values mapping to each hash. + /// Whether to rehash unigrams. + public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + string outputColumnName, + string[] inputColumnNames = null, + int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits, + int ngramLength = NgramHashingEstimator.Defaults.NgramLength, + int skipLength = NgramHashingEstimator.Defaults.SkipLength, + bool useAllLengths = NgramHashingEstimator.Defaults.UseAllLengths, + uint seed = NgramHashingEstimator.Defaults.Seed, + bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing, + int maximumNumberOfInverts = NgramHashingEstimator.Defaults.MaximumNumberOfInverts, + bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), - outputColumnName, inputColumnName, numberOfBits: numberOfBits, ngramLength: ngramLength, skipLength: skipLength, - useAllLengths: useAllLengths, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts); + new[] {new NgramHashingEstimator.ColumnOptions(outputColumnName, inputColumnNames, ngramLength: ngramLength, skipLength: skipLength, + useAllLengths:
useAllLengths, numberOfBits: numberOfBits, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts, rehashUnigrams) }); /// /// Produces a bag of counts of hashed ngrams for each . For each column, @@ -407,7 +449,8 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T /// /// The text-related transform's catalog. /// Pairs of columns to compute n-grams. Note that gram indices are generated by hashing. - public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, + [BestFriend] + internal static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog, NgramHashingEstimator.ColumnOptions[] columns) => new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns); @@ -419,9 +462,16 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. /// The number of topics. + /// Dirichlet prior on document-topic vectors. + /// Dirichlet prior on vocab-topic vectors. + /// Number of Metropolis-Hastings steps. /// Number of iterations. + /// Compute log likelihood over local dataset on this iteration interval. + /// The number of training threads. Default value depends on number of logical processors. /// The threshold of maximum count of tokens per doc. /// The number of words to summarize the topic. + /// The number of burn-in iterations. + /// Reset the random number generator for each document.
/// /// /// new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog), - outputColumnName, inputColumnName, numberOfTopics, - LatentDirichletAllocationEstimator.Defaults.AlphaSum, - LatentDirichletAllocationEstimator.Defaults.Beta, - LatentDirichletAllocationEstimator.Defaults.SamplingStepCount, - maximumNumberOfIterations, - LatentDirichletAllocationEstimator.Defaults.NumberOfThreads, - maximumTokenCountPerDocument, - numberOfSummaryTermsPerTopic, - LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval, - LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations, - LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator); + outputColumnName, inputColumnName, numberOfTopics, alphaSum, beta, samplingStepCount, + maximumNumberOfIterations, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic, + likelihoodInterval, numberOfBurninIterations, resetRandomGenerator); /// /// Uses LightLDA to transform a document (represented as a vector of floats) @@ -455,7 +504,8 @@ public static LatentDirichletAllocationEstimator LatentDirichletAllocation(this /// /// The transform's catalog. /// Describes the parameters of LDA for each column pair. - public static LatentDirichletAllocationEstimator LatentDirichletAllocation( + [BestFriend] + internal static LatentDirichletAllocationEstimator LatentDirichletAllocation( this TransformsCatalog.TextTransforms catalog, params LatentDirichletAllocationEstimator.ColumnOptions[] columns) => new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog), columns); diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs index 3094936ba9..9eff2abd7d 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsExtractor.cs @@ -828,7 +828,8 @@ public enum PretrainedModelKind /// /// Information for each column pair. 
/// - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Name of the column resulting from the transformation of . diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index 8c29215b94..f54db6d352 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -438,7 +438,8 @@ internal WordTokenizingEstimator(IHostEnvironment env, params ColumnOptions[] co : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(WordTokenizingEstimator)), new WordTokenizingTransformer(env, columns)) { } - public sealed class ColumnOptions + [BestFriend] + internal sealed class ColumnOptions { /// /// Output column name that will be used to store the tokenization result of column. diff --git a/test/Microsoft.ML.Functional.Tests/Debugging.cs b/test/Microsoft.ML.Functional.Tests/Debugging.cs index a495c99c99..9253e73ebe 100644 --- a/test/Microsoft.ML.Functional.Tests/Debugging.cs +++ b/test/Microsoft.ML.Functional.Tests/Debugging.cs @@ -105,7 +105,6 @@ public void InspectPipelineSchema() // Define a pipeline var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize()) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 })); @@ -173,7 +172,6 @@ public void ViewTrainingOutput() // Define a pipeline var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize()) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 })); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs 
b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index b8c417b87b..87425b06c4 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -999,7 +999,8 @@ public void TensorFlowSentimentClassificationTest() // Then this integer vector is retrieved from the pipeline and resized to fixed length. // The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores. var estimator = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text") - .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, "Words", "Ids", new ColumnOptions[] { ("Features", "TokenizedWords") })); + .Append(mlContext.Transforms.Conversion.MapValue(lookupMap, lookupMap.Schema["Words"], lookupMap.Schema["Ids"], + new ColumnOptions[] { ("Features", "TokenizedWords") })); var model = estimator.Fit(dataView); var dataPipe = mlContext.Model.CreatePredictionEngine(model); diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index 0bb02cacd6..5161071a1a 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -75,10 +75,10 @@ public void CategoricalOneHotHashEncoding() var mlContext = new MLContext(); var dataView = mlContext.Data.LoadFromEnumerable(data); - var pipe = mlContext.Transforms.Categorical.OneHotHashEncoding("CatA", "A", 3, 0, OneHotEncodingEstimator.OutputKind.Bag) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatB", "A", 2, 0, OneHotEncodingEstimator.OutputKind.Key)) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatC", "A", 3, 0, OneHotEncodingEstimator.OutputKind.Indicator)) - .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatD", "A", 2, 0, 
OneHotEncodingEstimator.OutputKind.Binary)); + var pipe = mlContext.Transforms.Categorical.OneHotHashEncoding("CatA", "A", OneHotEncodingEstimator.OutputKind.Bag, 3, 0) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatB", "A", OneHotEncodingEstimator.OutputKind.Key, 2, 0)) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatC", "A", OneHotEncodingEstimator.OutputKind.Indicator, 3, 0)) + .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("CatD", "A", OneHotEncodingEstimator.OutputKind.Binary, 2, 0)); TestEstimatorCore(pipe, dataView); Done(); diff --git a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs index ffe79ad181..fc55126dc6 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAReplaceTests.cs @@ -43,10 +43,10 @@ public void NAReplaceWorkout() var dataView = ML.Data.LoadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", 
MissingValueReplacingEstimator.ReplacementMode.Mean)); TestEstimatorCore(pipe, dataView); Done(); } @@ -68,10 +68,10 @@ public void NAReplaceStatic() var est = data.MakeNewEstimator(). Append(row => ( - A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Maximum), - B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Minimum) + A: row.ScalarFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Maximum), + B: row.ScalarDouble.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Mean), + C: row.VectorFloat.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Mean), + D: row.VectorDoulbe.ReplaceNaNValues(MissingValueReplacingEstimator.ReplacementMode.Minimum) )); TestEstimatorCore(est.AsDynamic, data.AsDynamic, invalidInput: invalidData); @@ -104,10 +104,10 @@ public void TestOldSavingAndLoading() var dataView = ML.Data.LoadFromEnumerable(data); var pipe = ML.Transforms.ReplaceMissingValues( - new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean), - new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean)); + new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", 
MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ReplacementMode.Mean), + new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ReplacementMode.Mean)); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 9638a558de..3b90d25e8b 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -507,11 +507,15 @@ public void ValueMappingWorkout() var badData = new[] { new TestWrong() { A = "bar", B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List() { "foo", "bar", "test", "wahoo" }; - var values = new List() { 1, 2, 3, 4 }; + var keyValuePairs = new List>() { + new KeyValuePair("foo", 1), + new KeyValuePair("bar", 2), + new KeyValuePair("test", 3), + new KeyValuePair("wahoo", 4) + }; // Workout on value mapping - var est = ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -523,14 +527,14 @@ public void ValueMappingValueTypeIsVectorWorkout() var badData = new[] { new TestWrong() { A = "bar", B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List() { "foo", "bar", "test" }; - var values = new List() { - new int[] {2, 3, 4 }, - new int[] {100, 200 }, - new int[] {400, 500, 600, 700 }}; + var keyValuePairs = new List>() { + new KeyValuePair("foo", new int[] {2, 3, 4 }), + new KeyValuePair("bar", new int[] {100, 200 }), + new KeyValuePair("test", new int[] 
{400, 500, 600, 700 }), + }; // Workout on value mapping - var est = ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); + var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -543,11 +547,15 @@ public void ValueMappingInputIsVectorWorkout() var badData = new[] { new TestWrong() { B = 1.2f } }; var badDataView = ML.Data.LoadFromEnumerable(badData); - var keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; - var values = new List() { 1, 2, 3, 4 }; + var keyValuePairs = new List,int>>() { + new KeyValuePair,int>("foo".AsMemory(), 1), + new KeyValuePair,int>("bar".AsMemory(), 2), + new KeyValuePair,int>("test".AsMemory(), 3), + new KeyValuePair,int>("wahoo".AsMemory(), 4) + }; var est = ML.Transforms.Text.TokenizeIntoWords("TokenizeB", "B") - .Append(ML.Transforms.Conversion.MapValue(keys, values, new ColumnOptions[] { ("VecB", "TokenizeB") })); + .Append(ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("VecB", "TokenizeB") })); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); }