diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs index 8349922768..610a77983a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs @@ -20,17 +20,22 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); // Construct the pipeline. - var pipeline = mlContext.Transforms.Conversion.ConvertType("SurvivedInt32", "Survived", DataKind.Int32); + var pipeline = mlContext.Transforms.Conversion.ConvertType( + "SurvivedInt32", "Survived", DataKind.Int32); // Let's train our pipeline, and then apply it to the same data. var transformer = pipeline.Fit(data); var transformedData = transformer.Transform(data); - // Display original column 'Survived' (boolean) and converted column 'SurvivedInt32' (Int32) - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + // Display original column 'Survived' (boolean) and converted column + // 'SurvivedInt32' (Int32) + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); + foreach (var item in convertedData) { - Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived, item.SurvivedInt32); + Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived, + item.SurvivedInt32); } // Output diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs index b7790d9f11..bae0bfea02 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs @@ -4,22 +4,33 @@ namespace Samples.Dynamic { - // This example illustrates how to convert multiple columns of different types to 
one type, in this case System.Single. - // This is often a useful data transformation before concatenating the features together and passing them to a particular estimator. + // This example illustrates how to convert multiple columns of different types + // to one type, in this case System.Single. + // This is often a useful data transformation before concatenating the features + // together and passing them to a particular estimator. public static class ConvertTypeMultiColumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(seed: 1); var rawData = new[] { - new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145}, - new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14}, - new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046}, - new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206}, - new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09}, + new InputData() { Feature1 = true, Feature2 = "0.4", + Feature3 = DateTime.Now, Feature4 = 0.145}, + + new InputData() { Feature1 = false, Feature2 = "0.5", + Feature3 = DateTime.Today, Feature4 = 3.14}, + + new InputData() { Feature1 = false, Feature2 = "14", + Feature3 = DateTime.Today, Feature4 = 0.2046}, + + new InputData() { Feature1 = false, Feature2 = "23", + Feature3 = DateTime.Now, Feature4 = 0.1206}, + + new InputData() { Feature1 = true, Feature2 = "8904", + Feature3 = DateTime.UtcNow, Feature4 = 8.09}, }; // Convert the data to an IDataView. 
@@ -37,17 +48,20 @@ public static void Example() // Let's fit our pipeline to the data. var transformer = pipeline.Fit(data); - // Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted + // Transforming the same data. This will add the 4 columns defined in + // the pipeline, containing the converted // values of the initial columns. var transformedData = transformer.Transform(data); // Shape the transformed data as a strongly typed IEnumerable. - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); // Printing the results. Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4"); foreach (var item in convertedData) - Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}"); + Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t " + + $"{item.Converted3}\t {item.Converted4}"); // Transformed data. // diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs index daee047cff..7617840f6d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs @@ -9,8 +9,8 @@ public static class Hash { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(seed: 1); // Get a small dataset as an IEnumerable. 
@@ -24,30 +24,40 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // Construct the pipeline that would hash the two columns and store the results in new columns. - // The first transform hashes the string column and the second transform hashes the integer column. + // Construct the pipeline that would hash the two columns and store the + // results in new columns. The first transform hashes the string column + // and the second transform hashes the integer column. // - // Hashing is not a reversible operation, so there is no way to retrive the original value from the hashed value. - // Sometimes, for debugging, or model explainability, users will need to know what values in the original columns generated - // the values in the hashed columns, since the algorithms will mostly use the hashed values for further computations. - // The Hash method will preserve the mapping from the original values to the hashed values in the Annotations of the - // newly created column (column populated with the hashed values). - // - // Setting the maximumNumberOfInverts parameters to -1 will preserve the full map. - // If that parameter is left to the default 0 value, the mapping is not preserved. - var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed", "Category", numberOfBits: 16, maximumNumberOfInverts: -1) - .Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age", numberOfBits: 8)); + // Hashing is not a reversible operation, so there is no way to retrieve + // the original value from the hashed value. Sometimes, for debugging, + // or model explainability, users will need to know what values in the + // original columns generated the values in the hashed columns, since + // the algorithms will mostly use the hashed values for further + // computations. 
The Hash method will preserve the mapping from the + // original values to the hashed values in the Annotations of the newly + // created column (column populated with the hashed values). + // + // Setting the maximumNumberOfInverts parameters to -1 will preserve the + // full map. If that parameter is left to the default 0 value, the + // mapping is not preserved. + var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed", + "Category", numberOfBits: 16, maximumNumberOfInverts: -1) + .Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age", + numberOfBits: 8)); // Let's fit our pipeline, and then apply it to the same data. var transformer = pipeline.Fit(data); var transformedData = transformer.Transform(data); - // Convert the post transformation from the IDataView format to an IEnumerable for easy consumption. - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + // Convert the post transformation from the IDataView format to an + // IEnumerable for easy consumption. + var convertedData = mlContext.Data.CreateEnumerable< + TransformedDataPoint>(transformedData, true); Console.WriteLine("Category CategoryHashed\t Age\t AgeHashed"); foreach (var item in convertedData) - Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t {item.Age}\t {item.AgeHashed}"); + Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t " + + $"{item.Age}\t {item.AgeHashed}"); // Expected data after the transformation. // @@ -58,20 +68,24 @@ public static void Example() // MLB 36206 18 127 // MLS 6013 14 62 - // For the Category column, where we set the maximumNumberOfInverts parameter, the names of the original categories, - // and their correspondance with the generated hash values is preserved in the Annotations in the format of indices and values. - // the indices array will have the hashed values, and the corresponding element, position-wise, in the values array will - // contain the original value. 
+ // For the Category column, where we set the maximumNumberOfInverts + // parameter, the names of the original categories, and their + // correspondence with the generated hash values is preserved in the + // Annotations in the format of indices and values. The indices array + // will have the hashed values, and the corresponding element, + // position-wise, in the values array will contain the original value. // // See below for an example on how to retrieve the mapping. var slotNames = new VBuffer>(); - transformedData.Schema["CategoryHashed"].Annotations.GetValue("KeyValues", ref slotNames); + transformedData.Schema["CategoryHashed"].Annotations.GetValue( + "KeyValues", ref slotNames); var indices = slotNames.GetIndices(); var categoryNames = slotNames.GetValues(); for (int i = 0; i < indices.Length; i++) - Console.WriteLine($"The original value of the {indices[i]} category is {categoryNames[i]}"); + Console.WriteLine($"The original value of the {indices[i]} " + + $"category is {categoryNames[i]}"); // Output Data // diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs index 4d967647d0..82b2dea250 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs @@ -11,8 +11,8 @@ public class KeyToValueToKey { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. 
@@ -27,25 +27,40 @@ public static void Example() // A pipeline to convert the terms of the 'Review' column in // making use of default settings. - var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review)) - .Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText")); + var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords( + "TokenizedText", nameof(DataPoint.Review)).Append(mlContext + .Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), + "TokenizedText")); - // Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator. - // We can change the maximumNumberOfKeys to limit how many keys will get generated out of the set of words, - // and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurence (order in which they get encountered) - // to value/alphabetically. - var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review)) - .Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText", maximumNumberOfKeys: 10, - keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)); + // Another pipeline, that customizes the advanced settings of the + // ValueToKeyMappingEstimator. We can change the maximumNumberOfKeys to + // limit how many keys will get generated out of the set of words, and + // condition the order in which they get evaluated by changing + // keyOrdinality from the default ByOccurence (order in which they get + // encountered) to value/alphabetically. 
+ var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords( + "TokenizedText", nameof(DataPoint.Review)).Append(mlContext + .Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), + "TokenizedText", maximumNumberOfKeys: 10, keyOrdinality: + ValueToKeyMappingEstimator.KeyOrdinality.ByValue)); // The transformed data. - var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(trainData); - var transformedDataCustomized = customizedPipeline.Fit(trainData).Transform(trainData); + var transformedDataDefault = defaultPipeline.Fit(trainData).Transform( + trainData); + + var transformedDataCustomized = customizedPipeline.Fit(trainData) + .Transform(trainData); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. - IEnumerable defaultData = mlContext.Data.CreateEnumerable(transformedDataDefault, reuseRowObject: false); - IEnumerable customizedData = mlContext.Data.CreateEnumerable(transformedDataCustomized, reuseRowObject: false); + IEnumerable defaultData = mlContext.Data. + CreateEnumerable(transformedDataDefault, + reuseRowObject: false); + + IEnumerable customizedData = mlContext.Data. + CreateEnumerable(transformedDataCustomized, + reuseRowObject: false); + Console.WriteLine($"Keys"); foreach (var dataRow in defaultData) Console.WriteLine($"{string.Join(',', dataRow.Keys)}"); @@ -65,13 +80,17 @@ public static void Example() // 8,2,9,7,6,4 // 3,10,0,0,0 // 3,10,0,0,0,8 - // Retrieve the original values, by appending the KeyToValue etimator to the existing pipelines - // to convert the keys back to the strings. + // Retrieve the original values, by appending the KeyToValue estimator to + // the existing pipelines to convert the keys back to the strings. 
+ var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion + .MapKeyToValue(nameof(TransformedData.Keys))); + transformedDataDefault = pipeline.Fit(trainData).Transform(trainData); // Preview of the DefaultColumnName column obtained. - var originalColumnBack = transformedDataDefault.GetColumn>>(transformedDataDefault.Schema[nameof(TransformedData.Keys)]); + var originalColumnBack = transformedDataDefault.GetColumn>>(transformedDataDefault.Schema[nameof( + TransformedData.Keys)]); foreach (var row in originalColumnBack) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToBinaryVector.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToBinaryVector.cs index e3b652f0af..57ae091124 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToBinaryVector.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToBinaryVector.cs @@ -7,14 +7,16 @@ namespace Samples.Dynamic { class MapKeyToBinaryVector { - /// This example demonstrates the use of MapKeyToVector by mapping keys to floats[] of 0 and 1, representing the number in binary format. - /// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values - /// converted to KeyTypes will appear skewed by one. + /// This example demonstrates the use of MapKeyToVector by mapping keys to + /// floats[] of 0 and 1, representing the number in binary format. + /// Because the ML.NET KeyType maps the missing value to zero, counting + /// starts at 1, so the uint values converted to KeyTypes will appear + /// skewed by one. /// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. 
It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -30,18 +32,21 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); // Constructs the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector("TimeframeVector", "Timeframe"); + var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector( + "TimeframeVector", "Timeframe"); // Fits the pipeline to the data. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. - IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); Console.WriteLine($" Timeframe TimeframeVector"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.Timeframe}\t\t\t{string.Join(',', featureRow.TimeframeVector)}"); + Console.WriteLine($"{featureRow.Timeframe}\t\t\t" + + $"{string.Join(',', featureRow.TimeframeVector)}"); // Timeframe TimeframeVector // 10 0,1,0,0,1 //binary representation of 9, the original value diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs index 6c801c14e5..5784faaa99 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs @@ -5,22 +5,25 @@ namespace Samples.Dynamic { - /// This example demonstrates the use of the ValueToKeyMappingEstimator, by mapping KeyType values to the original strings. 
- /// For more on ML.NET KeyTypes see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + /// This example demonstrates the use of the ValueToKeyMappingEstimator, by + /// mapping KeyType values to the original strings. For more on ML.NET KeyTypes + /// see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types public class MapKeyToValueMultiColumn { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Get a small dataset as an IEnumerable. // Create a list of data examples. var examples = GenerateRandomDataPoints(1000, 10); - // Convert the examples list to an IDataView object, which is consumable by ML.NET API. + // Convert the examples list to an IDataView object, which is consumable + // by ML.NET API. var dataView = mlContext.Data.LoadFromEnumerable(examples); // Create a pipeline. @@ -28,31 +31,41 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply StochasticDualCoordinateAscent multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + .Append(mlContext.MulticlassClassification.Trainers. + SdcaMaximumEntropy()); // Train the model and do predictions on same data set. // Typically predictions would be in a different, validation set. 
var dataWithPredictions = pipeline.Fit(dataView).Transform(dataView); - // At this point, the Label colum is tranformed from strings, to DataViewKeyType and - // the transformation has added the PredictedLabel column, with same DataViewKeyType as - // transformed Label column. - // MapKeyToValue would take columns with DataViewKeyType and convert them back to thier original values. + // At this point, the Label column is transformed from strings, to + // DataViewKeyType and the transformation has added the PredictedLabel + // column, with same DataViewKeyType as transformed Label column. + // MapKeyToValue would take columns with DataViewKeyType and convert + // them back to their original values. var newPipeline = mlContext.Transforms.Conversion.MapKeyToValue(new[] { new InputOutputColumnPair("LabelOriginalValue","Label"), - new InputOutputColumnPair("PredictedLabelOriginalValue","PredictedLabel") + new InputOutputColumnPair("PredictedLabelOriginalValue", + "PredictedLabel") + }); - var transformedData = newPipeline.Fit(dataWithPredictions).Transform(dataWithPredictions); + var transformedData = newPipeline.Fit(dataWithPredictions).Transform( + dataWithPredictions); + // Let's iterate over first 5 items. transformedData = mlContext.Data.TakeRows(transformedData, 5); - var values = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + var values = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); // Printing the column names of the transformed data. 
- Console.WriteLine($"Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue"); + Console.WriteLine($"Label LabelOriginalValue PredictedLabel " + + $"PredictedLabelOriginalValue"); + foreach (var row in values) - Console.WriteLine($"{row.Label}\t\t{row.LabelOriginalValue}\t\t\t{row.PredictedLabel}\t\t\t{row.PredictedLabelOriginalValue}"); + Console.WriteLine($"{row.Label}\t\t{row.LabelOriginalValue}\t\t\t" + + $"{row.PredictedLabel}\t\t\t{row.PredictedLabelOriginalValue}"); // Expected output: // Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue @@ -71,7 +84,8 @@ private class DataPoint public float[] Features { get; set; } } - private static List GenerateRandomDataPoints(int count, int featureVectorLenght) + private static List GenerateRandomDataPoints(int count, + int featureVectorLenght) { var examples = new List(); var rnd = new Random(0); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs index 12f5adc7fb..221ff8d2d4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs @@ -8,14 +8,14 @@ namespace Samples.Dynamic { class MapKeyToVector { - /// This example demonstrates the use of MapKeyToVector by mapping keys to floats[]. - /// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values - /// converted to KeyTypes will appear skewed by one. - /// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + /// This example demonstrates the use of MapKeyToVector by mapping keys to + /// floats[]. Because the ML.NET KeyType maps the missing value to zero, + /// counting starts at 1, so the uint values converted to KeyTypes will + /// appear skewed by one. 
See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -28,31 +28,43 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // First transform just maps key type to indicator vector. i.e. it's produces vector filled with - // zeros with size of key cardinality and set 1 to corresponding key's value index in that array. - // After that we concatenate two columns with single int values into vector of ints. - // Third transform will create vector of keys, where key type is shared across whole vector. - // Forth transfrom output data as count vector and that vector would have size equal to shared key type - // cardinality and put key counts to corresponding indexes in array. - // Fifth transform output indicator vector for each key and concatenate them together. - // Result vector would be size of key cardinality multiplied by size of original vector. - var pipeline = mlContext.Transforms.Conversion.MapKeyToVector("TimeframeVector", "Timeframe") - .Append(mlContext.Transforms.Concatenate("Parts", "PartA", "PartB")) - .Append(mlContext.Transforms.Conversion.MapValueToKey("Parts")) - .Append(mlContext.Transforms.Conversion.MapKeyToVector("PartsCount", "Parts", outputCountVector: true)) - .Append(mlContext.Transforms.Conversion.MapKeyToVector("PartsNoCount", "Parts")); + // First transform just maps key type to indicator vector. i.e. it's + // produces vector filled with zeros with size of key cardinality and + // set 1 to corresponding key's value index in that array. 
After that we + // concatenate two columns with single int values into vector of ints. + // Third transform will create vector of keys, where key type is shared + // across whole vector. Fourth transform outputs data as count vector and + // that vector would have size equal to shared key type cardinality and + // put key counts to corresponding indexes in array. Fifth transform + // output indicator vector for each key and concatenate them together. + // Result vector would be size of key cardinality multiplied by size of + // original vector. + var pipeline = mlContext.Transforms.Conversion.MapKeyToVector( + "TimeframeVector", "Timeframe") + .Append(mlContext.Transforms.Concatenate("Parts", "PartA", "PartB")) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Parts")) + .Append(mlContext.Transforms.Conversion.MapKeyToVector( + "PartsCount", "Parts", outputCountVector:true)) + .Append(mlContext.Transforms.Conversion.MapKeyToVector( + "PartsNoCount", "Parts")); // Fits the pipeline to the data. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. 
- IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine("Timeframe TimeframeVector PartsCount " + + "PartsNoCount"); - Console.WriteLine($"Timeframe TimeframeVector PartsCount PartsNoCount"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.Timeframe} {string.Join(',', featureRow.TimeframeVector.Select(x=>x))} " + - $"{string.Join(',', featureRow.PartsCount.Select(x => x))} {string.Join(',', featureRow.PartsNoCount.Select(x => x))}"); + Console.WriteLine(featureRow.Timeframe + " " + + string.Join(',', featureRow.TimeframeVector.Select(x=>x)) + " " + + string.Join(',', featureRow.PartsCount.Select(x => x)) + + " " + string.Join(',', featureRow.PartsNoCount.Select( + x => x))); // Expected output: // Timeframe TimeframeVector PartsCount PartsNoCount diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs index 7407ee67cf..606c371497 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs @@ -7,14 +7,15 @@ namespace Samples.Dynamic { public class MapKeyToVectorMultiColumn { - /// This example demonstrates the use of MapKeyToVector by mapping keys to floats[] for multiple columns at once. - /// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values + /// This example demonstrates the use of MapKeyToVector by mapping keys to + /// floats[] for multiple columns at once. Because the ML.NET KeyType maps + /// the missing value to zero, counting starts at 1, so the uint values /// converted to KeyTypes will appear skewed by one. 
/// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -40,11 +41,17 @@ public static void Example() // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. - IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine($" Timeframe TimeframeVector " + + $"Category CategoryVector"); - Console.WriteLine($" Timeframe TimeframeVector Category CategoryVector"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.Timeframe} {string.Join(',', featureRow.TimeframeVector)} {featureRow.Category} {string.Join(',', featureRow.CategoryVector)}"); + Console.WriteLine(featureRow.Timeframe + " " + + string.Join(',', featureRow.TimeframeVector) + " " + + featureRow.Category + " " + + string.Join(',', featureRow.CategoryVector)); // TransformedData obtained post-transformation. // @@ -59,8 +66,8 @@ public static void Example() private class DataPoint { - // The maximal value used is 9; but since 0 is reserved for missing value, - // we set the count to 10. + // The maximal value used is 9; but since 0 is reserved for missing + // value, we set the count to 10. 
[KeyType(10)] public uint Timeframe { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs index 0b7b5ab4dd..60bea2b951 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs @@ -6,12 +6,13 @@ namespace Samples.Dynamic { public static class MapValue { - /// This example demonstrates the use of the ValueMappingEstimator by mapping strings to other string values, or floats to strings. - /// This is useful to map types to a category. + /// This example demonstrates the use of the ValueMappingEstimator by + /// mapping strings to other string values, or floats to strings. This is + /// useful to map types to a category. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -34,7 +35,8 @@ public static void Example() timeframeMap["12-25yrs"] = "Long"; timeframeMap["25+yrs"] = "Long"; - // Construct the mapping of strings to keys(uints) for the Timeframe column. + // Construct the mapping of strings to keys(uints) for the Timeframe + // column. 
var timeframeKeyMap = new Dictionary(); timeframeKeyMap["0-4yrs"] = 1; timeframeKeyMap["0-5yrs"] = 1; @@ -51,22 +53,31 @@ public static void Example() scoreMap[5] = "High"; // Constructs the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe") - .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score")) - // on the MapValue below, the treatValuesAsKeyType is set to true. The type of the Label column will be a KeyDataViewType type, - // and it can be used as input for trainers performing multiclass classification. - .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true)); + var pipeline = mlContext.Transforms.Conversion.MapValue( + "TimeframeCategory", timeframeMap, "Timeframe").Append(mlContext. + Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score")) + // on the MapValue below, the treatValuesAsKeyType is set to true. + // The type of the Label column will be a KeyDataViewType type, + // and it can be used as input for trainers performing multiclass + // classification. + .Append(mlContext.Transforms.Conversion.MapValue("Label", + timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true)); // Fits the pipeline to the data. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. 
- IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine(" Timeframe TimeframeCategory Label Score " + + "ScoreCategory"); - Console.WriteLine($" Timeframe TimeframeCategory Label Score ScoreCategory"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}"); + Console.WriteLine($"{featureRow.Timeframe}\t\t" + + $"{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t" + + $"{featureRow.Score}\t{featureRow.ScoreCategory}"); // TransformedData obtained post-transformation. // diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs index 9e1cbf2a99..5c57b4078a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs @@ -6,12 +6,13 @@ namespace Samples.Dynamic { public static class MapValueIdvLookup { - /// This example demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView. - /// This is useful to map types to a grouping. + /// This example demonstrates the use of MapValue by mapping floats to + /// strings, looking up the mapping in an IDataView. This is useful to map + /// types to a grouping. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -41,17 +42,22 @@ public static void Example() var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData); // Constructs the ValueMappingEstimator making the ML.NET pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema["Category"], "Price"); + var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", + lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema[ + "Category"], "Price"); - // Fits the ValueMappingEstimator and transforms the data converting the Price to PriceCategory. + // Fits the ValueMappingEstimator and transforms the data converting the + // Price to PriceCategory. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. - IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); Console.WriteLine($" Price PriceCategory"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.Price}\t\t{featureRow.PriceCategory}"); + Console.WriteLine($"{featureRow.Price}\t\t" + + $"{featureRow.PriceCategory}"); // TransformedData obtained post-transformation. 
// diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs index 18362ec9a0..57bff5a3b8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs @@ -6,13 +6,16 @@ namespace Samples.Dynamic { public static class MapValueToArray { - /// This example demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays. - /// This functionality is useful when the generated column will serve as the Features column for a trainer. Most of the trainers take a numeric vector, as the Features column. - /// In this example, we are mapping the Timeframe data to arbitrary integer arrays. + /// This example demonstrates the use of MapValue by mapping strings to + /// array values, which allows for mapping data to numeric arrays. This + /// functionality is useful when the generated column will serve as the + /// Features column for a trainer. Most of the trainers take a numeric + /// vector, as the Features column. In this example, we are mapping the + /// Timeframe data to arbitrary integer arrays. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -27,8 +30,8 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // Creating a list of key-value pairs to indicate the mapping between the - // DataPoint values, and the arrays they should map to. 
+ // Creating a list of key-value pairs to indicate the mapping between + // the DataPoint values, and the arrays they should map to. var timeframeMap = new Dictionary(); timeframeMap["0-4yrs"] = new int[] { 0, 5, 300 }; timeframeMap["0-5yrs"] = new int[] { 0, 5, 300 }; @@ -37,17 +40,22 @@ public static void Example() timeframeMap["25+yrs"] = new int[] { 12, 50, 300 }; // Constructs the ValueMappingEstimator making the ML.NET pipeline. - var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe"); + var pipeline = mlContext.Transforms.Conversion.MapValue("Features", + timeframeMap, "Timeframe"); - // Fits the ValueMappingEstimator and transforms the data adding the Features column. + // Fits the ValueMappingEstimator and transforms the data adding the + // Features column. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. - IEnumerable featuresColumn = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable featuresColumn = mlContext.Data + .CreateEnumerable(transformedData, reuseRowObject: + false); Console.WriteLine($"Timeframe Features"); foreach (var featureRow in featuresColumn) - Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Features)}"); + Console.WriteLine($"{featureRow.Timeframe}\t\t " + + $"{string.Join(",", featureRow.Features)}"); // Timeframe Features // 0-4yrs 0, 5, 300 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs index ccd79f4fae..e54578cb60 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs @@ -6,13 +6,14 @@ namespace Samples.Dynamic { public static class MapValueToKeyMultiColumn { - /// This 
example demonstrates the use of the ValueToKeyMappingEstimator, by mapping strings to KeyType values. - /// For more on ML.NET KeyTypes see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + /// This example demonstrates the use of the ValueToKeyMappingEstimator, by + /// mapping strings to KeyType values. For more on ML.NET KeyTypes see: + /// https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types /// It is possible to have multiple values map to the same category. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. @@ -30,19 +31,24 @@ public static void Example() new InputOutputColumnPair("StudyTimeCategory", "StudyTime"), new InputOutputColumnPair("CourseCategory", "Course") }, - keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue, - addKeyValueAnnotationsAsText: true); + keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator + .KeyOrdinality.ByValue, addKeyValueAnnotationsAsText: true); // Fits the pipeline to the data. IDataView transformedData = pipeline.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. 
- IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable features = mlContext.Data.CreateEnumerable< + TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine($" StudyTime StudyTimeCategory Course " + + $"CourseCategory"); - Console.WriteLine($" StudyTime StudyTimeCategory Course CourseCategory"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.StudyTime}\t\t{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t{featureRow.CourseCategory}"); + Console.WriteLine($"{featureRow.StudyTime}\t\t" + + $"{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t" + + $"{featureRow.CourseCategory}"); // TransformedData obtained post-transformation. // @@ -52,11 +58,13 @@ public static void Example() // 12-25yrs 3 LA 3 // 0-5yrs 2 DS 2 - // If we wanted to provide the mapping, rather than letting the transform create it, - // we could do so by creating an IDataView one column containing the values to map to. - // If the values in the dataset are not found in the lookup IDataView they will get mapped to the mising value, 0. - // The keyData are shared among the columns, therefore the keys are not contiguous for the column. - // Create the lookup map data IEnumerable. + // If we wanted to provide the mapping, rather than letting the + // transform create it, we could do so by creating an IDataView one + // column containing the values to map to. If the values in the dataset + // are not found in the lookup IDataView they will get mapped to the + // missing value, 0. The keyData are shared among the columns, therefore + // the keys are not contiguous for the column. Create the lookup map + // data IEnumerable. 
var lookupData = new[] { new LookupMap { Key = "0-4yrs" }, new LookupMap { Key = "6-11yrs" }, @@ -70,22 +78,28 @@ public static void Example() var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData); // Constructs the ML.net pipeline - var pipelineWithLookupMap = mlContext.Transforms.Conversion.MapValueToKey(new[] { - new InputOutputColumnPair("StudyTimeCategory", "StudyTime"), - new InputOutputColumnPair("CourseCategory", "Course") - }, - keyData: lookupIdvMap); + var pipelineWithLookupMap = mlContext.Transforms.Conversion + .MapValueToKey(new[] { + new InputOutputColumnPair("StudyTimeCategory", "StudyTime"), + new InputOutputColumnPair("CourseCategory", "Course") + }, + keyData: lookupIdvMap); // Fits the pipeline to the data. transformedData = pipelineWithLookupMap.Fit(data).Transform(data); // Getting the resulting data as an IEnumerable. // This will contain the newly created columns. - features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + features = mlContext.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine($" StudyTime StudyTimeCategory " + + $"Course CourseCategory"); - Console.WriteLine($" StudyTime StudyTimeCategory Course CourseCategory"); foreach (var featureRow in features) - Console.WriteLine($"{featureRow.StudyTime}\t\t{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t{featureRow.CourseCategory}"); + Console.WriteLine($"{featureRow.StudyTime}\t\t" + + $"{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t" + + $"{featureRow.CourseCategory}"); // StudyTime StudyTimeCategory Course CourseCategory // 0 - 4yrs 1 CS 4 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCount.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCount.cs index b4d313644a..2b5ec15af4 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCount.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCount.cs @@ -9,8 +9,8 @@ public static class SelectFeaturesBasedOnCount { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -19,7 +19,8 @@ public static void Example() // Printing the columns of the input data. Console.WriteLine($"NumericVector StringVector"); foreach (var item in rawData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVector), string.Join(",", item.StringVector)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item + .NumericVector), string.Join(",", item.StringVector)); // NumericVector StringVector // 4,NaN,6 A,WA,Male @@ -29,19 +30,27 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // We will use the SelectFeaturesBasedOnCount to retain only those slots which have at least 'count' non-default and non-missing values per slot. + // We will use the SelectFeaturesBasedOnCount to retain only those slots + // which have at least 'count' non-default and non-missing values per + // slot. var pipeline = - mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(outputColumnName: "NumericVector", count: 3) // Usage on numeric column. - .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(outputColumnName: "StringVector", count: 3)); // Usage on text column. 
+ mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount( + outputColumnName: "NumericVector", count: 3) // Usage on numeric + // column. + .Append(mlContext.Transforms.FeatureSelection + .SelectFeaturesBasedOnCount(outputColumnName: "StringVector", + count: 3)); // Usage on text column. var transformedData = pipeline.Fit(data).Transform(data); - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); // Printing the columns of the transformed data. Console.WriteLine($"NumericVector StringVector"); foreach (var item in convertedData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVector), string.Join(",", item.StringVector)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item. + NumericVector), string.Join(",", item.StringVector)); // NumericVector StringVector // 4,6 A,Male diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCountMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCountMultiColumn.cs index ae45fa375e..ff195da136 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCountMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnCountMultiColumn.cs @@ -9,8 +9,8 @@ public static class SelectFeaturesBasedOnCountMultiColumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. 
@@ -19,7 +19,8 @@ public static void Example() // Printing the columns of the input data. Console.WriteLine($"NumericVector StringVector"); foreach (var item in rawData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVector), string.Join(",", item.StringVector)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item. + NumericVector), string.Join(",", item.StringVector)); // NumericVector StringVector // 4,NaN,6 A,WA,Male @@ -29,22 +30,27 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // We will use the SelectFeaturesBasedOnCount transform estimator, to retain only those slots which have - // at least 'count' non-default values per slot. + // We will use the SelectFeaturesBasedOnCount transform estimator, to + // retain only those slots which have at least 'count' non-default + // values per slot. - // Multi column example. This pipeline transform two columns using the provided parameters. - var pipeline = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount( - new InputOutputColumnPair[] { new InputOutputColumnPair("NumericVector"), new InputOutputColumnPair("StringVector") }, - count: 3); + // Multi column example. This pipeline transforms two columns using the + // provided parameters. + var pipeline = mlContext.Transforms.FeatureSelection + .SelectFeaturesBasedOnCount(new InputOutputColumnPair[] { new + InputOutputColumnPair("NumericVector"), new InputOutputColumnPair( + "StringVector") }, count: 3); var transformedData = pipeline.Fit(data).Transform(data); - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); // Printing the columns of the transformed data. 
Console.WriteLine($"NumericVector StringVector"); foreach (var item in convertedData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVector), string.Join(",", item.StringVector)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item + .NumericVector), string.Join(",", item.StringVector)); // NumericVector StringVector // 4,6 A,Male diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformation.cs index d7aa805ceb..79d1a06021 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformation.cs @@ -9,8 +9,8 @@ public static class SelectFeaturesBasedOnMutualInformation { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -19,7 +19,8 @@ public static void Example() // Printing the columns of the input data. 
Console.WriteLine($"Label NumericVector"); foreach (var item in rawData) - Console.WriteLine("{0,-25} {1,-25}", item.Label, string.Join(",", item.NumericVector)); + Console.WriteLine("{0,-25} {1,-25}", item.Label, string.Join(",", + item.NumericVector)); // Label NumericVector // True 4,0,6 @@ -29,16 +30,19 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // We define a MutualInformationFeatureSelectingEstimator that selects the top k slots in a feature - // vector based on highest mutual information between that slot and a specified label. - var pipeline = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation( - outputColumnName: "NumericVector", labelColumnName: "Label", - slotsInOutput:2); + // We define a MutualInformationFeatureSelectingEstimator that selects + // the top k slots in a feature vector based on highest mutual + // information between that slot and a specified label. + var pipeline = mlContext.Transforms.FeatureSelection + .SelectFeaturesBasedOnMutualInformation(outputColumnName: + "NumericVector", labelColumnName: "Label", slotsInOutput:2); - // The pipeline can then be trained, using .Fit(), and the resulting transformer can be used to transform data. + // The pipeline can then be trained, using .Fit(), and the resulting + // transformer can be used to transform data. var transformedData = pipeline.Fit(data).Transform(data); - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); // Printing the columns of the transformed data. 
Console.WriteLine($"NumericVector"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformationMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformationMultiColumn.cs index d9543cee7e..138abcbd60 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformationMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/FeatureSelection/SelectFeaturesBasedOnMutualInformationMultiColumn.cs @@ -9,8 +9,8 @@ public static class SelectFeaturesBasedOnMutualInformationMultiColumn { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -19,7 +19,8 @@ public static void Example() // Printing the columns of the input data. Console.WriteLine($"NumericVectorA NumericVectorB"); foreach (var item in rawData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVectorA), string.Join(",", item.NumericVectorB)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item + .NumericVectorA), string.Join(",", item.NumericVectorB)); // NumericVectorA NumericVectorB // 4,0,6 7,8,9 @@ -29,23 +30,28 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(rawData); - // We define a MutualInformationFeatureSelectingEstimator that selects the top k slots in a feature - // vector based on highest mutual information between that slot and a specified label. 
+ // We define a MutualInformationFeatureSelectingEstimator that selects + // the top k slots in a feature vector based on highest mutual + // information between that slot and a specified label. - // Multi column example : This pipeline transform two columns using the provided parameters. - var pipeline = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation( - new InputOutputColumnPair[] { new InputOutputColumnPair("NumericVectorA"), new InputOutputColumnPair("NumericVectorB") }, - labelColumnName: "Label", + // Multi column example: This pipeline transforms two columns using the + // provided parameters. + var pipeline = mlContext.Transforms.FeatureSelection + .SelectFeaturesBasedOnMutualInformation(new InputOutputColumnPair[] + { new InputOutputColumnPair("NumericVectorA"), new + InputOutputColumnPair("NumericVectorB") }, labelColumnName: "Label", slotsInOutput: 4); var transformedData = pipeline.Fit(data).Transform(data); - var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + var convertedData = mlContext.Data.CreateEnumerable( + transformedData, true); // Printing the columns of the transformed data. 
Console.WriteLine($"NumericVectorA NumericVectorB"); foreach (var item in convertedData) - Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item.NumericVectorA), string.Join(",", item.NumericVectorB)); + Console.WriteLine("{0,-25} {1,-25}", string.Join(",", item + .NumericVectorA), string.Join(",", item.NumericVectorB)); // NumericVectorA NumericVectorB // 4,0,6 9 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs index 02f8fce98b..052f949eb4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScale.cs @@ -8,17 +8,21 @@ namespace Samples.Dynamic { public static class ConvertToGrayscale { - // Sample that loads images from the file system, and converts them to grayscale. + // Sample that loads images from the file system, and converts them to + // grayscale. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Downloading a few images, and an images.tsv file, which contains a list of the files from the dotnet/machinelearning/test/data/images/. - // If you inspect the fileSystem, after running this line, an "images" folder will be created, containing 4 images, and a .tsv file + // Downloading a few images, and an images.tsv file, which contains a + // list of the files from the dotnet/machinelearning/test/data/images/. + // If you inspect the fileSystem, after running this line, an "images" + // folder will be created, containing 4 images, and a .tsv file // enumerating the images. 
- var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadImages(); + var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadImages(); // Preview of the content of the images.tsv file // @@ -39,8 +43,10 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages("ImageObject", imagesFolder, "ImagePath") - .Append(mlContext.Transforms.ConvertToGrayscale("Grayscale", "ImageObject")); + var pipeline = mlContext.Transforms.LoadImages("ImageObject", + imagesFolder, "ImagePath") + .Append(mlContext.Transforms.ConvertToGrayscale("Grayscale", + "ImageObject")); var transformedData = pipeline.Fit(data).Transform(data); @@ -54,20 +60,32 @@ public static void Example() private static void PrintColumns(IDataView transformedData) { - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", "ImagePath", "Name", "ImageObject", "Grayscale"); - using (var cursor = transformedData.GetRowCursor(transformedData.Schema)) + Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", "ImagePath", + "Name", "ImageObject", "Grayscale"); + + using (var cursor = transformedData.GetRowCursor(transformedData + .Schema)) { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), and + // column-type validation once, rather than many times. 
ReadOnlyMemory imagePath = default; ReadOnlyMemory name = default; Bitmap imageObject = null; Bitmap grayscaleImageObject = null; - var imagePathGetter = cursor.GetGetter>(cursor.Schema["ImagePath"]); - var nameGetter = cursor.GetGetter>(cursor.Schema["Name"]); - var imageObjectGetter = cursor.GetGetter(cursor.Schema["ImageObject"]); - var grayscaleGetter = cursor.GetGetter(cursor.Schema["Grayscale"]); + var imagePathGetter = cursor.GetGetter>(cursor + .Schema["ImagePath"]); + + var nameGetter = cursor.GetGetter>(cursor + .Schema["Name"]); + + var imageObjectGetter = cursor.GetGetter(cursor.Schema[ + "ImageObject"]); + + var grayscaleGetter = cursor.GetGetter(cursor.Schema[ + "Grayscale"]); + while (cursor.MoveNext()) { imagePathGetter(ref imagePath); @@ -75,7 +93,9 @@ private static void PrintColumns(IDataView transformedData) imageObjectGetter(ref imageObject); grayscaleGetter(ref grayscaleImageObject); - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", imagePath, name, imageObject.PhysicalDimension, grayscaleImageObject.PhysicalDimension); + Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", + imagePath, name, imageObject.PhysicalDimension, + grayscaleImageObject.PhysicalDimension); } // Dispose the image. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScaleInMemory.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScaleInMemory.cs index 883dfa5dc1..3bb0586cc0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScaleInMemory.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToGrayScaleInMemory.cs @@ -11,13 +11,16 @@ static public void Example() { var mlContext = new MLContext(); // Create an image list. 
- var images = new[] { new ImageDataPoint(2, 3, Color.Blue), new ImageDataPoint(2, 3, Color.Red) }; + var images = new[] { new ImageDataPoint(2, 3, Color.Blue), new + ImageDataPoint(2, 3, Color.Red) }; - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var data = mlContext.Data.LoadFromEnumerable(images); // Convert image to gray scale. - var pipeline = mlContext.Transforms.ConvertToGrayscale("GrayImage", "Image"); + var pipeline = mlContext.Transforms.ConvertToGrayscale("GrayImage", + "Image"); // Fit the model. var model = pipeline.Fit(data); @@ -26,7 +29,8 @@ static public void Example() var transformedData = model.Transform(data); // Load images in DataView back to Enumerable. - var transformedDataPoints = mlContext.Data.CreateEnumerable(transformedData, false); + var transformedDataPoints = mlContext.Data.CreateEnumerable< + ImageDataPoint>(transformedData, false); // Print out input and output pixels. 
foreach (var dataPoint in transformedDataPoints) @@ -39,7 +43,8 @@ static public void Example() { var pixel = image.GetPixel(x, y); var grayPixel = grayImage.GetPixel(x, y); - Console.WriteLine($"The original pixel is {pixel} and its pixel in gray is {grayPixel}"); + Console.WriteLine($"The original pixel is {pixel} and its" + + $" pixel in gray is {grayPixel}"); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToImage.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToImage.cs index 04ccb71093..f0f58c7e45 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToImage.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ConvertToImage.cs @@ -14,22 +14,25 @@ public static class ConvertToImage private const int numberOfChannels = 3; private const int inputSize = imageHeight * imageWidth * numberOfChannels; - // Sample that shows how an input array (of doubles) can be used to interop with image related estimators in ML.NET. + // Sample that shows how an input array (of doubles) can be used to interop + // with image related estimators in ML.NET. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(4); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var data = mlContext.Data.LoadFromEnumerable(dataPoints); // Image loading pipeline. 
- var pipeline = mlContext.Transforms.ConvertToImage(imageHeight, imageWidth, "Image", "Features") - .Append(mlContext.Transforms.ExtractPixels("Pixels", "Image")); + var pipeline = mlContext.Transforms.ConvertToImage(imageHeight, + imageWidth, "Image", "Features") + .Append(mlContext.Transforms.ExtractPixels("Pixels", "Image")); var transformedData = pipeline.Fit(data).Transform(data); @@ -45,18 +48,25 @@ public static void Example() private static void PrintColumns(IDataView transformedData) { - Console.WriteLine("{0, -25} {1, -25} {2, -25}", "Features", "Image", "Pixels"); + Console.WriteLine("{0, -25} {1, -25} {2, -25}", "Features", "Image", + "Pixels"); - using (var cursor = transformedData.GetRowCursor(transformedData.Schema)) + using (var cursor = transformedData.GetRowCursor(transformedData + .Schema)) { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), + // and column-type validation once, rather than many times. 
VBuffer features = default; VBuffer pixels = default; Bitmap imageObject = null; - var featuresGetter = cursor.GetGetter>(cursor.Schema["Features"]); - var pixelsGetter = cursor.GetGetter>(cursor.Schema["Pixels"]); + var featuresGetter = cursor.GetGetter>(cursor.Schema[ + "Features"]); + + var pixelsGetter = cursor.GetGetter>(cursor.Schema[ + "Pixels"]); + var imageGetter = cursor.GetGetter(cursor.Schema["Image"]); while (cursor.MoveNext()) { @@ -65,8 +75,10 @@ private static void PrintColumns(IDataView transformedData) pixelsGetter(ref pixels); imageGetter(ref imageObject); - Console.WriteLine("{0, -25} {1, -25} {2, -25}", string.Join(",", features.DenseValues().Take(5)) + "...", - imageObject.PhysicalDimension, string.Join(",", pixels.DenseValues().Take(5)) + "..."); + Console.WriteLine("{0, -25} {1, -25} {2, -25}", string.Join(",", + features.DenseValues().Take(5)) + "...", imageObject + .PhysicalDimension, string.Join(",", pixels.DenseValues() + .Take(5)) + "..."); } // Dispose the image. 
@@ -80,12 +92,14 @@ private class DataPoint public float[] Features { get; set; } } - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) { var random = new Random(seed); for (int i = 0; i < count; i++) - yield return new DataPoint { Features = Enumerable.Repeat(0, inputSize).Select(x => (float)random.Next(0, 256)).ToArray() }; + yield return new DataPoint { Features = Enumerable.Repeat(0, + inputSize).Select(x => (float)random.Next(0, 256)).ToArray() }; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs index 97929a38cc..99d00b900c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/DnnFeaturizeImage.cs @@ -9,16 +9,20 @@ public static class DnnFeaturizeImage { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Downloading a few images, and an images.tsv file, which contains a list of the files from the dotnet/machinelearning/test/data/images/. - // If you inspect the fileSystem, after running this line, an "images" folder will be created, containing 4 images, and a .tsv file + // Downloading a few images, and an images.tsv file, which contains a + // list of the files from the dotnet/machinelearning/test/data/images/. + // If you inspect the fileSystem, after running this line, an "images" + // folder will be created, containing 4 images, and a .tsv file // enumerating the images. 
- var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadImages(); + var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadImages(); - // Preview of the content of the images.tsv file, which lists the images to operate on + // Preview of the content of the images.tsv file, which lists the images + // to operate on // // imagePath imageType // tomato.bmp tomato @@ -40,16 +44,22 @@ public static void Example() // Installing the Microsoft.ML.DNNImageFeaturizer packages copies the models in the // `DnnImageModels` folder. // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages("ImageObject", imagesFolder, "ImagePath") - .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: 224, imageHeight: 224)) - .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")) - .Append(mlContext.Transforms.DnnFeaturizeImage("FeaturizedImage", m => m.ModelSelector.ResNet18(mlContext, m.OutputColumn, m.InputColumn), "Pixels")); + var pipeline = mlContext.Transforms.LoadImages("ImageObject", + imagesFolder, "ImagePath") + .Append(mlContext.Transforms.ResizeImages("ImageObject", imageWidth: + 224, imageHeight: 224)) + .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObject")) + .Append(mlContext.Transforms.DnnFeaturizeImage("FeaturizedImage", + m => m.ModelSelector.ResNet18(mlContext, m.OutputColumn, m + .InputColumn), "Pixels")); var transformedData = pipeline.Fit(data).Transform(data); - var FeaturizedImageColumnsPerRow = transformedData.GetColumn("FeaturizedImage").ToArray(); + var FeaturizedImageColumnsPerRow = transformedData.GetColumn( + "FeaturizedImage").ToArray(); - // Preview of FeaturizedImageColumnsPerRow for the first row, FeaturizedImageColumnsPerRow[0] + // Preview of FeaturizedImageColumnsPerRow for the first row, + // FeaturizedImageColumnsPerRow[0] // // 0.696136236 // 0.2661711 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs index 4f8793fd7a..e608e484df 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ExtractPixels.cs @@ -9,18 +9,22 @@ namespace Samples.Dynamic { public static class ExtractPixels { - // Sample that loads the images from the file system, resizes them (ExtractPixels requires a resizing operation), and extracts the - // values of the pixels as a vector. + // Sample that loads the images from the file system, resizes them ( + // ExtractPixels requires a resizing operation), and extracts the values of + // the pixels as a vector. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Downloading a few images, and an images.tsv file, which contains a list of the files from the dotnet/machinelearning/test/data/images/. - // If you inspect the fileSystem, after running this line, an "images" folder will be created, containing 4 images, and a .tsv file + // Downloading a few images, and an images.tsv file, which contains a + // list of the files from the dotnet/machinelearning/test/data/images/. + // If you inspect the fileSystem, after running this line, an "images" + // folder will be created, containing 4 images, and a .tsv file // enumerating the images. 
- var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadImages(); + var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadImages(); // Preview of the content of the images.tsv file // @@ -41,9 +45,13 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages("ImageObject", imagesFolder, "ImagePath") - .Append(mlContext.Transforms.ResizeImages("ImageObjectResized", inputColumnName: "ImageObject", imageWidth: 100, imageHeight: 100)) - .Append(mlContext.Transforms.ExtractPixels("Pixels", "ImageObjectResized")); + var pipeline = mlContext.Transforms.LoadImages("ImageObject", + imagesFolder, "ImagePath") + .Append(mlContext.Transforms.ResizeImages("ImageObjectResized", + inputColumnName: "ImageObject", imageWidth: 100, imageHeight: + 100)) + .Append(mlContext.Transforms.ExtractPixels("Pixels", + "ImageObjectResized")); var transformedData = pipeline.Fit(data).Transform(data); @@ -59,12 +67,15 @@ public static void Example() private static void PrintColumns(IDataView transformedData) { - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25} {4, -25}", "ImagePath", "Name", "ImageObject", "ImageObjectResized", "Pixels"); + Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25} {4, -25}", + "ImagePath", "Name", "ImageObject", "ImageObjectResized", "Pixels"); - using (var cursor = transformedData.GetRowCursor(transformedData.Schema)) + using (var cursor = transformedData.GetRowCursor(transformedData + .Schema)) { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), + // and column-type validation once, rather than many times. 
ReadOnlyMemory imagePath = default; ReadOnlyMemory name = default; @@ -72,11 +83,21 @@ private static void PrintColumns(IDataView transformedData) Bitmap resizedImageObject = null; VBuffer pixels = default; - var imagePathGetter = cursor.GetGetter>(cursor.Schema["ImagePath"]); - var nameGetter = cursor.GetGetter>(cursor.Schema["Name"]); - var imageObjectGetter = cursor.GetGetter(cursor.Schema["ImageObject"]); - var resizedImageGetter = cursor.GetGetter(cursor.Schema["ImageObjectResized"]); - var pixelsGetter = cursor.GetGetter>(cursor.Schema["Pixels"]); + var imagePathGetter = cursor.GetGetter>(cursor + .Schema["ImagePath"]); + + var nameGetter = cursor.GetGetter>(cursor + .Schema["Name"]); + + var imageObjectGetter = cursor.GetGetter(cursor.Schema[ + "ImageObject"]); + + var resizedImageGetter = cursor.GetGetter(cursor.Schema[ + "ImageObjectResized"]); + + var pixelsGetter = cursor.GetGetter>(cursor.Schema[ + "Pixels"]); + while (cursor.MoveNext()) { @@ -86,8 +107,10 @@ private static void PrintColumns(IDataView transformedData) resizedImageGetter(ref resizedImageObject); pixelsGetter(ref pixels); - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25} {4, -25}", imagePath, name, - imageObject.PhysicalDimension, resizedImageObject.PhysicalDimension, string.Join(",", pixels.DenseValues().Take(5)) + "..."); + Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25} " + + "{4, -25}", imagePath, name, imageObject.PhysicalDimension, + resizedImageObject.PhysicalDimension, string.Join(",", + pixels.DenseValues().Take(5)) + "..."); } // Dispose the image. 
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs index 6dba6729e2..6d38dc6e7c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs @@ -11,14 +11,17 @@ public static class LoadImages // Loads the images of the imagesFolder into an IDataView. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Downloading a few images, and an images.tsv file, which contains a list of the files from the dotnet/machinelearning/test/data/images/. - // If you inspect the fileSystem, after running this line, an "images" folder will be created, containing 4 images, and a .tsv file + // Downloading a few images, and an images.tsv file, which contains a + // list of the files from the dotnet/machinelearning/test/data/images/. + // If you inspect the fileSystem, after running this line, an "images" + // folder will be created, containing 4 images, and a .tsv file // enumerating the images. - var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadImages(); + var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadImages(); // Preview of the content of the images.tsv file // @@ -39,7 +42,8 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. 
- var pipeline = mlContext.Transforms.LoadImages("ImageObject", imagesFolder, "ImagePath"); + var pipeline = mlContext.Transforms.LoadImages("ImageObject", + imagesFolder, "ImagePath"); var transformedData = pipeline.Fit(data).Transform(data); @@ -55,18 +59,28 @@ public static void Example() private static void PrintColumns(IDataView transformedData) { // The transformedData IDataView contains the loaded images now. - Console.WriteLine("{0, -25} {1, -25} {2, -25}", "ImagePath", "Name", "ImageObject"); - using (var cursor = transformedData.GetRowCursor(transformedData.Schema)) + Console.WriteLine("{0, -25} {1, -25} {2, -25}", "ImagePath", "Name", + "ImageObject"); + + using (var cursor = transformedData.GetRowCursor(transformedData + .Schema)) { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), + // and column-type validation once, rather than many times. 
ReadOnlyMemory imagePath = default; ReadOnlyMemory name = default; Bitmap imageObject = null; - var imagePathGetter = cursor.GetGetter>(cursor.Schema["ImagePath"]); - var nameGetter = cursor.GetGetter>(cursor.Schema["Name"]); - var imageObjectGetter = cursor.GetGetter(cursor.Schema["ImageObject"]); + var imagePathGetter = cursor.GetGetter>(cursor + .Schema["ImagePath"]); + + var nameGetter = cursor.GetGetter>(cursor + .Schema["Name"]); + + var imageObjectGetter = cursor.GetGetter(cursor.Schema[ + "ImageObject"]); + while (cursor.MoveNext()) { @@ -74,7 +88,8 @@ private static void PrintColumns(IDataView transformedData) nameGetter(ref name); imageObjectGetter(ref imageObject); - Console.WriteLine("{0, -25} {1, -25} {2, -25}", imagePath, name, imageObject.PhysicalDimension); + Console.WriteLine("{0, -25} {1, -25} {2, -25}", imagePath, name, + imageObject.PhysicalDimension); } // Dispose the image. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs index 11b8905c90..696895993f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/ResizeImages.cs @@ -11,14 +11,17 @@ public static class ResizeImages // Example on how to load the images from the file system, and resize them. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Downloading a few images, and an images.tsv file, which contains a list of the files from the dotnet/machinelearning/test/data/images/. 
- // If you inspect the fileSystem, after running this line, an "images" folder will be created, containing 4 images, and a .tsv file + // Downloading a few images, and an images.tsv file, which contains a + // list of the files from the dotnet/machinelearning/test/data/images/. + // If you inspect the fileSystem, after running this line, an "images" + // folder will be created, containing 4 images, and a .tsv file // enumerating the images. - var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadImages(); + var imagesDataFile = Microsoft.ML.SamplesUtils.DatasetUtils + .DownloadImages(); // Preview of the content of the images.tsv file // @@ -39,8 +42,10 @@ public static void Example() var imagesFolder = Path.GetDirectoryName(imagesDataFile); // Image loading pipeline. - var pipeline = mlContext.Transforms.LoadImages("ImageObject", imagesFolder, "ImagePath") - .Append(mlContext.Transforms.ResizeImages("ImageObjectResized", inputColumnName: "ImageObject", imageWidth: 100, imageHeight: 100)); + var pipeline = mlContext.Transforms.LoadImages("ImageObject", + imagesFolder, "ImagePath") + .Append(mlContext.Transforms.ResizeImages("ImageObjectResized", + inputColumnName: "ImageObject", imageWidth: 100, imageHeight: 100)); var transformedData = pipeline.Fit(data).Transform(data); // The transformedData IDataView contains the resized images now. 
@@ -57,20 +62,32 @@ public static void Example() private static void PrintColumns(IDataView transformedData) { - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", "ImagePath", "Name", "ImageObject", "ImageObjectResized"); - using (var cursor = transformedData.GetRowCursor(transformedData.Schema)) + Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", "ImagePath", + "Name", "ImageObject", "ImageObjectResized"); + + using (var cursor = transformedData.GetRowCursor(transformedData + .Schema)) { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), + // and column-type validation once, rather than many times. ReadOnlyMemory imagePath = default; ReadOnlyMemory name = default; Bitmap imageObject = null; Bitmap resizedImageObject = null; - var imagePathGetter = cursor.GetGetter>(cursor.Schema["ImagePath"]); - var nameGetter = cursor.GetGetter>(cursor.Schema["Name"]); - var imageObjectGetter = cursor.GetGetter(cursor.Schema["ImageObject"]); - var resizedImageGetter = cursor.GetGetter(cursor.Schema["ImageObjectResized"]); + var imagePathGetter = cursor.GetGetter>(cursor + .Schema["ImagePath"]); + + var nameGetter = cursor.GetGetter>(cursor + .Schema["Name"]); + + var imageObjectGetter = cursor.GetGetter(cursor.Schema[ + "ImageObject"]); + + var resizedImageGetter = cursor.GetGetter(cursor.Schema[ + "ImageObjectResized"]); + while (cursor.MoveNext()) { imagePathGetter(ref imagePath); @@ -78,8 +95,9 @@ private static void PrintColumns(IDataView transformedData) imageObjectGetter(ref imageObject); resizedImageGetter(ref resizedImageObject); - Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", imagePath, name, - imageObject.PhysicalDimension, resizedImageObject.PhysicalDimension); + 
Console.WriteLine("{0, -25} {1, -25} {2, -25} {3, -25}", + imagePath, name, imageObject.PhysicalDimension, + resizedImageObject.PhysicalDimension); } // Dispose the image.