diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs index 1506fc1abd..fd24a32b39 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -10,35 +10,43 @@ public static class AveragedPerceptron { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. 
+ var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -71,7 +81,9 @@ public static void Example() // Precision || 0.7402 | 0.7061 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? 
randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs index 748e37be8e..8f31f474e4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -11,15 +11,17 @@ public static class AveragedPerceptronWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -33,23 +35,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options); + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -59,7 +67,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -82,7 +92,9 @@ public static void Example() // Precision || 0.7402 | 0.7061 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -93,13 +105,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -122,11 +139,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude index 6bc5660d46..72dc7cc111 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude @@ -13,63 +13,79 @@ namespace Samples.Dynamic.Trainers.BinaryClassification {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); <# } #> <# if (TrainerOptions == null) { #> // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(); + var pipeline = mlContext.BinaryClassification.Trainers + .<#=Trainer#>(); <# } else { #> // Define trainer options. var options = new <#=TrainerOptions#>; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(options); + var pipeline = mlContext.BinaryClassification.Trainers + .<#=Trainer#>(options); <# } #> // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. 
Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> - <# string Evaluator = IsCalibrated ? "Evaluate" : "EvaluateNonCalibrated"; #> + <# string Evaluator = IsCalibrated ? "Evaluate" : + "EvaluateNonCalibrated"; #> // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.<#=Evaluator#>(transformedTestData); + var metrics = mlContext.BinaryClassification + .<#=Evaluator#>(transformedTestData); + PrintMetrics(metrics); <#=ExpectedOutput#> } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -80,13 +96,18 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + <#=DataSepValue#>).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? 
randomFloat() : randomFloat() + + <#=DataSepValue#>).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -109,11 +130,15 @@ namespace Samples.Dynamic.Trainers.BinaryClassification Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs index 1da43a7790..2b56ef63f0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs @@ -9,26 +9,36 @@ public static class FixedPlatt { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils + .LoadFeaturizedAdultDataset(mlContext); + // Leave out 10% of data for testing. - var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); + var trainTestData = mlContext.Data + .TrainTestSplit(data, testFraction: 0.3); - // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + // Create data training pipeline for non calibrated trainer and train + // Naive calibrator on top of it. + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(); - // Fit the pipeline, and get a transformer that knows how to score new data. + // Fit the pipeline, and get a transformer that knows how to score new + // data. var transformer = pipeline.Fit(trainTestData.TrainSet); // Fit this pipeline to the training data. - // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample - // bears positive sentiment. This estimate is relative to the numbers obtained. + // Let's score the new data. The score will give us a numerical + // estimation of the chance that the particular sample bears positive + // sentiment. This estimate is relative to the numbers obtained. 
var scoredData = transformer.Transform(trainTestData.TestSet); - var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false); + var outScores = mlContext.Data + .CreateEnumerable(scoredData, reuseRowObject: false); + PrintScore(outScores, 5); // Preview of scoredDataPreview.RowView // Score 4.18144 @@ -37,16 +47,24 @@ public static void Example() // Score -2.554229 // Score 5.36571 - // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer - // that can transform the scored data by adding a new column names "Probability". - var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt(slope: -1f, offset: -0.05f); + // Let's train a calibrator estimator on this scored dataset. The + // trained calibrator estimator produces a transformer that can + // transform the scored data by adding a new column names "Probability". + var calibratorEstimator = mlContext.BinaryClassification.Calibrators + .Platt(slope: -1f, offset: -0.05f); + var calibratorTransformer = calibratorEstimator.Fit(scoredData); - // Transform the scored data with a calibrator transfomer by adding a new column names "Probability". - // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval - // representing the chance that the respective sample bears positive sentiment. + // Transform the scored data with a calibrator transfomer by adding a + // new column names "Probability". This column is a calibrated version + // of the "Score" column, meaning its values are a valid probability + // value in the [0, 1] interval representing the chance that the + // respective sample bears positive sentiment. 
var finalData = calibratorTransformer.Transform(scoredData); - var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false); + var outScoresAndProbabilities = mlContext.Data + .CreateEnumerable(finalData, + reuseRowObject: false); + PrintScoreAndProbability(outScoresAndProbabilities, 5); // Score 4.18144 Probability 0.9856767 // Score -14.10248 Probability 7.890148E-07 @@ -61,10 +79,13 @@ private static void PrintScore(IEnumerable values, int numRows) Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); } - private static void PrintScoreAndProbability(IEnumerable values, int numRows) + private static void PrintScoreAndProbability( + IEnumerable values, int numRows) + { foreach (var value in values.Take(numRows)) - Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability); + Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", + value.Score, "Probability", value.Probability); } private class ScoreValue diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs index 15a3162d4e..1b1b63139e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs @@ -9,26 +9,35 @@ public static class Isotonic { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils + .LoadFeaturizedAdultDataset(mlContext); + // Leave out 10% of data for testing. - var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); + var trainTestData = mlContext.Data + .TrainTestSplit(data, testFraction: 0.3); - // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + // Create data training pipeline for non calibrated trainer and train + // Naive calibrator on top of it. + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(); - // Fit the pipeline, and get a transformer that knows how to score new data. + // Fit the pipeline, and get a transformer that knows how to score new + // data. var transformer = pipeline.Fit(trainTestData.TrainSet); // Fit this pipeline to the training data. - // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample - // bears positive sentiment. This estimate is relative to the numbers obtained. + // Let's score the new data. The score will give us a numerical + // estimation of the chance that the particular sample bears positive + // sentiment. This estimate is relative to the numbers obtained. 
var scoredData = transformer.Transform(trainTestData.TestSet); - var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false); + var outScores = mlContext.Data + .CreateEnumerable(scoredData, reuseRowObject: false); PrintScore(outScores, 5); // Preview of scoredDataPreview.RowView @@ -38,16 +47,24 @@ public static void Example() // Score -2.554229 // Score 5.36571 - // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer - // that can transform the scored data by adding a new column names "Probability". - var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Isotonic(); + // Let's train a calibrator estimator on this scored dataset. The + // trained calibrator estimator produces a transformer that can + // transform the scored data by adding a new column names "Probability". + var calibratorEstimator = mlContext.BinaryClassification.Calibrators + .Isotonic(); + var calibratorTransformer = calibratorEstimator.Fit(scoredData); - // Transform the scored data with a calibrator transfomer by adding a new column names "Probability". - // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval - // representing the chance that the respective sample bears positive sentiment. + // Transform the scored data with a calibrator transfomer by adding a + // new column names "Probability". This column is a calibrated version + // of the "Score" column, meaning its values are a valid probability + // value in the [0, 1] interval representing the chance that the + // respective sample bears positive sentiment. 
var finalData = calibratorTransformer.Transform(scoredData); - var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false); + var outScoresAndProbabilities = mlContext.Data + .CreateEnumerable(finalData, + reuseRowObject: false); + PrintScoreAndProbability(outScoresAndProbabilities, 5); // Score 4.18144 Probability 0.8 // Score -14.10248 Probability 1E-15 @@ -62,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows) Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); } - private static void PrintScoreAndProbability(IEnumerable values, int numRows) + private static void PrintScoreAndProbability( + IEnumerable values, int numRows) + { foreach (var value in values.Take(numRows)) - Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability); + Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", + value.Score, "Probability", value.Probability); + } private class ScoreValue diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs index 84a004b1c5..054d4f9e31 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs @@ -9,26 +9,36 @@ public static class Naive { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils + .LoadFeaturizedAdultDataset(mlContext); + // Leave out 10% of data for testing. - var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); + var trainTestData = mlContext.Data + .TrainTestSplit(data, testFraction: 0.3); - // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + // Create data training pipeline for non calibrated trainer and train + // Naive calibrator on top of it. + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(); - // Fit the pipeline, and get a transformer that knows how to score new data. + // Fit the pipeline, and get a transformer that knows how to score new + // data. var transformer = pipeline.Fit(trainTestData.TrainSet); // Fit this pipeline to the training data. - // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample - // bears positive sentiment. This estimate is relative to the numbers obtained. + // Let's score the new data. The score will give us a numerical + // estimation of the chance that the particular sample bears positive + // sentiment. This estimate is relative to the numbers obtained. 
var scoredData = transformer.Transform(trainTestData.TestSet); - var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false); + var outScores = mlContext.Data + .CreateEnumerable(scoredData, reuseRowObject: false); + PrintScore(outScores, 5); // Preview of scoredDataPreview.RowView // Score 4.18144 @@ -37,16 +47,24 @@ public static void Example() // Score -2.554229 // Score 5.36571 - // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer - // that can transform the scored data by adding a new column names "Probability". - var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Naive(); + // Let's train a calibrator estimator on this scored dataset. The + // trained calibrator estimator produces a transformer that can + // transform the scored data by adding a new column names "Probability". + var calibratorEstimator = mlContext.BinaryClassification.Calibrators + .Naive(); + var calibratorTransformer = calibratorEstimator.Fit(scoredData); - // Transform the scored data with a calibrator transfomer by adding a new column names "Probability". - // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval - // representing the chance that the respective sample bears positive sentiment. + // Transform the scored data with a calibrator transfomer by adding a + // new column names "Probability". This column is a calibrated version + // of the "Score" column, meaning its values are a valid probability + // value in the [0, 1] interval representing the chance that the + // respective sample bears positive sentiment. 
var finalData = calibratorTransformer.Transform(scoredData); - var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false); + var outScoresAndProbabilities = mlContext.Data + .CreateEnumerable(finalData, + reuseRowObject: false); + PrintScoreAndProbability(outScoresAndProbabilities, 5); // Score 4.18144 Probability 0.775 // Score -14.10248 Probability 0.01923077 @@ -61,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows) Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); } - private static void PrintScoreAndProbability(IEnumerable values, int numRows) + private static void PrintScoreAndProbability( + IEnumerable values, int numRows) + { foreach (var value in values.Take(numRows)) - Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability); + Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", + value.Score, "Probability", value.Probability); + } private class ScoreValue diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs index aa0d7d0798..709db362e6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs @@ -9,26 +9,36 @@ public static class Platt { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils + .LoadFeaturizedAdultDataset(mlContext); + // Leave out 10% of data for testing. - var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); + var trainTestData = mlContext.Data + .TrainTestSplit(data, testFraction: 0.3); - // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it. - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + // Create data training pipeline for non calibrated trainer and train + // Naive calibrator on top of it. + var pipeline = mlContext.BinaryClassification.Trainers + .AveragedPerceptron(); - // Fit the pipeline, and get a transformer that knows how to score new data. + // Fit the pipeline, and get a transformer that knows how to score new + // data. var transformer = pipeline.Fit(trainTestData.TrainSet); // Fit this pipeline to the training data. - // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample - // bears positive sentiment. This estimate is relative to the numbers obtained. + // Let's score the new data. The score will give us a numerical + // estimation of the chance that the particular sample bears positive + // sentiment. This estimate is relative to the numbers obtained. 
var scoredData = transformer.Transform(trainTestData.TestSet); - var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false); + var outScores = mlContext.Data + .CreateEnumerable(scoredData, reuseRowObject: false); + PrintScore(outScores, 5); // Preview of scoredDataPreview.RowView // Score 4.18144 @@ -37,16 +47,24 @@ public static void Example() // Score -2.554229 // Score 5.36571 - // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer - // that can transform the scored data by adding a new column names "Probability". - var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt(); + // Let's train a calibrator estimator on this scored dataset. The + // trained calibrator estimator produces a transformer that can + // transform the scored data by adding a new column names "Probability". + var calibratorEstimator = mlContext.BinaryClassification.Calibrators + .Platt(); + var calibratorTransformer = calibratorEstimator.Fit(scoredData); - // Transform the scored data with a calibrator transfomer by adding a new column names "Probability". - // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval - // representing the chance that the respective sample bears positive sentiment. + // Transform the scored data with a calibrator transfomer by adding a + // new column names "Probability". This column is a calibrated version + // of the "Score" column, meaning its values are a valid probability + // value in the [0, 1] interval representing the chance that the + // respective sample bears positive sentiment. 
var finalData = calibratorTransformer.Transform(scoredData); - var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false); + var outScoresAndProbabilities = mlContext.Data + .CreateEnumerable(finalData, + reuseRowObject: false); + PrintScoreAndProbability(outScoresAndProbabilities, 5); // Score 4.18144 Probability 0.8511352 // Score -14.10248 Probability 0.001633563 @@ -61,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows) Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score); } - private static void PrintScoreAndProbability(IEnumerable values, int numRows) + private static void PrintScoreAndProbability( + IEnumerable values, int numRows) + { foreach (var value in values.Take(numRows)) - Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability); + Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", + value.Score, "Probability", value.Probability); + } private class ScoreValue diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs index afc9231814..4e8bec4f4e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs @@ -10,41 +10,51 @@ public static class FactorizationMachine { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(); + var pipeline = mlContext.BinaryClassification.Trainers + .FieldAwareFactorizationMachine(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. 
+ var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -54,7 +64,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -77,7 +89,9 @@ public static void Example() // Precision || 0.9063 | 0.8732 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -88,13 +102,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? 
randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -117,11 +136,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs index b415ae5dc9..f33feb7063 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs @@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastForest { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.FastForest(); + var pipeline = mlContext.BinaryClassification.Trainers + .FastForest(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -50,7 +59,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -73,7 +84,9 @@ public static void Example() // Precision || 0.6182 | 0.5416 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -84,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -113,12 +131,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs index 372be0f1fa..c5bf4d5366 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastForestWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -36,23 +39,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.FastForest(options); + var pipeline = mlContext.BinaryClassification.Trainers + .FastForest(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -62,7 +71,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -85,7 +96,9 @@ public static void Example() // Precision || 0.7072 | 0.7806 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -96,13 +109,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -125,12 +143,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs index d260ec8666..f50b8f9732 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs @@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastTree { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.FastTree(); + var pipeline = mlContext.BinaryClassification.Trainers + .FastTree(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -50,7 +59,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -76,7 +87,9 @@ public static void Example() // Precision || 0.6903 | 0.7716 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -87,13 +100,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -116,12 +134,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs index b02e9b977b..87e894d903 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastTreeWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -36,23 +39,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.FastTree(options); + var pipeline = mlContext.BinaryClassification.Trainers + .FastTree(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -62,7 +71,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -88,7 +99,9 @@ public static void Example() // Precision || 0.6903 | 0.7716 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -99,13 +112,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -128,12 +146,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs index bc2eaef343..bddc926ccf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -8,28 +8,36 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FieldAwareFactorizationMachine { - // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally + // This example first train a field-aware factorization to binary + // classification, measure the trained model's quality, and finally // use the trained model to make prediction. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. 
- // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. IEnumerable data = GenerateRandomDataPoints(500); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(data); // Define the trainer. - // This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf - // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training - // algorithm implemented in ML.NET. - var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine( + // This trainer trains field-aware factorization (FFM) + // for binary classification. + // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory + // behind and + // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the + // training algorithm implemented in ML.NET. + var pipeline = mlContext.BinaryClassification.Trainers + .FieldAwareFactorizationMachine( // Specify three feature columns! - new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), + nameof(DataPoint.Field2) }, // Specify binary label's column name. nameof(DataPoint.Label) ); @@ -40,7 +48,8 @@ public static void Example() var transformedTrainingData = model.Transform(trainingData); // Measure the quality of the trained model. 
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTrainingData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTrainingData); // Show the quality metrics. PrintMetrics(metrics); @@ -68,14 +77,19 @@ public static void Example() // Precision || 0.7878 | 0.8235 | // Create prediction function from the trained model. - var engine = mlContext.Model.CreatePredictionEngine(model); + var engine = mlContext.Model + .CreatePredictionEngine(model); // Make some predictions. foreach(var dataPoint in data.Take(5)) { var result = engine.Predict(dataPoint); - Console.WriteLine($"Actual label: {dataPoint.Label}, predicted label: {result.PredictedLabel}, " + - $"score of being positive class: {result.Score}, and probability of beling positive class: {result.Probability}."); + Console.WriteLine($"Actual label: {dataPoint.Label}, " + + $"predicted label: {result.PredictedLabel}, " + + $"score of being positive class: {result.Score}, " + + $"and probability of beling positive class: " + + $"{result.Probability}."); + } // Expected output: @@ -95,7 +109,8 @@ private class DataPoint // Label. public bool Label { get; set; } - // Features from the first field. Note that different fields can have different numbers of features. + // Features from the first field. Note that different fields can have + // different numbers of features. [VectorType(featureLength)] public float[] Field0 { get; set; } @@ -108,8 +123,8 @@ private class DataPoint public float[] Field2 { get; set; } } - // This class defines objects produced by trained model. The trained model maps - // a DataPoint to a Result. + // This class defines objects produced by trained model. The trained model + // maps a DataPoint to a Result. public class Result { // Label. @@ -123,13 +138,16 @@ public class Result } // Function used to create toy data sets. 
- private static IEnumerable GenerateRandomDataPoints(int exampleCount, int seed = 0) + private static IEnumerable GenerateRandomDataPoints( + int exampleCount, int seed = 0) + { var rnd = new Random(seed); var data = new List(); for (int i = 0; i < exampleCount; ++i) { - // Initialize an example with a random label and an empty feature vector. + // Initialize an example with a random label and an empty feature + // vector. var sample = new DataPoint() { Label = rnd.Next() % 2 == 0, @@ -139,9 +157,10 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount, }; // Fill feature vectors according the assigned label. - // Notice that features from different fields have different biases and therefore different distributions. - // In practices such as game recommendation, one may use one field to store features from user profile and - // another field to store features from game profile. + // Notice that features from different fields have different biases + // and therefore different distributions. In practices such as game + // recommendation, one may use one field to store features from user + // profile and another field to store features from game profile. for (int j = 0; j < featureLength; ++j) { var value0 = (float)rnd.NextDouble(); @@ -169,14 +188,20 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount, } // Function used to show evaluation metrics such as accuracy of predictions. 
- private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics) + private static void PrintMetrics( + CalibratedBinaryClassificationMetrics metrics) + { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt index 3f41aa671f..22cdd45721 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt @@ -3,19 +3,24 @@ string ClassName="FieldAwareFactorizationMachine"; string Trainer = @"FieldAwareFactorizationMachine( // Specify three feature columns! - new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), + nameof(DataPoint.Field2) }, // Specify binary label's column name. 
nameof(DataPoint.Label) )"; string OptionsInclude = null; string Comments = @" - // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally + // This example first train a field-aware factorization to binary + // classification, measure the trained model's quality, and finally // use the trained model to make prediction."; -string TrainerDescription = @"// This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf - // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training - // algorithm implemented in ML.NET."; +string TrainerDescription = @"// This trainer trains field-aware factorization (FFM) + // for binary classification. + // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory + // behind and + // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the + // training algorithm implemented in ML.NET."; string TrainerOptions = null; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs index 47e53dfdc3..08b48a9e74 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs @@ -9,26 +9,31 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FieldAwareFactorizationMachineWithOptions { - // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally + // This example first train a field-aware factorization to binary + // classification, measure the trained model's 
quality, and finally // use the trained model to make prediction. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. IEnumerable data = GenerateRandomDataPoints(500); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(data); // Define trainer options. var options = new FieldAwareFactorizationMachineTrainer.Options { FeatureColumnName = nameof(DataPoint.Field0), - ExtraFeatureColumns = new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + ExtraFeatureColumns = + new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + LabelColumnName = nameof(DataPoint.Label), LambdaLatent = 0.01f, LambdaLinear = 0.001f, @@ -38,10 +43,14 @@ public static void Example() }; // Define the trainer. - // This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf - // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training - // algorithm implemented in ML.NET. - var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(options); + // This trainer trains field-aware factorization (FFM) + // for binary classification. 
+ // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory + // behind and + // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the + // training algorithm implemented in ML.NET. + var pipeline = mlContext.BinaryClassification.Trainers + .FieldAwareFactorizationMachine(options); // Train the model. var model = pipeline.Fit(trainingData); @@ -50,7 +59,8 @@ public static void Example() var transformedTrainingData = model.Transform(trainingData); // Measure the quality of the trained model. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTrainingData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTrainingData); // Show the quality metrics. PrintMetrics(metrics); @@ -78,14 +88,19 @@ public static void Example() // Precision || 0.7425 | 0.8319 | // Create prediction function from the trained model. - var engine = mlContext.Model.CreatePredictionEngine(model); + var engine = mlContext.Model + .CreatePredictionEngine(model); // Make some predictions. foreach(var dataPoint in data.Take(5)) { var result = engine.Predict(dataPoint); - Console.WriteLine($"Actual label: {dataPoint.Label}, predicted label: {result.PredictedLabel}, " + - $"score of being positive class: {result.Score}, and probability of beling positive class: {result.Probability}."); + Console.WriteLine($"Actual label: {dataPoint.Label}, " + + $"predicted label: {result.PredictedLabel}, " + + $"score of being positive class: {result.Score}, " + + $"and probability of beling positive class: " + + $"{result.Probability}."); + } // Expected output: @@ -105,7 +120,8 @@ private class DataPoint // Label. public bool Label { get; set; } - // Features from the first field. Note that different fields can have different numbers of features. + // Features from the first field. Note that different fields can have + // different numbers of features. 
[VectorType(featureLength)] public float[] Field0 { get; set; } @@ -118,8 +134,8 @@ private class DataPoint public float[] Field2 { get; set; } } - // This class defines objects produced by trained model. The trained model maps - // a DataPoint to a Result. + // This class defines objects produced by trained model. The trained model + // maps a DataPoint to a Result. public class Result { // Label. @@ -133,13 +149,16 @@ public class Result } // Function used to create toy data sets. - private static IEnumerable GenerateRandomDataPoints(int exampleCount, int seed = 0) + private static IEnumerable GenerateRandomDataPoints( + int exampleCount, int seed = 0) + { var rnd = new Random(seed); var data = new List(); for (int i = 0; i < exampleCount; ++i) { - // Initialize an example with a random label and an empty feature vector. + // Initialize an example with a random label and an empty feature + // vector. var sample = new DataPoint() { Label = rnd.Next() % 2 == 0, @@ -149,9 +168,10 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount, }; // Fill feature vectors according the assigned label. - // Notice that features from different fields have different biases and therefore different distributions. - // In practices such as game recommendation, one may use one field to store features from user profile and - // another field to store features from game profile. + // Notice that features from different fields have different biases + // and therefore different distributions. In practices such as game + // recommendation, one may use one field to store features from user + // profile and another field to store features from game profile. for (int j = 0; j < featureLength; ++j) { var value0 = (float)rnd.NextDouble(); @@ -179,14 +199,20 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount, } // Function used to show evaluation metrics such as accuracy of predictions. 
- private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics) + private static void PrintMetrics( + CalibratedBinaryClassificationMetrics metrics) + { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt index c3de9d1882..0c7e32048d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt @@ -6,17 +6,23 @@ string Trainer = "FieldAwareFactorizationMachine"; string OptionsInclude = @"using Microsoft.ML.Trainers;"; string Comments = @" - // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally + // This example first train a field-aware factorization to binary + // classification, measure the trained model's quality, and finally // use the trained model to make prediction."; -string TrainerDescription = @"// This trainer trains field-aware 
factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf - // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training - // algorithm implemented in ML.NET."; +string TrainerDescription = @"// This trainer trains field-aware factorization (FFM) + // for binary classification. + // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory + // behind and + // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the + // training algorithm implemented in ML.NET."; string TrainerOptions = @"FieldAwareFactorizationMachineTrainer.Options { FeatureColumnName = nameof(DataPoint.Field0), - ExtraFeatureColumns = new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + ExtraFeatureColumns = + new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) }, + LabelColumnName = nameof(DataPoint.Label), LambdaLatent = 0.01f, LambdaLinear = 0.001f, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs index 3b9c36644f..d4ce855e3c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs @@ -7,12 +7,14 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class Gam { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(); // Create the dataset. @@ -27,30 +29,36 @@ public static void Example() var validSet = dataSets.TestSet; // Create a GAM trainer. - // Use a small number of bins for this example. The setting below means for each feature, - // we divide its range into 16 discrete regions for the training process. Note that these - // regions are not evenly spaced, and that the final model may contain fewer bins, as - // neighboring bins with identical values will be combined. In general, we recommend using - // at least the default number of bins, as a small number of bins limits the capacity of - // the model. - var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16); - - // Fit the model using both of training and validation sets. GAM can use a technique called - // pruning to tune the model to the validation set after training to improve generalization. + // Use a small number of bins for this example. The setting below means + // for each feature, we divide its range into 16 discrete regions for + // the training process. Note that these regions are not evenly spaced, + // and that the final model may contain fewer bins, as neighboring bins + // with identical values will be combined. In general, we recommend + // using at least the default number of bins, as a small number of bins + // limits the capacity of the model. + var trainer = mlContext.BinaryClassification.Trainers + .Gam(maximumBinCountPerFeature: 16); + + // Fit the model using both of training and validation sets. GAM can use + // a technique called pruning to tune the model to the validation set + // after training to improve generalization. var model = trainer.Fit(trainSet, validSet); // Extract the model parameters. 
var gam = model.Model.SubModel; - // Now we can inspect the parameters of the Generalized Additive Model to understand the fit - // and potentially learn about our dataset. - // First, we will look at the bias; the bias represents the average prediction for the training data. + // Now we can inspect the parameters of the Generalized Additive Model + // to understand the fit and potentially learn about our dataset. First, + // we will look at the bias; the bias represents the average prediction + // for the training data. Console.WriteLine($"Average prediction: {gam.Bias:0.00}"); - // Now look at the shape functions that the model has learned. Similar to a linear model, we have - // one response per feature, and they are independent. Unlike a linear model, this response is a - // generic function instead of a line. Because we have included a bias term, each feature response - // represents the deviation from the average prediction as a function of the feature value. + // Now look at the shape functions that the model has learned. Similar + // to a linear model, we have one response per feature, and they are + // independent. Unlike a linear model, this response is a generic + // function instead of a line. Because we have included a bias term, + // each feature response represents the deviation from the average + // prediction as a function of the feature value. for (int i = 0; i < gam.NumberOfShapeFunctions; i++) { // Break a line. @@ -62,11 +70,14 @@ public static void Example() // Get the bin effects; these are the function values for each bin. var binEffects = gam.GetBinEffects(i); - // Now, write the function to the console. The function is a set of bins, and the corresponding - // function values. You can think of GAMs as building a bar-chart or lookup table for each feature. + // Now, write the function to the console. The function is a set of + // bins, and the corresponding function values. 
You can think of + // GAMs as building a bar-chart or lookup table for each feature. Console.WriteLine($"Feature{i}"); for (int j = 0; j < binUpperBounds.Count; j++) - Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); + Console.WriteLine( + $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); + } // Expected output: @@ -91,18 +102,23 @@ public static void Example() // x < 0.31 => -0.138 // x < ∞ => -0.188 - // Let's consider this output. To score a given example, we look up the first bin where the inequality - // is satisfied for the feature value. We can look at the whole function to get a sense for how the - // model responds to the variable on a global level. - // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average - // expected output over the training set. Very few bins are used to model the second feature because the GAM model - // discards unchanged bins to create smaller models. - // One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be - // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use - // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is - // real or just sampling noise. See for example: - // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model - // Distillation." arXiv:1710.06169." + // Let's consider this output. To score a given example, we look up the + // first bin where the inequality is satisfied for the feature value. + // We can look at the whole function to get a sense for how the model + // responds to the variable on a global level.The model can be seen to + // reconstruct the parabolic and step-wise function, shifted with + // respect to the average expected output over the training set. 
+ // Very few bins are used to model the second feature because the GAM + // model discards unchanged bins to create smaller models. One last + // thing to notice is that these feature functions can be noisy. While + // we know that Feature1 should be symmetric, this is not captured in + // the model. This is due to noise in the data. Common practice is to + // use resampling methods to estimate a confidence interval at each bin. + // This will help to determine if the effect is real or just sampling + // noise. See for example: Tan, Caruana, Hooker, and Lou. + // "Distill-and-Compare: Auditing Black-Box Models Using Transparent + // Model Distillation." + // arXiv:1710.06169." } private class Data @@ -114,13 +130,17 @@ private class Data } /// - /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0, - /// while Feature2 is a simple piecewise function. + /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. + /// Feature1 is a parabola centered around 0, while Feature2 is a simple + /// piecewise function. /// /// The number of examples to generate. - /// The seed for the random number generator used to produce data. + /// The seed for the random number generator used to + /// produce data. /// - private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1) + private static IEnumerable GenerateData(int numExamples = 25000, + int seed = 1) + { var rng = new Random(seed); float centeredFloat() => (float)(rng.NextDouble() - 0.5); @@ -131,7 +151,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed Features = new float[2] { centeredFloat(), centeredFloat() } }; // Compute the label from the shape functions and add noise. 
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; + data.Label = Sigmoid(Parabola(data.Features[0]) + + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs index e4a408a3ae..cdacd51b93 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs @@ -8,12 +8,14 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class GamWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(); // Create the dataset. @@ -28,14 +30,15 @@ public static void Example() var validSet = dataSets.TestSet; // Create a GAM trainer. - // Use a small number of bins for this example. The setting below means for each feature, - // we divide its range into 16 discrete regions for the training process. Note that these - // regions are not evenly spaced, and that the final model may contain fewer bins, as - // neighboring bins with identical values will be combined. 
In general, we recommend using - // at least the default number of bins, as a small number of bins limits the capacity of - // the model. - // Also, set the learning rate to half the default to slow down the gradient descent, and - // double the number of iterations to compensate. + // Use a small number of bins for this example. The setting below means + // for each feature, we divide its range into 16 discrete regions for + // the training process. Note that these regions are not evenly spaced, + // and that the final model may contain fewer bins, as neighboring bins + // with identical values will be combined. In general, we recommend + // using at least the default number of bins, as a small number of bins + // limits the capacity of the model. Also, set the learning rate to half + // the default to slow down the gradient descent, and double the number + // of iterations to compensate. var trainer = mlContext.BinaryClassification.Trainers.Gam( new GamBinaryTrainer.Options { NumberOfIterations = 19000, @@ -43,22 +46,26 @@ public static void Example() LearningRate = 0.001 }); - // Fit the model using both of training and validation sets. GAM can use a technique called - // pruning to tune the model to the validation set after training to improve generalization. + // Fit the model using both of training and validation sets. GAM can use + // a technique called pruning to tune the model to the validation set + // after training to improve generalization. var model = trainer.Fit(trainSet, validSet); // Extract the model parameters. var gam = model.Model.SubModel; - // Now we can inspect the parameters of the Generalized Additive Model to understand the fit - // and potentially learn about our dataset. - // First, we will look at the bias; the bias represents the average prediction for the training data. + // Now we can inspect the parameters of the Generalized Additive Model + // to understand the fit and potentially learn about our dataset. 
First, + // we will look at the bias; the bias represents the average prediction + // for the training data. Console.WriteLine($"Average prediction: {gam.Bias:0.00}"); - // Now look at the shape functions that the model has learned. Similar to a linear model, we have - // one response per feature, and they are independent. Unlike a linear model, this response is a - // generic function instead of a line. Because we have included a bias term, each feature response - // represents the deviation from the average prediction as a function of the feature value. + // Now look at the shape functions that the model has learned. Similar + // to a linear model, we have one response per feature, and they are + // independent. Unlike a linear model, this response is a generic + // function instead of a line. Because we have included a bias term, + // each feature response represents the deviation from the average + // prediction as a function of the feature value. for (int i = 0; i < gam.NumberOfShapeFunctions; i++) { // Break a line. @@ -70,11 +77,13 @@ public static void Example() // Get the bin effects; these are the function values for each bin. var binEffects = gam.GetBinEffects(i); - // Now, write the function to the console. The function is a set of bins, and the corresponding - // function values. You can think of GAMs as building a bar-chart or lookup table for each feature. + // Now, write the function to the console. The function is a set of + // bins, and the corresponding function values. You can think of + // GAMs as building a bar-chart or lookup table for each feature. Console.WriteLine($"Feature{i}"); for (int j = 0; j < binUpperBounds.Count; j++) - Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); + Console.WriteLine( + $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}"); } // Expected output: @@ -99,18 +108,23 @@ public static void Example() // x < 0.31 => -0.138 // x < ∞ => -0.188 - // Let's consider this output. 
To score a given example, we look up the first bin where the inequality - is satisfied for the feature value. We can look at the whole function to get a sense for how the - model responds to the variable on a global level. - The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average - expected output over the training set. Very few bins are used to model the second feature because the GAM model - discards unchanged bins to create smaller models. - One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be - symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use - resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is - real or just sampling noise. See for example: - Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model - Distillation." arXiv:1710.06169." + // Let's consider this output. To score a given example, we look up the + // first bin where the inequality is satisfied for the feature value. + // We can look at the whole function to get a sense for how the model + // responds to the variable on a global level. The model can be seen to + // reconstruct the parabolic and step-wise function, shifted with + // respect to the average expected output over the training set. Very + // few bins are used to model the second feature because the GAM model + // discards unchanged bins to create smaller models. One last thing to + // notice is that these feature functions can be noisy. While we know + // that Feature1 should be symmetric, this is not captured in the model. + // This is due to noise in the data. Common practice is to use + // resampling methods to estimate a confidence interval at each bin. + // This will help to determine if the effect is real or just sampling + // noise. 
See for example: Tan, Caruana, Hooker, and Lou. + // "Distill-and-Compare: Auditing Black-Box Models Using Transparent + // Model Distillation." + // arXiv:1710.06169." } private class Data @@ -122,13 +136,17 @@ private class Data } /// - /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0, - /// while Feature2 is a simple piecewise function. + /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. + /// Feature1 is a parabola centered around 0, while Feature2 is a simple + /// piecewise function. /// /// The number of examples to generate. - /// The seed for the random number generator used to produce data. + /// The seed for the random number generator used to + /// produce data. /// - private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1) + private static IEnumerable GenerateData(int numExamples = 25000, + int seed = 1) + { var rng = new Random(seed); float centeredFloat() => (float)(rng.NextDouble() - 0.5); @@ -140,7 +158,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed Features = new float[2] { centeredFloat(), centeredFloat() } }; // Compute the label from the shape functions and add noise. 
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; + data.Label = Sigmoid(Parabola(data.Features[0]) + + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs index e762da494b..5aece5a264 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs @@ -10,35 +10,43 @@ public static class LbfgsLogisticRegression { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(); + var pipeline = mlContext.BinaryClassification.Trainers + .LbfgsLogisticRegression(); // Train the model. 
var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -74,7 +84,9 @@ public static void Example() // Precision || 0.8583 | 0.8972 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -85,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. 
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -114,11 +131,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs index 4204f0c4c2..fe2fcb14ab 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs @@ -11,15 +11,17 @@ public static class LbfgsLogisticRegressionWithOptions { public static void Example() { - // 
Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -31,23 +33,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(options); + var pipeline = mlContext.BinaryClassification.Trainers + .LbfgsLogisticRegression(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -57,7 +65,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -83,7 +93,9 @@ public static void Example() // Precision || 0.8571 | 0.8902 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -94,13 +106,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -123,11 +140,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs index 4bf27d017a..c89a4b2b2c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs @@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class LightGbm { - // This example requires installation of additional nuget package - // Microsoft.ML.LightGbm. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(); + var pipeline = mlContext.BinaryClassification.Trainers + .LightGbm(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -50,7 +59,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -73,7 +84,9 @@ public static void Example() // Precision || 0.7531 | 0.7860 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -84,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -113,11 +131,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt index 8bef899d83..2de4012354 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt @@ -10,8 +10,9 @@ string LabelThreshold = "0.5f"; string DataSepValue = "0.03f"; string OptionsInclude = ""; string Comments= @" - // This example requires installation of additional nuget package - // Microsoft.ML.LightGbm."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; string ExpectedOutputPerInstance = @"// Expected output: // Label: True, Prediction: True diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs index 82bdace764..5df0a59ee5 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class LightGbmWithOptions { - // This example requires installation of additional nuget package - // Microsoft.ML.LightGbm. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -35,23 +38,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(options); + var pipeline = mlContext.BinaryClassification.Trainers + .LightGbm(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -61,7 +70,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -84,7 +95,9 @@ public static void Example() // Precision || 0.6563 | 0.7131 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -95,13 +108,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? 
randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -124,11 +142,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.tt index b193e17b54..3488a6178e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.tt @@ -9,8 +9,9 @@ string LabelThreshold = "0.5f"; string DataSepValue = "0.03f"; string OptionsInclude = "using Microsoft.ML.Trainers.LightGbm;"; string Comments= @" - // This example requires installation of additional nuget 
package - // Microsoft.ML.LightGbm."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; string TrainerOptions = @"LightGbmBinaryTrainer.Options { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvm.cs index 80c58da58a..f47279c331 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvm.cs @@ -10,35 +10,43 @@ public static class LinearSvm { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LinearSvm(); + var pipeline = mlContext.BinaryClassification.Trainers + .LinearSvm(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -71,7 +81,9 @@ public static void Example() // Precision || 0.6624 | 0.8387 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? 
randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvmWithOptions.cs index 293769438b..857ac93439 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LinearSvmWithOptions.cs @@ -11,15 +11,17 @@ public static class LinearSvmWithOptions { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -31,23 +33,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.LinearSvm(options); + var pipeline = mlContext.BinaryClassification.Trainers + .LinearSvm(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -57,7 +65,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -80,7 +90,9 @@ public static void Example() // Precision || 0.8044 | 0.9127 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -91,13 +103,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -120,11 +137,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/MultipleFeatureColumnsBinaryClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/MultipleFeatureColumnsBinaryClassification.ttinclude index ace4bdec1d..d6f7e53a24 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/MultipleFeatureColumnsBinaryClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/MultipleFeatureColumnsBinaryClassification.ttinclude @@ -13,28 +13,32 @@ namespace Samples.Dynamic.Trainers.BinaryClassification {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. IEnumerable data = GenerateRandomDataPoints(500); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(data); <# if (TrainerOptions == null) { #> // Define the trainer. <#=TrainerDescription#> - var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>; + var pipeline = mlContext.BinaryClassification.Trainers + .<#=Trainer#>; <# } else { #> // Define trainer options. var options = new <#=TrainerOptions#>; // Define the trainer. <#=TrainerDescription#> - var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(options); + var pipeline = mlContext.BinaryClassification.Trainers + .<#=Trainer#>(options); <# } #> // Train the model. @@ -44,7 +48,8 @@ namespace Samples.Dynamic.Trainers.BinaryClassification var transformedTrainingData = model.Transform(trainingData); // Measure the quality of the trained model. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTrainingData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTrainingData); // Show the quality metrics. PrintMetrics(metrics); @@ -52,14 +57,19 @@ namespace Samples.Dynamic.Trainers.BinaryClassification <#=ExpectedOutput#> // Create prediction function from the trained model. - var engine = mlContext.Model.CreatePredictionEngine(model); + var engine = mlContext.Model + .CreatePredictionEngine(model); // Make some predictions. 
foreach(var dataPoint in data.Take(5)) { var result = engine.Predict(dataPoint); - Console.WriteLine($"Actual label: {dataPoint.Label}, predicted label: {result.PredictedLabel}, " + - $"score of being positive class: {result.Score}, and probability of beling positive class: {result.Probability}."); + Console.WriteLine($"Actual label: {dataPoint.Label}, " + + $"predicted label: {result.PredictedLabel}, " + + $"score of being positive class: {result.Score}, " + + $"and probability of being positive class: " + + $"{result.Probability}."); + } <#=ExpectedOutputPerInstance#> @@ -74,7 +84,8 @@ namespace Samples.Dynamic.Trainers.BinaryClassification // Label. public bool Label { get; set; } - // Features from the first field. Note that different fields can have different numbers of features. + // Features from the first field. Note that different fields can have + // different numbers of features. [VectorType(featureLength)] public float[] Field0 { get; set; } @@ -87,8 +98,8 @@ namespace Samples.Dynamic.Trainers.BinaryClassification public float[] Field2 { get; set; } } - // This class defines objects produced by trained model. The trained model maps - // a DataPoint to a Result. + // This class defines objects produced by trained model. The trained model + // maps a DataPoint to a Result. public class Result { // Label. @@ -102,13 +113,16 @@ namespace Samples.Dynamic.Trainers.BinaryClassification } // Function used to create toy data sets. - private static IEnumerable GenerateRandomDataPoints(int exampleCount, int seed = 0) + private static IEnumerable GenerateRandomDataPoints( + int exampleCount, int seed = 0) + { var rnd = new Random(seed); var data = new List(); for (int i = 0; i < exampleCount; ++i) { - // Initialize an example with a random label and an empty feature vector. + // Initialize an example with a random label and an empty feature + // vector. 
var sample = new DataPoint() { Label = rnd.Next() % 2 == 0, @@ -118,9 +132,10 @@ namespace Samples.Dynamic.Trainers.BinaryClassification }; // Fill feature vectors according the assigned label. - // Notice that features from different fields have different biases and therefore different distributions. - // In practices such as game recommendation, one may use one field to store features from user profile and - // another field to store features from game profile. + // Notice that features from different fields have different biases + // and therefore different distributions. In practices such as game + // recommendation, one may use one field to store features from user + // profile and another field to store features from game profile. for (int j = 0; j < featureLength; ++j) { var value0 = (float)rnd.NextDouble(); @@ -148,14 +163,20 @@ namespace Samples.Dynamic.Trainers.BinaryClassification } // Function used to show evaluation metrics such as accuracy of predictions. - private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics) + private static void PrintMetrics( + CalibratedBinaryClassificationMetrics metrics) + { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}"); diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs index 55d6c54cc4..f8db811758 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs @@ -9,8 +9,9 @@ public static class PermutationFeatureImportance { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(seed:1); // Create sample data. @@ -19,12 +20,15 @@ public static void Example() // Load the sample data as an IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Define a training pipeline that concatenates features into a vector, normalizes them, and then - // trains a linear model. - var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; - var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns) - .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression()); + // Define a training pipeline that concatenates features into a vector, + // normalizes them, and then trains a linear model. 
+ var featureColumns = + new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; + var pipeline = mlContext.Transforms + .Concatenate("Features", featureColumns) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) + .Append(mlContext.BinaryClassification.Trainers + .SdcaLogisticRegression()); // Fit the pipeline to the data. var model = pipeline.Fit(data); @@ -35,17 +39,22 @@ public static void Example() // Extract the predictor. var linearPredictor = model.LastTransformer; - // Compute the permutation metrics for the linear model using the normalized data. - var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance( - linearPredictor, transformedData, permutationCount: 30); - - // Now let's look at which features are most important to the model overall. - // Get the feature indices sorted by their impact on AUC. - var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.AreaUnderRocCurve}) - .OrderByDescending(feature => Math.Abs(feature.AreaUnderRocCurve.Mean)) + // Compute the permutation metrics for the linear model using the + // normalized data. + var permutationMetrics = mlContext.BinaryClassification + .PermutationFeatureImportance(linearPredictor, transformedData, + permutationCount: 30); + + // Now let's look at which features are most important to the model + // overall. Get the feature indices sorted by their impact on AUC. 
+ var sortedIndices = permutationMetrics + .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve}) + .OrderByDescending( + feature => Math.Abs(feature.AreaUnderRocCurve.Mean)) .Select(feature => feature.index); - Console.WriteLine("Feature\tModel Weight\tChange in AUC\t95% Confidence in the Mean Change in AUC"); + Console.WriteLine("Feature\tModel Weight\tChange in AUC" + + "\t95% Confidence in the Mean Change in AUC"); var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray(); foreach (int i in sortedIndices) { @@ -76,10 +85,14 @@ private class Data /// linear combination of the features. /// /// The number of examples. - /// The bias, or offset, in the calculation of the label. - /// The weight to multiply the first feature with to compute the label. - /// The weight to multiply the second feature with to compute the label. - /// The seed for generating feature values and label noise. + /// The bias, or offset, in the calculation of the label. + /// + /// The weight to multiply the first feature with to + /// compute the label. + /// The weight to multiply the second feature with to + /// compute the label. + /// The seed for generating feature values and label + /// noise. /// An enumerable of Data objects. private static IEnumerable GenerateData(int nExamples = 10000, double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1) @@ -94,7 +107,9 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a noisy label. 
- var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5); + var value = (float)(bias + weight1 * data.Feature1 + weight2 * + data.Feature2 + rng.NextDouble() - 0.5); + data.Label = Sigmoid(value) > 0.5; yield return data; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainer.cs index ed9949a6ee..be54597f13 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainer.cs @@ -10,35 +10,43 @@ public static class Prior { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.Prior(); + var pipeline = mlContext.BinaryClassification.Trainers + .Prior(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -71,7 +81,9 @@ public static void Example() // Precision || 0.6840 | 0.0000 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? 
randomFloat() : randomFloat() + 0.3f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.3f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegression.cs index f3e56fc24f..521c6e671f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegression.cs @@ -10,41 +10,51 @@ public static class SdcaLogisticRegression { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(); + var pipeline = mlContext.BinaryClassification.Trainers + .SdcaLogisticRegression(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. 
Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -54,7 +64,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -77,7 +89,9 @@ public static void Example() // Precision || 0.6210 | 0.6667 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -88,13 +102,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? 
randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -117,11 +136,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegressionWithOptions.cs index 8e564fb8e3..9ef70a2193 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaLogisticRegressionWithOptions.cs @@ -11,20 +11,24 @@ public static class SdcaLogisticRegressionWithOptions { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); @@ -40,23 +44,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(options); + var pipeline = mlContext.BinaryClassification.Trainers + .SdcaLogisticRegression(options); // Train the model. 
var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -66,7 +76,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -89,7 +101,9 @@ public static void Example() // Precision || 0.5957 | 0.6726 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -100,13 +114,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. 
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -129,11 +148,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibrated.cs index 5127c84b28..73107bb4d7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibrated.cs @@ -10,41 +10,51 @@ public static class SdcaNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(); + var pipeline = mlContext.BinaryClassification.Trainers + .SdcaNonCalibrated(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. 
Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: True @@ -54,7 +64,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -76,7 +88,9 @@ public static void Example() // Precision || 0.6185 | 0.6653 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -87,13 +101,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? 
randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -116,11 +135,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibratedWithOptions.cs index 6d55a0286e..f08d65eed1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SdcaNonCalibratedWithOptions.cs @@ -11,20 +11,24 @@ public static class SdcaNonCalibratedWithOptions { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); @@ -42,23 +46,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(options); + var pipeline = mlContext.BinaryClassification.Trainers + .SdcaNonCalibrated(options); // Train the model. 
var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -68,7 +78,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -91,7 +103,9 @@ public static void Example() // Precision || 0.5705 | 0.6809 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -102,13 +116,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. 
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. private class DataPoint { public bool Label { get; set; } @@ -131,11 +150,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibrated.cs index 5eb6a3c947..dcc28ad25c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibrated.cs @@ -10,35 +10,43 @@ public static class SgdCalibrated { public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SgdCalibrated(); + var pipeline = mlContext.BinaryClassification.Trainers + .SgdCalibrated(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -71,7 +81,9 @@ public static void Example() // Precision || 0.6417 | 0.5763 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibratedWithOptions.cs index bbd7772ec4..eea72de60a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdCalibratedWithOptions.cs @@ -11,15 +11,17 @@ public static class SgdCalibratedWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -34,23 +36,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SgdCalibrated(options); + var pipeline = mlContext.BinaryClassification.Trainers + .SgdCalibrated(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -60,7 +68,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -83,7 +93,9 @@ public static void Example() // Precision || 0.5412 | 0.6625 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -94,13 +106,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -123,11 +140,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibrated.cs index 91cee50153..e55f8d2d65 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibrated.cs @@ -10,35 +10,43 @@ public static class SgdNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SgdNonCalibrated(); + var pipeline = mlContext.BinaryClassification.Trainers + .SgdNonCalibrated(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -48,7 +56,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -71,7 +81,9 @@ public static void Example() // Precision || 0.6441 | 0.5759 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibratedWithOptions.cs index 5509e6f426..1729433565 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SgdNonCalibratedWithOptions.cs @@ -11,15 +11,17 @@ public static class SgdNonCalibratedWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -31,23 +33,29 @@ public static void Example() }; // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SgdNonCalibrated(options); + var pipeline = mlContext.BinaryClassification.Trainers + .SgdNonCalibrated(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -57,7 +65,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); + var metrics = mlContext.BinaryClassification + .EvaluateNonCalibrated(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -80,7 +90,9 @@ public static void Example() // Precision || 0.5373 | 0.5878 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -91,13 +103,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.03f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -120,11 +137,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.cs index 243d99cccc..172199df0c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.cs @@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class SymbolicSgdLogisticRegression { - // This example requires installation of additional NuGet package - // Microsoft.ML.Mkl.Components. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. 
- // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(); + var pipeline = mlContext.BinaryClassification.Trainers + .SymbolicSgdLogisticRegression(); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. 
foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -50,7 +59,9 @@ public static void Example() // Label: False, Prediction: True // Evaluate the overall metrics. - var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -73,7 +84,9 @@ public static void Example() // Precision || 0.8235 | 0.8397 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -84,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -113,11 +131,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.tt index 8374d8c301..71f4e6ed66 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegression.tt @@ -10,8 +10,9 @@ string LabelThreshold = "0.5f"; string DataSepValue = "0.1f"; string OptionsInclude = ""; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.Mkl.Components."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string ExpectedOutputPerInstance = @"// Expected output: // Label: True, Prediction: False diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.cs index c8d179e617..a68fadc8c8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification { public static class SymbolicSgdLogisticRegressionWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.Mkl.Components. + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -33,23 +36,29 @@ public static void Example() }; // Define the trainer. 
- var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression(options); + var pipeline = mlContext.BinaryClassification.Trainers + .SymbolicSgdLogisticRegression(options); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Print 5 predictions. foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: True, Prediction: False @@ -59,7 +68,9 @@ public static void Example() // Label: False, Prediction: False // Evaluate the overall metrics. 
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + var metrics = mlContext.BinaryClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -82,7 +93,9 @@ public static void Example() // Precision || 0.7964 | 0.6847 | } - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)random.NextDouble(); @@ -93,13 +106,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = label, // Create random features that are correlated with the label. - // For data points with false label, the feature values are slightly increased by adding a constant. - Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + // For data points with false label, the feature values are + // slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50) + .Select(x => x ? randomFloat() : randomFloat() + + 0.1f).ToArray() + }; } } - // Example with label and 50 feature values. A data set is a collection of such examples. + // Example with label and 50 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public bool Label { get; set; } @@ -122,11 +140,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics) Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}"); Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Precision: " + + $"{metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Precision: " + + $"{metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.tt index c1b3ccd0e4..51bb6d0515 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicSgdLogisticRegressionWithOptions.tt @@ -9,8 +9,9 @@ string LabelThreshold = "0.5f"; string DataSepValue = "0.1f"; string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.Mkl.Components."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; string TrainerOptions = @"SymbolicSgdLogisticRegressionBinaryTrainer.Options() { diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/TreeSamplesTemplate.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/TreeSamplesTemplate.ttinclude index fa37323b89..4cda1fc808 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/TreeSamplesTemplate.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/TreeSamplesTemplate.ttinclude @@ -4,6 +4,7 @@ string LabelThreshold = "0.5f"; string DataSepValue = "0.03f"; string OptionsInclude = "using Microsoft.ML.Trainers.FastTree;"; string Comments= @" - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.FastTree at + // https://www.nuget.org/packages/Microsoft.ML.FastTree/"; #> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 5b318ae9e3..4c46399421 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Reflection; using Samples.Dynamic;