From 9c6325ff7d7aa00b88bda1824218245cb84e9177 Mon Sep 17 00:00:00 2001 From: Rayan Krishnan Date: Mon, 1 Jul 2019 19:13:42 -0700 Subject: [PATCH 1/3] transform/text and projection formatted to 85 char --- .../Transforms/Projection/VectorWhiten.cs | 41 +++++++---- .../Projection/VectorWhitenWithOptions.cs | 40 +++++++---- .../Text/ApplyCustomWordEmbedding.cs | 37 ++++++---- .../Transforms/Text/ApplyWordEmbedding.cs | 40 +++++++---- .../Dynamic/Transforms/Text/FeaturizeText.cs | 43 ++++++++---- .../Text/FeaturizeTextWithOptions.cs | 56 ++++++++++----- .../Text/LatentDirichletAllocation.cs | 43 ++++++++---- .../Dynamic/Transforms/Text/NormalizeText.cs | 26 ++++--- .../Transforms/Text/ProduceHashedNgrams.cs | 62 ++++++++++------ .../Transforms/Text/ProduceHashedWordBags.cs | 59 +++++++++++----- .../Dynamic/Transforms/Text/ProduceNgrams.cs | 63 +++++++++++------ .../Transforms/Text/ProduceWordBags.cs | 70 +++++++++++++------ .../Transforms/Text/RemoveDefaultStopWords.cs | 39 +++++++---- .../Transforms/Text/RemoveStopWords.cs | 39 +++++++---- .../Text/TokenizeIntoCharactersAsKeys.cs | 31 +++++--- .../Transforms/Text/TokenizeIntoWords.cs | 31 +++++--- 16 files changed, 485 insertions(+), 235 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs index 9de60d5130..5e5548227b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs @@ -9,11 +9,12 @@ namespace Samples.Dynamic public sealed class VectorWhiten { - /// This example requires installation of additional nuget package Microsoft.ML.Mkl.Components. + /// This example requires installation of additional nuget package + /// Microsoft.ML.Mkl.Components. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -32,20 +33,29 @@ public static void Example() // 6 7 8 9 0 1 2 3 4 5 // A small printing utility. - Action>> printHelper = (colName, column) => + Action>> printHelper = (colName, + column) => { - Console.WriteLine($"{colName} column obtained post-transformation."); + Console.WriteLine($"{colName} column obtained " + + $"post-transformation."); + foreach (var row in column) - Console.WriteLine($"{string.Join(" ", row.DenseValues().Select(x => x.ToString("f3")))} "); + Console.WriteLine(string.Join(" ", row.DenseValues().Select(x => + x.ToString("f3")))+" "); }; // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SampleVectorOfNumbersData.Features), - kind: Microsoft.ML.Transforms.WhiteningKind.ZeroPhaseComponentAnalysis); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof( + SampleVectorOfNumbersData.Features), kind: Microsoft.ML.Transforms + .WhiteningKind.ZeroPhaseComponentAnalysis); + // The transformed (projected) data. 
- var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); + var transformedData = whiteningPipeline.Fit(trainData).Transform( + trainData); + // Getting the data of the newly created column, so we can preview it. - var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(SampleVectorOfNumbersData.Features)]); + var whitening = transformedData.GetColumn>( + transformedData.Schema[nameof(SampleVectorOfNumbersData.Features)]); printHelper(nameof(SampleVectorOfNumbersData.Features), whitening); @@ -68,11 +78,16 @@ private class SampleVectorOfNumbersData /// /// Returns a few rows of the infertility dataset. /// - private static IEnumerable GetVectorOfNumbersData() + private static IEnumerable + GetVectorOfNumbersData() { var data = new List(); - data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } }); - data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 } }); + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9 } }); + + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 1, + 2, 3, 4, 5, 6, 7, 8, 9, 0 } }); + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs index 2e4e7fe46a..b1f852fc2c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs @@ -8,11 +8,12 @@ namespace Samples.Dynamic { public sealed class VectorWhitenWithOptions { - /// This example requires installation of additional nuget package Microsoft.ML.Mkl.Components. + /// This example requires installation of additional nuget package + /// Microsoft.ML.Mkl.Components. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. @@ -31,20 +32,30 @@ public static void Example() // 6 7 8 9 0 1 2 3 4 5 // A small printing utility. - Action>> printHelper = (colName, column) => + Action>> printHelper = (colName, + column) => { - Console.WriteLine($"{colName} column obtained post-transformation."); + Console.WriteLine($"{colName} column obtained" + + $"post-transformation."); + foreach (var row in column) - Console.WriteLine($"{string.Join(" ", row.DenseValues().Select(x => x.ToString("f3")))} "); + Console.WriteLine(string.Join(" ", row.DenseValues().Select(x => + x.ToString("f3")))+" "); }; // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SampleVectorOfNumbersData.Features), kind: Microsoft.ML.Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof( + SampleVectorOfNumbersData.Features), kind: Microsoft.ML.Transforms + .WhiteningKind.PrincipalComponentAnalysis, rank: 4); + // The transformed (projected) data. 
- var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); + var transformedData = whiteningPipeline.Fit(trainData).Transform( + trainData); + // Getting the data of the newly created column, so we can preview it. - var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(SampleVectorOfNumbersData.Features)]); + var whitening = transformedData.GetColumn>( + transformedData.Schema[nameof(SampleVectorOfNumbersData.Features)]); printHelper(nameof(SampleVectorOfNumbersData.Features), whitening); @@ -66,11 +77,16 @@ private class SampleVectorOfNumbersData /// /// Returns a few rows of the infertility dataset. /// - private static IEnumerable GetVectorOfNumbersData() + private static IEnumerable + GetVectorOfNumbersData() { var data = new List(); - data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } }); - data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 } }); + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9 } }); + + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 1, + 2, 3, 4, 5, 6, 7, 8, 9, 0 } }); + data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs index 149b662ee5..e1a275f763 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs @@ -9,12 +9,13 @@ public static class ApplyCustomWordEmbedding { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'ApplyWordEmbedding' does not require training data as - // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator. + // Create an empty list as the dataset. The 'ApplyWordEmbedding' does + // not require training data as the estimator ('WordEmbeddingEstimator') + // created by 'ApplyWordEmbedding' API is not a trainable estimator. // The empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); @@ -33,25 +34,33 @@ public static void Example() file.WriteLine("buy 0 0 20"); } - // A pipeline for converting text into a 9-dimension word embedding vector using the custom word embedding model. - // The 'ApplyWordEmbedding' computes the minimum, average and maximum values for each token's embedding vector. - // Tokens in 'custommodel.txt' model are represented as 3-dimension vector. - // Therefore, the output is of 9-dimension [min, avg, max]. + // A pipeline for converting text into a 9-dimension word embedding + // vector using the custom word embedding model. The + // 'ApplyWordEmbedding' computes the minimum, average and maximum values + // for each token's embedding vector. Tokens in 'custommodel.txt' model + // are represented as 3-dimension vector. Therefore, the output is of + // 9 -dimension [min, avg, max]. 
// // The 'ApplyWordEmbedding' API requires vector of text as input. - // The pipeline first normalizes and tokenizes text then applies word embedding transformation. + // The pipeline first normalizes and tokenizes text then applies word + // embedding transformation. var textPipeline = mlContext.Transforms.Text.NormalizeText("Text") - .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")) - .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", pathToCustomModel, "Tokens")); + .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", + "Text")) + .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", + pathToCustomModel, "Tokens")); // Fit to data. var textTransformer = textPipeline.Fit(emptyDataView); - // Create the prediction engine to get the embedding vector from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the embedding vector from the + // input text/string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Call the prediction API to convert the text into embedding vector. - var data = new TextData() { Text = "This is a great product. I would like to buy it again." }; + var data = new TextData() { Text = "This is a great product. I would " + + "like to buy it again." }; var prediction = predictionEngine.Predict(data); // Print the length of the embedding vector. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs index d77b0a0a99..cfd1077f8f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs @@ -9,38 +9,48 @@ public static class ApplyWordEmbedding { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'ApplyWordEmbedding' does not require training data as - // the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator. + // Create an empty list as the dataset. The 'ApplyWordEmbedding' does + // not require training data as the estimator ('WordEmbeddingEstimator') + // created by 'ApplyWordEmbedding' API is not a trainable estimator. // The empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); // Convert sample list to an empty IDataView. var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples); - // A pipeline for converting text into a 150-dimension embedding vector using pretrained 'SentimentSpecificWordEmbedding' model. - // The 'ApplyWordEmbedding' computes the minimum, average and maximum values for each token's embedding vector. - // Tokens in 'SentimentSpecificWordEmbedding' model are represented as 50-dimension vector. - // Therefore, the output is of 150-dimension [min, avg, max]. + // A pipeline for converting text into a 150-dimension embedding vector + // using pretrained 'SentimentSpecificWordEmbedding' model. 
The + // 'ApplyWordEmbedding' computes the minimum, average and maximum values + // for each token's embedding vector. Tokens in + // 'SentimentSpecificWordEmbedding' model are represented as + // 50 -dimension vector. Therefore, the output is of 150-dimension [min, + // avg, max]. // // The 'ApplyWordEmbedding' API requires vector of text as input. - // The pipeline first normalizes and tokenizes text then applies word embedding transformation. + // The pipeline first normalizes and tokenizes text then applies word + // embedding transformation. var textPipeline = mlContext.Transforms.Text.NormalizeText("Text") - .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")) - .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", "Tokens", - WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding)); + .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", + "Text")) + .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", + "Tokens", WordEmbeddingEstimator.PretrainedModelKind + .SentimentSpecificWordEmbedding)); // Fit to data. var textTransformer = textPipeline.Fit(emptyDataView); - // Create the prediction engine to get the embedding vector from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the embedding vector from the + // input text/string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Call the prediction API to convert the text into embedding vector. - var data = new TextData() { Text = "This is a great product. I would like to buy it again." }; + var data = new TextData() { Text = "This is a great product. I would " + + "like to buy it again." }; var prediction = predictionEngine.Predict(data); // Print the length of the embedding vector. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeText.cs index 334f70fe76..5b62d0639e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeText.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeText.cs @@ -8,41 +8,58 @@ public static class FeaturizeText { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "ML.NET's FeaturizeText API uses a composition of several basic transforms to convert text into numeric features." }, - new TextData(){ Text = "This API can be used as a featurizer to perform text classification." }, - new TextData(){ Text = "There are a number of approaches to text classification." }, - new TextData(){ Text = "One of the simplest and most common approaches is called “Bag of Words”." }, - new TextData(){ Text = "Text classification can be used for a wide variety of tasks" }, - new TextData(){ Text = "such as sentiment analysis, topic detection, intent identification etc." }, + new TextData(){ Text = "ML.NET's FeaturizeText API uses a " + + "composition of several basic transforms to convert text " + + "into numeric features." 
}, + + new TextData(){ Text = "This API can be used as a featurizer to " + + "perform text classification." }, + + new TextData(){ Text = "There are a number of approaches to text " + + "classification." }, + + new TextData(){ Text = "One of the simplest and most common " + + "approaches is called “Bag of Words”." }, + + new TextData(){ Text = "Text classification can be used for a " + + "wide variety of tasks" }, + + new TextData(){ Text = "such as sentiment analysis, topic " + + "detection, intent identification etc." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for converting text into numeric features. - // The following call to 'FeaturizeText' instantiates 'TextFeaturizingEstimator' with default parameters. + // The following call to 'FeaturizeText' instantiates + // 'TextFeaturizingEstimator' with default parameters. // The default settings for the TextFeaturizingEstimator are // * StopWordsRemover: None // * CaseMode: Lowercase // * OutputTokensColumnName: None - // * KeepDiacritics: false, KeepPunctuations: true, KeepNumbers: true + // * KeepDiacritics: false, KeepPunctuations: true, KeepNumbers: + // true // * WordFeatureExtractor: NgramLength = 1 // * CharFeatureExtractor: NgramLength = 3, UseAllLengths = false // The length of the output feature vector depends on these settings. - var textPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "Text"); + var textPipeline = mlContext.Transforms.Text.FeaturizeText("Features", + "Text"); // Fit to data. var textTransformer = textPipeline.Fit(dataview); - // Create the prediction engine to get the features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the features extracted from the + // text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. var prediction = predictionEngine.Predict(samples[0]); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeTextWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeTextWithOptions.cs index 97a00b6c0f..3f405176bb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeTextWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/FeaturizeTextWithOptions.cs @@ -9,44 +9,65 @@ public static class FeaturizeTextWithOptions { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "ML.NET's FeaturizeText API uses a composition of several basic transforms to convert text into numeric features." }, - new TextData(){ Text = "This API can be used as a featurizer to perform text classification." }, - new TextData(){ Text = "There are a number of approaches to text classification." }, - new TextData(){ Text = "One of the simplest and most common approaches is called “Bag of Words”." 
}, - new TextData(){ Text = "Text classification can be used for a wide variety of tasks" }, - new TextData(){ Text = "such as sentiment analysis, topic detection, intent identification etc." }, + new TextData(){ Text = "ML.NET's FeaturizeText API uses a " + + "composition of several basic transforms to convert text into " + + "numeric features." }, + + new TextData(){ Text = "This API can be used as a featurizer to " + + "perform text classification." }, + + new TextData(){ Text = "There are a number of approaches to text " + + "classification." }, + + new TextData(){ Text = "One of the simplest and most common " + + "approaches is called “Bag of Words”." }, + + new TextData(){ Text = "Text classification can be used for a " + + "wide variety of tasks" }, + + new TextData(){ Text = "such as sentiment analysis, topic " + + "detection, intent identification etc." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for converting text into numeric features. - // The following call to 'FeaturizeText' instantiates 'TextFeaturizingEstimator' with given parameters. - // The length of the output feature vector depends on these settings. + // The following call to 'FeaturizeText' instantiates + // 'TextFeaturizingEstimator' with given parameters. The length of the + // output feature vector depends on these settings. var options = new TextFeaturizingEstimator.Options() { // Also output tokenized words OutputTokensColumnName = "OutputTokens", CaseMode = TextNormalizingEstimator.CaseMode.Lower, // Use ML.NET's built-in stop word remover - StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options() { Language = TextFeaturizingEstimator.Language.English }, - WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 2, UseAllLengths = true }, - CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength = 3, UseAllLengths= false }, + StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options() { + Language = TextFeaturizingEstimator.Language.English }, + + WordFeatureExtractor = new WordBagEstimator.Options() { NgramLength + = 2, UseAllLengths = true }, + + CharFeatureExtractor = new WordBagEstimator.Options() { NgramLength + = 3, UseAllLengths= false }, }; - var textPipeline = mlContext.Transforms.Text.FeaturizeText("Features", options, "Text"); + var textPipeline = mlContext.Transforms.Text.FeaturizeText("Features", + options, "Text"); // Fit to data. var textTransformer = textPipeline.Fit(dataview); - // Create the prediction engine to get the features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the features extracted from the + // text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. 
var prediction = predictionEngine.Predict(samples[0]); @@ -59,7 +80,8 @@ public static void Example() for (int i = 0; i < 10; i++) Console.Write($"{prediction.Features[i]:F4} "); - Console.WriteLine($"\nTokens: {string.Join(",", prediction.OutputTokens)}"); + Console.WriteLine("\nTokens: " + string.Join(",", prediction + .OutputTokens)); // Expected output: // Number of Features: 282 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/LatentDirichletAllocation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/LatentDirichletAllocation.cs index 1e296111e0..15ce82e5b2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/LatentDirichletAllocation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/LatentDirichletAllocation.cs @@ -8,46 +8,61 @@ public static class LatentDirichletAllocation { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "ML.NET's LatentDirichletAllocation API computes topic models." }, - new TextData(){ Text = "ML.NET's LatentDirichletAllocation API is the best for topic models." }, + new TextData(){ Text = "ML.NET's LatentDirichletAllocation API " + + "computes topic models." }, + + new TextData(){ Text = "ML.NET's LatentDirichletAllocation API " + + "is the best for topic models." }, + new TextData(){ Text = "I like to eat broccoli and bananas." }, new TextData(){ Text = "I eat bananas for breakfast." }, - new TextData(){ Text = "This car is expensive compared to last week's price." }, + new TextData(){ Text = "This car is expensive compared to last " + + "week's price." }, + new TextData(){ Text = "This car was $X last week." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); - // A pipeline for featurizing the text/string using LatentDirichletAllocation API. - // To be more accurate in computing the LDA features, the pipeline first normalizes text and removes stop words - // before passing tokens (the individual words, lower cased, with common words removed) to LatentDirichletAllocation. - var pipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText", "Text") - .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "NormalizedText")) + // A pipeline for featurizing the text/string using + // LatentDirichletAllocation API. o be more accurate in computing the + // LDA features, the pipeline first normalizes text and removes stop + // words before passing tokens (the individual words, lower cased, with + // common words removed) to LatentDirichletAllocation. + var pipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText", + "Text") + .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", + "NormalizedText")) .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("Tokens")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens")) .Append(mlContext.Transforms.Text.ProduceNgrams("Tokens")) - .Append(mlContext.Transforms.Text.LatentDirichletAllocation("Features", "Tokens", numberOfTopics: 3)); + .Append(mlContext.Transforms.Text.LatentDirichletAllocation( + "Features", "Tokens", numberOfTopics: 3)); // Fit to data. 
var transformer = pipeline.Fit(dataview); - // Create the prediction engine to get the LDA features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(transformer); + // Create the prediction engine to get the LDA features extracted from + // the text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(transformer); // Convert the sample text into LDA features and print it. PrintLdaFeatures(predictionEngine.Predict(samples[0])); PrintLdaFeatures(predictionEngine.Predict(samples[1])); // Features obtained post-transformation. - // For LatentDirichletAllocation, we had specified numTopic:3. Hence each prediction has been featurized as a vector of floats with length 3. + // For LatentDirichletAllocation, we had specified numTopic:3. Hence + // each prediction has been featurized as a vector of floats with length + // 3. // Topic1 Topic2 Topic3 // 0.6364 0.2727 0.0909 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs index e198979eda..2100c13371 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs @@ -9,21 +9,22 @@ public static class NormalizeText { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'NormalizeText' API does not require training data as - // the estimator ('TextNormalizingEstimator') created by 'NormalizeText' API is not a trainable estimator. - // The empty list is only needed to pass input schema to the pipeline. + // Create an empty list as the dataset. The 'NormalizeText' API does not + // require training data as the estimator ('TextNormalizingEstimator') + // created by 'NormalizeText' API is not a trainable estimator. The + // empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); // Convert sample list to an empty IDataView. var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples); // A pipeline for normalizing text. - var normTextPipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText", "Text", - TextNormalizingEstimator.CaseMode.Lower, + var normTextPipeline = mlContext.Transforms.Text.NormalizeText( + "NormalizedText", "Text", TextNormalizingEstimator.CaseMode.Lower, keepDiacritics: false, keepPunctuations: false, keepNumbers: false); @@ -31,11 +32,16 @@ public static void Example() // Fit to data. var normTextTransformer = normTextPipeline.Fit(emptyDataView); - // Create the prediction engine to get the normalized text from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(normTextTransformer); + // Create the prediction engine to get the normalized text from the + // input text/string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(normTextTransformer); // Call the prediction API. - var data = new TextData() { Text = "ML.NET's NormalizeText API changes the case of the TEXT and removes/keeps diâcrîtîcs, punctuations, and/or numbers (123)." 
}; + var data = new TextData() { Text = "ML.NET's NormalizeText API " + + "changes the case of the TEXT and removes/keeps diâcrîtîcs, " + + "punctuations, and/or numbers (123)." }; + var prediction = predictionEngine.Predict(data); // Print the normalized text. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs index ce6f7816be..4d990d0a42 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs @@ -9,54 +9,74 @@ public static class ProduceHashedNgrams { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "This is an example to compute n-grams using hashing." }, - new TextData(){ Text = "N-gram is a sequence of 'N' consecutive words/tokens." }, - new TextData(){ Text = "ML.NET's ProduceHashedNgrams API produces count of n-grams and hashes it as an index into a vector of given bit length." }, - new TextData(){ Text = "The hashing reduces the size of the output feature vector" }, - new TextData(){ Text = "which is useful in case when number of n-grams is very large." }, + new TextData(){ Text = "This is an example to compute n-grams " + + "using hashing." }, + + new TextData(){ Text = "N-gram is a sequence of 'N' consecutive" + + " words/tokens." }, + + new TextData(){ Text = "ML.NET's ProduceHashedNgrams API " + + "produces count of n-grams and hashes it as an index into a " + + "vector of given bit length." }, + + new TextData(){ Text = "The hashing reduces the size of the " + + "output feature vector" }, + + new TextData(){ Text = "which is useful in case when number of " + + "n-grams is very large." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for converting text into numeric hashed n-gram features. - // The following call to 'ProduceHashedNgrams' requires the tokenized text/string as input. - // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceHashedNgrams'. - // Please note that the length of the output feature vector depends on the 'numberOfBits' settings. - var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text") + // The following call to 'ProduceHashedNgrams' requires the tokenized + // text /string as input. This is acheived by calling + // 'TokenizeIntoWords' first followed by 'ProduceHashedNgrams'. + // Please note that the length of the output feature vector depends on + // the 'numberOfBits' settings. + var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", + "Text") .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens")) - .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", - numberOfBits: 5, - ngramLength: 3, - useAllLengths: false, - maximumNumberOfInverts: 1)); + .Append(mlContext.Transforms.Text.ProduceHashedNgrams( + "NgramFeatures", "Tokens", + numberOfBits: 5, + ngramLength: 3, + useAllLengths: false, + maximumNumberOfInverts: 1)); // Fit to data. 
var textTransformer = textPipeline.Fit(dataview); var transformedDataView = textTransformer.Transform(dataview); - // Create the prediction engine to get the features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the features extracted from the + // text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. var prediction = predictionEngine.Predict(samples[0]); // Print the length of the feature vector. - Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}"); + Console.WriteLine("Number of Features: " + prediction.NgramFeatures + .Length); // Preview of the produced n-grams. // Get the slot names from the column's metadata. - // The slot names for a vector column corresponds to the names associated with each position in the vector. + // The slot names for a vector column corresponds to the names + // associated with each position in the vector. VBuffer> slotNames = default; transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames); - var NgramFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["NgramFeatures"]); + var NgramFeaturesColumn = transformedDataView.GetColumn>( + transformedDataView.Schema["NgramFeatures"]); + var slots = slotNames.GetValues(); Console.Write("N-grams: "); foreach (var featureRow in NgramFeaturesColumn) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedWordBags.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedWordBags.cs index 48a5971b2d..05d88951bc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedWordBags.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedWordBags.cs @@ -9,28 +9,43 @@ public static class ProduceHashedWordBags { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "This is an example to compute bag-of-word features using hashing." }, - new TextData(){ Text = "ML.NET's ProduceHashedWordBags API produces count of n-grams and hashes it as an index into a vector of given bit length." }, - new TextData(){ Text = "It does so by first tokenizing text/string into words/tokens then " }, - new TextData(){ Text = "computing n-grams and hash them to the index given by hash value." }, - new TextData(){ Text = "The hashing reduces the size of the output feature vector" }, - new TextData(){ Text = "which is useful in case when number of n-grams is very large." }, + new TextData(){ Text = "This is an example to compute " + + "bag-of-word features using hashing." }, + + new TextData(){ Text = "ML.NET's ProduceHashedWordBags API " + + "produces count of n-grams and hashes it as an index into " + + "a vector of given bit length." }, + + new TextData(){ Text = "It does so by first tokenizing " + + "text/string into words/tokens then " }, + + new TextData(){ Text = "computing n-grams and hash them to the " + + "index given by hash value." 
}, + + new TextData(){ Text = "The hashing reduces the size of the " + + "output feature vector" }, + + new TextData(){ Text = "which is useful in case when number of" + + " n-grams is very large." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); - // A pipeline for converting text into numeric bag-of-word features using hashing. - // The following call to 'ProduceHashedWordBags' implicitly tokenizes the text/string into words/tokens. - // Please note that the length of the output feature vector depends on the 'numberOfBits' settings. - var textPipeline = mlContext.Transforms.Text.ProduceHashedWordBags("BagOfWordFeatures", "Text", + // A pipeline for converting text into numeric bag-of-word features + // using hashing. The following call to 'ProduceHashedWordBags' + // implicitly tokenizes the text/string into words/tokens. Please note + // that the length of the output feature vector depends on the + // 'numberOfBits' settings. + var textPipeline = mlContext.Transforms.Text.ProduceHashedWordBags( + "BagOfWordFeatures", "Text", numberOfBits: 5, ngramLength: 3, useAllLengths: false, @@ -40,21 +55,29 @@ public static void Example() var textTransformer = textPipeline.Fit(dataview); var transformedDataView = textTransformer.Transform(dataview); - // Create the prediction engine to get the bag-of-word features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the bag-of-word features + // extracted from the text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. var prediction = predictionEngine.Predict(samples[0]); // Print the length of the feature vector. - Console.WriteLine($"Number of Features: {prediction.BagOfWordFeatures.Length}"); + Console.WriteLine("Number of Features: " + prediction.BagOfWordFeatures + .Length); // Preview of the produced n-grams. // Get the slot names from the column's metadata. - // The slot names for a vector column corresponds to the names associated with each position in the vector. + // The slot names for a vector column corresponds to the names + // associated with each position in the vector. VBuffer> slotNames = default; - transformedDataView.Schema["BagOfWordFeatures"].GetSlotNames(ref slotNames); - var BagOfWordFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["BagOfWordFeatures"]); + transformedDataView.Schema["BagOfWordFeatures"].GetSlotNames(ref + slotNames); + + var BagOfWordFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["BagOfWordFeatures"]); + var slots = slotNames.GetValues(); Console.Write("N-grams: "); foreach (var featureRow in BagOfWordFeaturesColumn) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs index 2a41cde412..9b124daf5b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs @@ -10,34 +10,52 @@ public static class ProduceNgrams { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. 
It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { new TextData(){ Text = "This is an example to compute n-grams." }, - new TextData(){ Text = "N-gram is a sequence of 'N' consecutive words/tokens." }, - new TextData(){ Text = "ML.NET's ProduceNgrams API produces vector of n-grams." }, - new TextData(){ Text = "Each position in the vector corresponds to a particular n-gram." }, - new TextData(){ Text = "The value at each position corresponds to," }, - new TextData(){ Text = "the number of times n-gram occured in the data (Tf), or" }, - new TextData(){ Text = "the inverse of the number of documents that contain the n-gram (Idf)," }, - new TextData(){ Text = "or compute both and multiply together (Tf-Idf)." }, + new TextData(){ Text = "N-gram is a sequence of 'N' consecutive " + + "words/tokens." }, + + new TextData(){ Text = "ML.NET's ProduceNgrams API produces " + + "vector of n-grams." }, + + new TextData(){ Text = "Each position in the vector corresponds " + + "to a particular n-gram." }, + + new TextData(){ Text = "The value at each position corresponds " + + "to," }, + + new TextData(){ Text = "the number of times n-gram occured in " + + "the data (Tf), or" }, + + new TextData(){ Text = "the inverse of the number of documents " + + "that contain the n-gram (Idf)," }, + + new TextData(){ Text = "or compute both and multiply together " + + "(Tf-Idf)." }, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for converting text into numeric n-gram features. - // The following call to 'ProduceNgrams' requires the tokenized text/string as input. - // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceNgrams'. - // Please note that the length of the output feature vector depends on the n-gram settings. - var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text") - // 'ProduceNgrams' takes key type as input. Converting the tokens into key type using 'MapValueToKey'. + // The following call to 'ProduceNgrams' requires the tokenized + // text /string as input. This is acheived by calling + // 'TokenizeIntoWords' first followed by 'ProduceNgrams'. Please note + // that the length of the output feature vector depends on the n-gram + // settings. + var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", + "Text") + // 'ProduceNgrams' takes key type as input. Converting the tokens + // into key type using 'MapValueToKey'. .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens")) - .Append(mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", "Tokens", + .Append(mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", + "Tokens", ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf)); @@ -46,21 +64,26 @@ public static void Example() var textTransformer = textPipeline.Fit(dataview); var transformedDataView = textTransformer.Transform(dataview); - // Create the prediction engine to get the n-gram features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the n-gram features extracted + // from the text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. 
var prediction = predictionEngine.Predict(samples[0]); // Print the length of the feature vector. - Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}"); + Console.WriteLine("Number of Features: " + prediction.NgramFeatures + .Length); // Preview of the produced n-grams. // Get the slot names from the column's metadata. - // The slot names for a vector column corresponds to the names associated with each position in the vector. + // The slot names for a vector column corresponds to the names + // associated with each position in the vector. VBuffer> slotNames = default; transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames); - var NgramFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["NgramFeatures"]); + var NgramFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["NgramFeatures"]); var slots = slotNames.GetValues(); Console.Write("N-grams: "); foreach (var featureRow in NgramFeaturesColumn) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceWordBags.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceWordBags.cs index 7745ca3fdd..a6feae3582 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceWordBags.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceWordBags.cs @@ -10,52 +10,80 @@ public static class ProduceWordBags { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); // Create a small dataset as an IEnumerable. var samples = new List() { - new TextData(){ Text = "This is an example to compute bag-of-word features." }, - new TextData(){ Text = "ML.NET's ProduceWordBags API produces bag-of-word features from input text." }, - new TextData(){ Text = "It does so by first tokenizing text/string into words/tokens then " }, - new TextData(){ Text = "computing n-grams and their neumeric values." }, - new TextData(){ Text = "Each position in the output vector corresponds to a particular n-gram." }, - new TextData(){ Text = "The value at each position corresponds to," }, - new TextData(){ Text = "the number of times n-gram occured in the data (Tf), or" }, - new TextData(){ Text = "the inverse of the number of documents contain the n-gram (Idf)," }, - new TextData(){ Text = "or compute both and multipy together (Tf-Idf)." }, + new TextData(){ Text = "This is an example to compute " + + "bag-of-word features." }, + + new TextData(){ Text = "ML.NET's ProduceWordBags API produces " + + "bag-of-word features from input text." }, + + new TextData(){ Text = "It does so by first tokenizing " + + "text/string into words/tokens then " }, + + new TextData(){ Text = "computing n-grams and their neumeric " + + "values." }, + + new TextData(){ Text = "Each position in the output vector " + + "corresponds to a particular n-gram." }, + + new TextData(){ Text = "The value at each position corresponds " + + "to," }, + + new TextData(){ Text = "the number of times n-gram occured in " + + "the data (Tf), or" }, + + new TextData(){ Text = "the inverse of the number of documents " + + "contain the n-gram (Idf)," }, + + new TextData(){ Text = "or compute both and multipy together " + + "(Tf-Idf)." 
}, }; // Convert training data to IDataView. var dataview = mlContext.Data.LoadFromEnumerable(samples); // A pipeline for converting text into numeric bag-of-word features. - // The following call to 'ProduceWordBags' implicitly tokenizes the text/string into words/tokens. - // Please note that the length of the output feature vector depends on the n-gram settings. - var textPipeline = mlContext.Transforms.Text.ProduceWordBags("BagOfWordFeatures", "Text", - ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf); + // The following call to 'ProduceWordBags' implicitly tokenizes the + // text /string into words/tokens. Please note that the length of the + // output feature vector depends on the n-gram settings. + var textPipeline = mlContext.Transforms.Text.ProduceWordBags( + "BagOfWordFeatures", "Text", + ngramLength: 3, useAllLengths: false, + weighting: NgramExtractingEstimator.WeightingCriteria.Tf); // Fit to data. var textTransformer = textPipeline.Fit(dataview); var transformedDataView = textTransformer.Transform(dataview); - // Create the prediction engine to get the bag-of-word features extracted from the text. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the bag-of-word features + // extracted from the text. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Convert the text into numeric features. var prediction = predictionEngine.Predict(samples[0]); // Print the length of the feature vector. - Console.WriteLine($"Number of Features: {prediction.BagOfWordFeatures.Length}"); + Console.WriteLine("Number of Features: " + prediction.BagOfWordFeatures + .Length); // Preview of the produced n-grams. // Get the slot names from the column's metadata. - // The slot names for a vector column corresponds to the names associated with each position in the vector. + // The slot names for a vector column corresponds to the names + // associated with each position in the vector. VBuffer> slotNames = default; - transformedDataView.Schema["BagOfWordFeatures"].GetSlotNames(ref slotNames); - var BagOfWordFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["BagOfWordFeatures"]); + transformedDataView.Schema["BagOfWordFeatures"].GetSlotNames(ref + slotNames); + + var BagOfWordFeaturesColumn = transformedDataView.GetColumn>(transformedDataView.Schema["BagOfWordFeatures"]); + var slots = slotNames.GetValues(); Console.Write("N-grams: "); foreach (var featureRow in BagOfWordFeaturesColumn) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs index 8c9e61cf5f..6147bf155e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveDefaultStopWords.cs @@ -9,13 +9,15 @@ public static class RemoveDefaultStopWords { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. 
The 'RemoveDefaultStopWords' does not require training data as - // the estimator ('StopWordsRemovingEstimator') created by 'RemoveDefaultStopWords' API is not a trainable estimator. - // The empty list is only needed to pass input schema to the pipeline. + // Create an empty list as the dataset. The 'RemoveDefaultStopWords' + // does not require training data as the estimator + // ('StopWordsRemovingEstimator') created by 'RemoveDefaultStopWords' + // API is not a trainable estimator. The empty list is only needed to + // pass input schema to the pipeline. var emptySamples = new List(); // Convert sample list to an empty IDataView. @@ -23,25 +25,36 @@ public static void Example() // A pipeline for removing stop words from input text/string. // The pipeline first tokenizes text into words then removes stop words. - // The 'RemoveDefaultStopWords' API ignores casing of the text/string e.g. 'tHe' and 'the' are considered the same stop words. - var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text") - .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("WordsWithoutStopWords", "Words", language: StopWordsRemovingEstimator.Language.English)); + // The 'RemoveDefaultStopWords' API ignores casing of the text/string + // e.g. 'tHe' and 'the' are considered the same stop words. + var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", + "Text") + .Append(mlContext.Transforms.Text.RemoveDefaultStopWords( + "WordsWithoutStopWords", "Words", language: + StopWordsRemovingEstimator.Language.English)); // Fit to data. var textTransformer = textPipeline.Fit(emptyDataView); - // Create the prediction engine to remove the stop words from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to remove the stop words from the input + // text /string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Call the prediction API to remove stop words. - var data = new TextData() { Text = "ML.NET's RemoveDefaultStopWords API removes stop words from tHe text/string. It requires the text/string to be tokenized beforehand." }; + var data = new TextData() { Text = "ML.NET's RemoveDefaultStopWords " + + "API removes stop words from tHe text/string. It requires the " + + "text/string to be tokenized beforehand." }; + var prediction = predictionEngine.Predict(data); // Print the length of the word vector after the stop words removed. - Console.WriteLine($"Number of words: {prediction.WordsWithoutStopWords.Length}"); + Console.WriteLine("Number of words: " + prediction.WordsWithoutStopWords + .Length); // Print the word vector without stop words. - Console.WriteLine($"\nWords without stop words: {string.Join(",", prediction.WordsWithoutStopWords)}"); + Console.WriteLine("\nWords without stop words: " + string.Join(",", + prediction.WordsWithoutStopWords)); // Expected output: // Number of words: 11 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs index 2596688a50..55a0f0c955 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/RemoveStopWords.cs @@ -8,13 +8,15 @@ public static class RemoveStopWords { public static void Example() { - // Create a new ML context, for ML.NET operations. 
It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'RemoveStopWords' does not require training data as - // the estimator ('CustomStopWordsRemovingEstimator') created by 'RemoveStopWords' API is not a trainable estimator. - // The empty list is only needed to pass input schema to the pipeline. + // Create an empty list as the dataset. The 'RemoveStopWords' does not + // require training data as the estimator + // ('CustomStopWordsRemovingEstimator') created by 'RemoveStopWords' API + // is not a trainable estimator. The empty list is only needed to pass + // input schema to the pipeline. var emptySamples = new List(); // Convert sample list to an empty IDataView. @@ -22,25 +24,36 @@ public static void Example() // A pipeline for removing stop words from input text/string. // The pipeline first tokenizes text into words then removes stop words. - // The 'RemoveStopWords' API ignores casing of the text/string e.g. 'tHe' and 'the' are considered the same stop words. - var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text") - .Append(mlContext.Transforms.Text.RemoveStopWords("WordsWithoutStopWords", "Words", stopwords: new[] { "a", "the", "from", "by" })); + // The 'RemoveStopWords' API ignores casing of the text/string e.g. + // 'tHe' and 'the' are considered the same stop words. + var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", + "Text") + .Append(mlContext.Transforms.Text.RemoveStopWords( + "WordsWithoutStopWords", "Words", stopwords: + new[] { "a", "the","from", "by" })); // Fit to data. var textTransformer = textPipeline.Fit(emptyDataView); - // Create the prediction engine to remove the stop words from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to remove the stop words from the input + // text /string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Call the prediction API to remove stop words. - var data = new TextData() { Text = "ML.NET's RemoveStopWords API removes stop words from tHe text/string using a list of stop words provided by the user." }; + var data = new TextData() { Text = "ML.NET's RemoveStopWords API " + + "removes stop words from tHe text/string using a list of stop " + + "words provided by the user." }; + var prediction = predictionEngine.Predict(data); // Print the length of the word vector after the stop words removed. - Console.WriteLine($"Number of words: {prediction.WordsWithoutStopWords.Length}"); + Console.WriteLine("Number of words: " + prediction.WordsWithoutStopWords + .Length); // Print the word vector without stop words. 
- Console.WriteLine($"\nWords without stop words: {string.Join(",", prediction.WordsWithoutStopWords)}"); + Console.WriteLine("\nWords without stop words: " + string.Join(",", + prediction.WordsWithoutStopWords)); // Expected output: // Number of words: 14 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs index 7815eec0d3..ff984b4d46 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharactersAsKeys.cs @@ -8,12 +8,14 @@ public static class TokenizeIntoCharactersAsKeys { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'TokenizeIntoCharactersAsKeys' does not require training data as - // the estimator ('TokenizingByCharactersEstimator') created by 'TokenizeIntoCharactersAsKeys' API is not a trainable estimator. + // Create an empty list as the dataset. The + // 'TokenizeIntoCharactersAsKeys' does not require training data as + // the estimator ('TokenizingByCharactersEstimator') created by + // 'TokenizeIntoCharactersAsKeys' API is not a trainable estimator. // The empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); @@ -23,24 +25,33 @@ public static void Example() // A pipeline for converting text into vector of characters. // The 'TokenizeIntoCharactersAsKeys' produces result as key type. // 'MapKeyToValue' is need to map keys back to their original values. - var textPipeline = mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("CharTokens", "Text", useMarkerCharacters: false) - .Append(mlContext.Transforms.Conversion.MapKeyToValue("CharTokens")); + var textPipeline = mlContext.Transforms.Text + .TokenizeIntoCharactersAsKeys("CharTokens", "Text", + useMarkerCharacters: false) + .Append(mlContext.Transforms.Conversion.MapKeyToValue( + "CharTokens")); // Fit to data. var textTransformer = textPipeline.Fit(emptyDataView); - // Create the prediction engine to get the character vector from the input text/string. - var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); + // Create the prediction engine to get the character vector from the + // input text/string. + var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer); // Call the prediction API to convert the text into characters. - var data = new TextData() { Text = "ML.NET's TokenizeIntoCharactersAsKeys API splits text/string into characters." }; + var data = new TextData() { Text = "ML.NET's " + + "TokenizeIntoCharactersAsKeys API splits text/string into " + + "characters." }; + var prediction = predictionEngine.Predict(data); // Print the length of the character vector. Console.WriteLine($"Number of tokens: {prediction.CharTokens.Length}"); // Print the character vector. 
-            Console.WriteLine($"\nCharacter Tokens: {string.Join(",", prediction.CharTokens)}");
+            Console.WriteLine("\nCharacter Tokens: " + string.Join(",", prediction
                .CharTokens));
 
             // Expected output:
             //   Number of tokens: 77
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
index bfc61b8a96..ad0826ad19 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoWords.cs
@@ -8,31 +8,40 @@ public static class TokenizeIntoWords
     {
         public static void Example()
         {
-            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
-            // as well as the source of randomness.
+            // Create a new ML context, for ML.NET operations. It can be used for
+            // exception tracking and logging, as well as the source of randomness.
             var mlContext = new MLContext();
 
-            // Create an empty list as the dataset. The 'TokenizeIntoWords' does not require training data as
-            // the estimator ('WordTokenizingEstimator') created by 'TokenizeIntoWords' API is not a trainable estimator.
-            // The empty list is only needed to pass input schema to the pipeline.
+            // Create an empty list as the dataset. The 'TokenizeIntoWords' does
+            // not require training data as the estimator
+            // ('WordTokenizingEstimator') created by 'TokenizeIntoWords' API is not
+            // a trainable estimator. The empty list is only needed to pass input
+            // schema to the pipeline.
             var emptySamples = new List();
 
             // Convert sample list to an empty IDataView.
             var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
 
             // A pipeline for converting text into vector of words.
-            // The following call to 'TokenizeIntoWords' tokenizes text/string into words using space as a separator.
-            // Space is also a default value for the 'separators' argument if it is not specified.
-            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text", separators: new[] { ' ' });
+            // The following call to 'TokenizeIntoWords' tokenizes text/string into
+            // words using space as a separator. Space is also a default value for
+            // the 'separators' argument if it is not specified.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Words",
+                "Text", separators: new[] { ' ' });
 
             // Fit to data.
             var textTransformer = textPipeline.Fit(emptyDataView);
 
-            // Create the prediction engine to get the word vector from the input text/string.
-            var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer);
+            // Create the prediction engine to get the word vector from the input
+            // text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine(textTransformer);
 
             // Call the prediction API to convert the text into words.
-            var data = new TextData() { Text = "ML.NET's TokenizeIntoWords API splits text/string into words using the list of characters provided as separators." };
+            var data = new TextData() { Text = "ML.NET's TokenizeIntoWords API " +
+                "splits text/string into words using the list of characters " +
+                "provided as separators." };
+
             var prediction = predictionEngine.Predict(data);
 
             // Print the length of the word vector.
From da95c6008f4874a83470ff218a19a473b7882991 Mon Sep 17 00:00:00 2001 From: Rayan Krishnan Date: Mon, 1 Jul 2019 19:37:31 -0700 Subject: [PATCH 2/3] transforms/timeseries formatted to 85 char --- .../TimeSeries/DetectAnomalyBySrCnn.cs | 28 +++++++++----- .../DetectAnomalyBySrCnnBatchPrediction.cs | 24 ++++++++---- .../TimeSeries/DetectChangePointBySsa.cs | 35 ++++++++++------- .../DetectChangePointBySsaBatchPrediction.cs | 36 +++++++++++------- .../DetectChangePointBySsaStream.cs | 35 ++++++++++------- .../TimeSeries/DetectIidChangePoint.cs | 38 ++++++++++++------- .../DetectIidChangePointBatchPrediction.cs | 31 +++++++++------ .../Transforms/TimeSeries/DetectIidSpike.cs | 27 ++++++++----- .../DetectIidSpikeBatchPrediction.cs | 30 +++++++++------ .../Transforms/TimeSeries/DetectSpikeBySsa.cs | 32 ++++++++++------ .../DetectSpikeBySsaBatchPrediction.cs | 36 +++++++++++------- .../Transforms/TimeSeries/Forecasting.cs | 18 +++++---- .../ForecastingWithConfidenceInterval.cs | 35 +++++++++++------ 13 files changed, 258 insertions(+), 147 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs index 9fd6b849a8..ea816e855f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs @@ -9,12 +9,13 @@ namespace Samples.Dynamic { public static class DetectAnomalyBySrCnn { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify spiking points in the series. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify spiking points in the series. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with an anomaly @@ -37,12 +38,17 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // The transformed model. - ITransformer model = ml.Transforms.DetectAnomalyBySrCnn(outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(dataView); + ITransformer model = ml.Transforms.DetectAnomalyBySrCnn( + outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit( + dataView); // Create a time series prediction engine from the model. - var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); + + Console.WriteLine($"{outputColumnName} column obtained post-" + + $"transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tMag"); // Prediction column obtained post-transformation. 
@@ -102,9 +108,11 @@ public static void Example() //5 0 0.01 0.25 } - private static void PrintPrediction(float value, SrCnnAnomalyDetection prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, SrCnnAnomalyDetection + prediction) => + + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction + .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); private class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index ef1d2a0de9..cfea847065 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -9,7 +9,8 @@ public static class DetectAnomalyBySrCnnBatchPrediction { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); @@ -33,12 +34,18 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // The transformed data. - var transformedData = ml.Transforms.DetectAnomalyBySrCnn(outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(dataView).Transform(dataView); + var transformedData = ml.Transforms.DetectAnomalyBySrCnn( + outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit( + dataView).Transform(dataView); - // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the data of the newly created column as an IEnumerable of + // SrCnnAnomalyDetection. 
+ var predictionColumn = ml.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-" + + $"transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tMag"); int k = 0; @@ -75,9 +82,10 @@ public static void Example() //5 0 0.01 0.25 } - private static void PrintPrediction(float value, SrCnnAnomalyDetection prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, SrCnnAnomalyDetection + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction + .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); private class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index 0bc494790f..c6695f3977 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -9,14 +9,16 @@ namespace Samples.Dynamic { public static class DetectChangePointBySsa { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // It demonstrates stateful prediction engine that updates the state of the model and allows for saving/reloading. - // The estimator is applied then to identify points where data distribution changed. - // This estimator can account for temporal seasonality in the data. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). It demonstrates stateful prediction + // engine that updates the state of the model and allows for + // saving/reloading. The estimator is applied then to identify points where + // data distribution changed. This estimator can account for temporal + // seasonality in the data. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a recurring pattern @@ -54,12 +56,16 @@ public static void Example() int changeHistoryLength = 8; // Train the change point detector. - ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); + ITransformer model = ml.Transforms.DetectChangePointBySsa( + outputColumnName, inputColumnName, confidence, changeHistoryLength, + TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); - // Start streaming new data points with no change point to the prediction engine. + // Start streaming new data points with no change point to the + // prediction engine. 
Console.WriteLine($"Output from ChangePoint predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); @@ -99,7 +105,8 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) @@ -116,9 +123,11 @@ public static void Example() } - private static void PrintPrediction(float value, ChangePointPrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + private static void PrintPrediction(float value, ChangePointPrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs index 2e0df19007..43910cfa14 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs @@ -7,16 +7,18 @@ namespace Samples.Dynamic { public static class DetectChangePointBySsaBatchPrediction { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify points where data distribution changed. - // This estimator can account for temporal seasonality in the data. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify points where data distribution changed. This estimator can + // account for temporal seasonality in the data. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern and then a change in trend + // Generate sample series data with a recurring pattern and then a + // change in trend const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; @@ -56,12 +58,18 @@ public static void Example() var outputColumnName = nameof(ChangePointPrediction.Prediction); // The transformed data. - var transformedData = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + var transformedData = ml.Transforms.DetectChangePointBySsa( + outputColumnName, inputColumnName, 95, 8, TrainingSize, + SeasonalitySize + 1).Fit(dataView).Transform(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. 
- var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the data of the newly created column as an IEnumerable of + // ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine(outputColumnName + " column obtained " + + "post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); int k = 0; foreach (var prediction in predictionColumn) @@ -91,9 +99,11 @@ public static void Example() // 400 0 357.11 0.03 45298370.86 } - private static void PrintPrediction(float value, ChangePointPrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + private static void PrintPrediction(float value, ChangePointPrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs index d9a97ee5ef..419041589d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs @@ -9,14 +9,16 @@ namespace Samples.Dynamic { public static class DetectChangePointBySsaStream { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // It demonstrates stateful prediction engine that updates the state of the model and allows for saving/reloading. - // The estimator is applied then to identify points where data distribution changed. - // This estimator can account for temporal seasonality in the data. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). It demonstrates stateful prediction + // engine that updates the state of the model and allows for + // saving/reloading. The estimator is applied then to identify points where + // data distribution changed. This estimator can account for temporal + // seasonality in the data. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a recurring pattern @@ -54,12 +56,16 @@ public static void Example() int changeHistoryLength = 8; // Train the change point detector. - ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); + ITransformer model = ml.Transforms.DetectChangePointBySsa( + outputColumnName, inputColumnName, confidence, changeHistoryLength, + TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. 
- var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); - // Start streaming new data points with no change point to the prediction engine. + // Start streaming new data points with no change point to the + // prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); @@ -103,7 +109,8 @@ public static void Example() model = ml.Model.Load(stream, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. for (int i = 0; i < 5; i++) @@ -120,9 +127,11 @@ public static void Example() } - private static void PrintPrediction(float value, ChangePointPrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + private static void PrintPrediction(float value, ChangePointPrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 466080c49d..5ec1cb6882 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -13,12 +13,13 @@ namespace Samples.Dynamic { public static class DetectIidChangePoint { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify points where data distribution changed. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify points where data distribution changed. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a change @@ -53,12 +54,16 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // Time Series model. - ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); + ITransformer model = ml.Transforms.DetectIidChangePoint( + outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); // Create a time series prediction engine from the model. 
- var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); + + Console.WriteLine($"{outputColumnName} column obtained " + + $"post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); // Data Alert Score P-Value Martingale value @@ -88,7 +93,8 @@ public static void Example() var modelPath = "temp.zip"; engine.CheckPoint(ml, modelPath); - // Reference to current time series engine because in the next step "engine" will point to the + // Reference to current time series engine because in the next step + // "engine" will point to the // checkpointed model being loaded from disk. var timeseries1 = engine; @@ -97,7 +103,9 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // Create a time series prediction engine from the checkpointed model. - engine = model.CreateTimeSeriesEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); + for (int index = 0; index < 8; index++) { // Anomaly change point detection. @@ -112,8 +120,8 @@ public static void Example() // 7 0 7.00 0.50 0.00 // 7 0 7.00 0.50 0.00 - // Prediction from the original time series engine should match the prediction from - // check pointed model. + // Prediction from the original time series engine should match the + // prediction from check pointed model. engine = timeseries1; for (int index = 0; index < 8; index++) { @@ -130,9 +138,11 @@ public static void Example() // 7 0 7.00 0.50 0.00 } - private static void PrintPrediction(float value, ChangePointPrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + private static void PrintPrediction(float value, ChangePointPrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs index 760305df33..40a1f38ecd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs @@ -11,12 +11,13 @@ namespace Samples.Dynamic { public static class DetectIidChangePointBatchPrediction { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify points where data distribution changed. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify points where data distribution changed. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
var ml = new MLContext(); // Generate sample series data with a change @@ -51,12 +52,18 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // The transformed data. - var transformedData = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + var transformedData = ml.Transforms.DetectIidChangePoint( + outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView) + .Transform(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the data of the newly created column as an IEnumerable of + // ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained " + + $"post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); int k = 0; foreach (var prediction in predictionColumn) @@ -82,9 +89,11 @@ public static void Example() // 7 0 7.00 0.50 0.00 } - private static void PrintPrediction(float value, ChangePointPrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + private static void PrintPrediction(float value, ChangePointPrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index 1395399571..274eb57b21 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -9,12 +9,13 @@ namespace Samples.Dynamic { public static class DetectIidSpike { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify spiking points in the series. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify spiking points in the series. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a spike @@ -45,12 +46,16 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // The transformed model. - ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); + ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, + inputColumnName, 95, Size).Fit(dataView); // Create a time series prediction engine from the model. 
- var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); + + Console.WriteLine($"{outputColumnName} column obtained " + + $"post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); // Prediction column obtained post-transformation. @@ -96,9 +101,11 @@ public static void Example() } - private static void PrintPrediction(float value, IidSpikePrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, IidSpikePrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs index 4145214918..cbb587d564 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs @@ -7,12 +7,13 @@ namespace Samples.Dynamic { public static class DetectIidSpikeBatchPrediction { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify spiking points in the series. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify spiking points in the series. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a spike @@ -43,12 +44,17 @@ public static void Example() string inputColumnName = nameof(TimeSeriesData.Value); // The transformed data. - var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, + inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); - // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the data of the newly created column as an IEnumerable of + // IidSpikePrediction. 
+ var predictionColumn = ml.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained " + + $"post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); int k = 0; @@ -70,9 +76,11 @@ public static void Example() // 5 0 5.00 0.50 } - private static void PrintPrediction(float value, IidSpikePrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, IidSpikePrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index 39198e5f8d..d6328c6431 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -9,13 +9,14 @@ namespace Samples.Dynamic { public static class DetectSpikeBySsa { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify spiking points in the series. - // This estimator can account for temporal seasonality in the data. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify spiking points in the series. This estimator can account for + // temporal seasonality in the data. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a recurring pattern @@ -51,12 +52,16 @@ public static void Example() var outputColumnName = nameof(SsaSpikePrediction.Prediction); // Train the change point detector. - ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); + ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, + inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit( + dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesEngine(ml); + var engine = model.CreateTimeSeriesEngine(ml); - // Start streaming new data points with no change point to the prediction engine. + // Start streaming new data points with no change point to the + // prediction engine. Console.WriteLine($"Output from spike predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value"); @@ -94,7 +99,8 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesEngine(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. 
for (int i = 0; i < 5; i++) @@ -107,9 +113,11 @@ public static void Example() // 4 0 -23.24 0.28 } - private static void PrintPrediction(float value, SsaSpikePrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, SsaSpikePrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs index cc6a798dcf..0d5575c4ed 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs @@ -7,16 +7,18 @@ namespace Samples.Dynamic { public static class DetectSpikeBySsaBatchPrediction { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). - // The estimator is applied then to identify spiking points in the series. - // This estimator can account for temporal seasonality in the data. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot). The estimator is applied then to + // identify spiking points in the series. This estimator can account for + // temporal seasonality in the data. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern and a spike within the pattern + // Generate sample series data with a recurring pattern and a spike + // within the pattern const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; @@ -58,12 +60,18 @@ public static void Example() var outputColumnName = nameof(SsaSpikePrediction.Prediction); // The transformed data. - var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, + inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit( + dataView).Transform(dataView); - // Getting the data of the newly created column as an IEnumerable of SsaSpikePrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the data of the newly created column as an IEnumerable of + // SsaSpikePrediction. 
+ var predictionColumn = ml.Data.CreateEnumerable( + transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained " + + $"post-transformation."); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); int k = 0; foreach (var prediction in predictionColumn) @@ -94,9 +102,11 @@ public static void Example() // 4 0 -29.82 0.21 } - private static void PrintPrediction(float value, SsaSpikePrediction prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], - prediction.Prediction[1], prediction.Prediction[2]); + private static void PrintPrediction(float value, SsaSpikePrediction + prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index 0be475ff58..07f0eef23d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -8,12 +8,12 @@ namespace Samples.Dynamic { public static class Forecasting { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot) and then - // does forecasting. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot) and then does forecasting. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a recurring pattern. @@ -46,13 +46,16 @@ public static void Example() var outputColumnName = nameof(ForecastResult.Forecast); // Instantiate the forecasting model. - var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5); + var model = ml.Forecasting.ForecastBySsa(outputColumnName, + inputColumnName, 5, 11, data.Count, 5); // Train. var transformer = model.Fit(dataView); // Forecast next five values. - var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = forecastEngine.Predict(); Console.WriteLine($"Forecasted values:"); @@ -76,7 +79,8 @@ public static void Example() modelCopy = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine< + TimeSeriesData, ForecastResult>(ml); // Forecast with the checkpointed model loaded from disk. 
forecast = forecastEngineCopy.Predict(); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index 571cf9325a..f4c230accc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -8,12 +8,12 @@ namespace Samples.Dynamic { public static class ForecastingWithConfidenceInternal { - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot) and then - // does forecasting. + // This example creates a time series (list of Data with the i-th element + // corresponding to the i-th time slot) and then does forecasting. public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Generate sample series data with a recurring pattern. @@ -46,7 +46,8 @@ public static void Example() var outputColumnName = nameof(ForecastResult.Forecast); // Instantiate the forecasting model. - var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5, + var model = ml.Forecasting.ForecastBySsa(outputColumnName, + inputColumnName, 5, 11, data.Count, 5, confidenceLevel: 0.95f, forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound"); @@ -55,10 +56,13 @@ public static void Example() var transformer = model.Fit(dataView); // Forecast next five values. - var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = forecastEngine.Predict(); - PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast + .ConfidenceLowerBound, forecast.ConfidenceUpperBound); // Forecasted values: // [1.977226, 1.020494, 1.760543, 3.437509, 4.266461] // Confidence intervals: @@ -80,30 +84,37 @@ public static void Example() modelCopy = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine< + TimeSeriesData, ForecastResult>(ml); // Forecast with the checkpointed model loaded from disk. forecast = forecastEngineCopy.Predict(); - PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast + .ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] // Forecast with the original model(that was checkpointed to disk). 
forecast = forecastEngine.Predict(); - PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + PrintForecastValuesAndIntervals(forecast.Forecast, + forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] } - static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidenceIntervalLowerBounds, float[] confidenceIntervalUpperBounds) + static void PrintForecastValuesAndIntervals(float[] forecast, float[] + confidenceIntervalLowerBounds, float[] confidenceIntervalUpperBounds) { Console.WriteLine($"Forecasted values:"); Console.WriteLine("[{0}]", string.Join(", ", forecast)); Console.WriteLine($"Confidence intervals:"); for (int index = 0; index < forecast.Length; index++) - Console.Write($"[{confidenceIntervalLowerBounds[index]} - {confidenceIntervalUpperBounds[index]}] "); + Console.Write($"[{confidenceIntervalLowerBounds[index]} -" + + $" {confidenceIntervalUpperBounds[index]}] "); Console.WriteLine(); } From e6aca2a77675f0757f2aa465b4d8d004d91d8ca9 Mon Sep 17 00:00:00 2001 From: Rayan Krishnan Date: Tue, 2 Jul 2019 15:53:49 -0700 Subject: [PATCH 3/3] minor tab and spacing fixes --- .../Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs | 3 +-- .../TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs | 2 +- .../Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs | 4 ++-- .../TimeSeries/DetectChangePointBySsaBatchPrediction.cs | 4 ++-- .../Transforms/TimeSeries/DetectChangePointBySsaStream.cs | 4 ++-- .../Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs | 4 ++-- .../TimeSeries/DetectIidChangePointBatchPrediction.cs | 4 ++-- .../Dynamic/Transforms/TimeSeries/DetectIidSpike.cs | 4 ++-- .../Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs | 4 ++-- .../Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs | 4 ++-- .../Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs | 4 ++-- 11 files changed, 20 insertions(+), 21 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs index ea816e855f..db29b3ef82 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs @@ -110,9 +110,8 @@ public static void Example() private static void PrintPrediction(float value, SrCnnAnomalyDetection prediction) => - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction - .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); + .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); private class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs index cfea847065..5ef334bc76 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnnBatchPrediction.cs @@ -85,7 +85,7 @@ public static void Example() private static void PrintPrediction(float value, SrCnnAnomalyDetection prediction) 
=> Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction - .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); + .Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); private class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index c6695f3977..7d545770f9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -126,8 +126,8 @@ public static void Example() private static void PrintPrediction(float value, ChangePointPrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2], prediction.Prediction[3]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs index 43910cfa14..25819052d8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs @@ -102,8 +102,8 @@ public static void Example() private static void PrintPrediction(float value, ChangePointPrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2], prediction.Prediction[3]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs index 419041589d..c65d3af987 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs @@ -130,8 +130,8 @@ public static void Example() private static void PrintPrediction(float value, ChangePointPrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2], prediction.Prediction[3]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 5ec1cb6882..4e44a73607 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -141,8 +141,8 @@ public static void Example() private static void PrintPrediction(float value, ChangePointPrediction prediction) => 
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2], prediction.Prediction[3]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs index 40a1f38ecd..35066b79e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs @@ -92,8 +92,8 @@ public static void Example() private static void PrintPrediction(float value, ChangePointPrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2], prediction.Prediction[3]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2], prediction.Prediction[3]); class ChangePointPrediction { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index 274eb57b21..637e5ccbd8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -104,8 +104,8 @@ public static void Example() private static void PrintPrediction(float value, IidSpikePrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs index cbb587d564..e1c14e1e7d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs @@ -79,8 +79,8 @@ public static void Example() private static void PrintPrediction(float value, IidSpikePrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index d6328c6431..79a22272fa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -116,8 +116,8 @@ public static void Example() private static void PrintPrediction(float value, SsaSpikePrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2]); + prediction.Prediction[0], 
prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs index 0d5575c4ed..9104f3af26 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs @@ -105,8 +105,8 @@ public static void Example() private static void PrintPrediction(float value, SsaSpikePrediction prediction) => Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, - prediction.Prediction[0], prediction.Prediction[1], - prediction.Prediction[2]); + prediction.Prediction[0], prediction.Prediction[1], + prediction.Prediction[2]); class TimeSeriesData {