Skip to content

Commit c1f99ae

Browse files
authored
Update Live with Master (#448)
* Update Sentiment Analysis tutorial for ML .NET .7 (#440) * initial revision * Added saveFileAsZip call * updated with feedback * revising for 0.7 * Updated ctx evaluator * Synchronized snippets * Revised IterateModel method to TrainFinalModel * updated to ML .NET 0.7.0 * Removed TrainFinalModel * Revising Taxi fare regression tutorial sample for ML.NET .7 release (#441) * Revise taxi fare tutorial to .7 release * Synchronized snippets * Updates to ML .NET 0.7.0 * Revised based on feedback * remove multifilesource and renumber snippets (#449)
1 parent 53ede52 commit c1f99ae

File tree

5 files changed

+324
-149
lines changed

5 files changed

+324
-149
lines changed

machine-learning/tutorials/SentimentAnalysis/Program.cs

Lines changed: 164 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,102 +3,126 @@
33
using System.Collections.Generic;
44
using System.IO;
55
using System.Linq;
6-
using System.Threading.Tasks;
7-
using Microsoft.ML.Legacy;
8-
using Microsoft.ML.Legacy.Models;
9-
using Microsoft.ML.Legacy.Data;
10-
using Microsoft.ML.Legacy.Transforms;
11-
using Microsoft.ML.Legacy.Trainers;
6+
using Microsoft.ML;
7+
using Microsoft.ML.Core.Data;
128
using Microsoft.ML.Runtime.Api;
9+
using Microsoft.ML.Runtime.Data;
10+
using Microsoft.ML.Transforms.Text;
1311
// </Snippet1>
1412

1513
namespace SentimentAnalysis
1614
{
1715
class Program
1816
{
1917
// <Snippet2>
20-
static readonly string _dataPath = Path.Combine(Environment.CurrentDirectory, "Data", "wikipedia-detox-250-line-data.tsv");
18+
static readonly string _trainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "wikipedia-detox-250-line-data.tsv");
2119
static readonly string _testDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "wikipedia-detox-250-line-test.tsv");
22-
static readonly string _modelpath = Path.Combine(Environment.CurrentDirectory, "Data", "Model.zip");
20+
static readonly string _modelPath = Path.Combine(Environment.CurrentDirectory, "Data", "Model.zip");
21+
static TextLoader _textLoader;
2322
// </Snippet2>
2423

25-
static async Task Main(string[] args)
24+
static void Main(string[] args)
2625
{
26+
// Create ML.NET context/local environment - allows you to add steps in order to keep everything together
27+
// during the learning process.
28+
//Create ML Context with seed for repeatable/deterministic results
2729
// <Snippet3>
28-
var model = await Train();
30+
MLContext mlContext = new MLContext(seed: 0);
2931
// </Snippet3>
3032

31-
// <Snippet12>
32-
Evaluate(model);
33-
// </Snippet12>
34-
35-
// <Snippet17>
36-
Predict(model);
37-
// </Snippet17>
38-
}
33+
// The TextLoader loads a dataset with comments and corresponding positive or negative sentiment.
34+
// When you create a loader, you specify the schema by passing a class to the loader containing
35+
// all the column names and their types. This is used to create the model, and train it.
36+
// Initialize our TextLoader
37+
// <Snippet4>
38+
_textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
39+
{
40+
Separator = "tab",
41+
HasHeader = true,
42+
Column = new[]
43+
{
44+
new TextLoader.Column("Label", DataKind.Bool, 0),
45+
new TextLoader.Column("SentimentText", DataKind.Text, 1)
46+
}
47+
}
48+
);
49+
// </Snippet4>
3950

40-
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
41-
{
42-
// LearningPipeline allows you to add steps in order to keep everything together
43-
// during the learning process.
4451
// <Snippet5>
45-
var pipeline = new LearningPipeline();
52+
var model = Train(mlContext, _trainDataPath);
4653
// </Snippet5>
4754

48-
// The TextLoader loads a dataset with comments and corresponding positive or negative sentiment.
49-
// When you create a loader, you specify the schema by passing a class to the loader containing
50-
// all the column names and their types. This is used to create the model, and train it.
55+
// <Snippet11>
56+
Evaluate(mlContext, model);
57+
// </Snippet11>
58+
59+
// <Snippet16>
60+
Predict(mlContext, model);
61+
// </Snippet16>
62+
63+
// <Snippet25>
64+
PredictWithModelLoadedFromFile(mlContext);
65+
// </Snippet25>
66+
67+
Console.WriteLine();
68+
Console.WriteLine("=============== End of process ===============");
69+
}
70+
71+
public static ITransformer Train(MLContext mlContext, string dataPath)
72+
{
73+
//Note that this case, loading your training data from a file,
74+
//is the easiest way to get started, but ML.NET also allows you
75+
//to load data from databases or in-memory collections.
5176
// <Snippet6>
52-
pipeline.Add(new TextLoader(_dataPath).CreateFrom<SentimentData>());
77+
IDataView dataView =_textLoader.Read(dataPath);
5378
// </Snippet6>
5479

55-
// TextFeaturizer is a transform that is used to featurize an input column.
56-
// This is used to format and clean the data.
80+
// Create a flexible pipeline (composed of a chain of estimators) for creating/training the model.
81+
// This is used to format and clean the data.
82+
// Convert the text column to numeric vectors (Features column)
5783
// <Snippet7>
58-
pipeline.Add(new TextFeaturizer("Features", "SentimentText"));
59-
//</Snippet7>
84+
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
85+
//</Snippet7>
6086

61-
// Adds a FastTreeBinaryClassifier, the decision tree learner for this project, and
62-
// three hyperparameters to be used for tuning decision tree performance.
63-
// <Snippet8>
64-
pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 50, NumTrees = 50, MinDocumentsInLeafs = 20 });
87+
// Adds a FastTreeBinaryClassificationTrainer, the decision tree learner for this project
88+
// <Snippet8>
89+
.Append(mlContext.BinaryClassification.Trainers.FastTree(numLeaves: 50, numTrees: 50, minDatapointsInLeafs: 20));
6590
// </Snippet8>
6691

67-
// Train the pipeline based on the dataset that has been loaded, transformed.
92+
// Create and train the model based on the dataset that has been loaded, transformed.
6893
// <Snippet9>
69-
PredictionModel<SentimentData, SentimentPrediction> model =
70-
pipeline.Train<SentimentData, SentimentPrediction>();
94+
Console.WriteLine("=============== Create and Train the Model ===============");
95+
var model = pipeline.Fit(dataView);
96+
Console.WriteLine("=============== End of training ===============");
97+
Console.WriteLine();
7198
// </Snippet9>
7299

73-
// Saves the model we trained to a zip file.
74-
// <Snippet10>
75-
await model.WriteAsync(_modelpath);
76-
// </Snippet10>
77-
78100
// Returns the model we trained to use for evaluation.
79-
// <Snippet11>
101+
// <Snippet10>
80102
return model;
81-
// </Snippet11>
103+
// </Snippet10>
82104
}
83105

84-
public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
106+
public static void Evaluate(MLContext mlContext, ITransformer model)
85107
{
86-
// Evaluates.
108+
// Evaluate the model and show accuracy stats
109+
// Load evaluation/test data
110+
// <Snippet12>
111+
IDataView dataView = _textLoader.Read(_testDataPath);
112+
// </Snippet12>
113+
114+
//Take the data in, make transformations, output the data.
87115
// <Snippet13>
88-
var testData = new TextLoader(_testDataPath).CreateFrom<SentimentData>();
116+
Console.WriteLine("=============== Evaluating Model accuracy with Test data===============");
117+
var predictions = model.Transform(dataView);
89118
// </Snippet13>
90119

91-
// BinaryClassificationEvaluator computes the quality metrics for the PredictionModel
92-
// using the specified data set.
120+
// BinaryClassificationContext.Evaluate returns a BinaryClassificationEvaluator.CalibratedResult
121+
// that contains the computed overall metrics.
93122
// <Snippet14>
94-
var evaluator = new BinaryClassificationEvaluator();
123+
var metrics = mlContext.BinaryClassification.Evaluate(predictions, "Label");
95124
// </Snippet14>
96125

97-
// BinaryClassificationMetrics contains the overall metrics computed by binary classification evaluators.
98-
// <Snippet15>
99-
BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData);
100-
// </Snippet15>
101-
102126
// The Accuracy metric gets the accuracy of a classifier, which is the proportion
103127
// of correct predictions in the test set.
104128

@@ -111,57 +135,117 @@ public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction>
111135
// The F1 score is the harmonic mean of precision and recall:
112136
// 2 * precision * recall / (precision + recall).
113137

114-
// <Snippet16>
138+
// <Snippet15>
115139
Console.WriteLine();
116-
Console.WriteLine("PredictionModel quality metrics evaluation");
117-
Console.WriteLine("------------------------------------------");
140+
Console.WriteLine("Model quality metrics evaluation");
141+
Console.WriteLine("--------------------------------");
118142
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
119143
Console.WriteLine($"Auc: {metrics.Auc:P2}");
120144
Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
121-
// </Snippet16>
145+
Console.WriteLine("=============== End of model evaluation ===============");
146+
//</Snippet15>
147+
148+
// Save the new model to .ZIP file
149+
// <Snippet23>
150+
SaveModelAsFile(mlContext, model);
151+
// </Snippet23>
122152
}
123153

124-
public static void Predict(PredictionModel<SentimentData, SentimentPrediction> model)
154+
private static void Predict(MLContext mlContext, ITransformer model)
125155
{
126-
// Adds some comments to test the trained model's predictions.
156+
// <Snippet17>
157+
var predictionFunction = model.MakePredictionFunction<SentimentData, SentimentPrediction>(mlContext);
158+
// </Snippet17>
159+
127160
// <Snippet18>
161+
SentimentData sampleStatement = new SentimentData
162+
{
163+
SentimentText = "This is a very rude movie"
164+
};
165+
// </Snippet18>
166+
167+
// <Snippet19>
168+
var resultprediction = predictionFunction.Predict(sampleStatement);
169+
// </Snippet19>
170+
// <Snippet20>
171+
Console.WriteLine();
172+
Console.WriteLine("=============== Prediction Test of model with a single sample and test dataset ===============");
173+
174+
Console.WriteLine();
175+
Console.WriteLine($"Sentiment: {sampleStatement.SentimentText} | Prediction: {(Convert.ToBoolean(resultprediction.Prediction) ? "Toxic" : "Not Toxic")} | Probability: {resultprediction.Probability} ");
176+
Console.WriteLine("=============== End of Predictions ===============");
177+
Console.WriteLine();
178+
// </Snippet20>
179+
}
180+
181+
public static void PredictWithModelLoadedFromFile(MLContext mlContext)
182+
{
183+
// Adds some comments to test the trained model's predictions.
184+
// <Snippet26>
128185
IEnumerable<SentimentData> sentiments = new[]
129186
{
130187
new SentimentData
131188
{
132-
SentimentText = "Please refrain from adding nonsense to Wikipedia."
189+
SentimentText = "This is a very rude movie"
133190
},
134191
new SentimentData
135192
{
136193
SentimentText = "He is the best, and the article should say that."
137194
}
138195
};
139-
// </Snippet18>
196+
// </Snippet26>
140197

141-
// Use the model to predict the positive
142-
// or negative sentiment of the comment data.
143-
// <Snippet19>
144-
IEnumerable<SentimentPrediction> predictions = model.Predict(sentiments);
145-
// </Snippet19>
198+
// <Snippet27>
199+
ITransformer loadedModel;
200+
using (var stream = new FileStream(_modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
201+
{
202+
loadedModel = mlContext.Model.Load(stream);
203+
}
204+
// </Snippet27>
205+
206+
// <Snippet28>
207+
// Create prediction engine
208+
var sentimentStreamingDataView = mlContext.CreateStreamingDataView(sentiments);
209+
var predictions = loadedModel.Transform(sentimentStreamingDataView);
210+
211+
// Use the model to predict whether comment data is toxic (1) or nice (0).
212+
var predictedResults = predictions.AsEnumerable<SentimentPrediction>(mlContext, reuseRowObject: false);
213+
// </Snippet28>
214+
215+
// <Snippet29>
216+
Console.WriteLine();
217+
218+
Console.WriteLine("=============== Prediction Test of loaded model with a multiple samples ===============");
219+
// </Snippet29>
146220

147-
// <Snippet20>
148221
Console.WriteLine();
149-
Console.WriteLine("Sentiment Predictions");
150-
Console.WriteLine("---------------------");
151-
// </Snippet20>
152222

153223
// Builds pairs of (sentiment, prediction)
154-
// <Snippet21>
155-
var sentimentsAndPredictions = sentiments.Zip(predictions, (sentiment, prediction) => (sentiment, prediction));
156-
// </Snippet21>
224+
// <Snippet30>
225+
var sentimentsAndPredictions = sentiments.Zip(predictedResults, (sentiment, prediction) => (sentiment, prediction));
226+
// </Snippet30>
157227

158-
// <Snippet22>
228+
// <Snippet31>
159229
foreach (var item in sentimentsAndPredictions)
160230
{
161-
Console.WriteLine($"Sentiment: {item.sentiment.SentimentText} | Prediction: {(item.prediction.Sentiment ? "Negative" : "Positive")}");
231+
Console.WriteLine($"Sentiment: {item.sentiment.SentimentText} | Prediction: {(Convert.ToBoolean(item.prediction.Prediction) ? "Toxic" : "Not Toxic")} | Probability: {item.prediction.Probability} ");
162232
}
163-
Console.WriteLine();
164-
// </Snippet22>
233+
Console.WriteLine("=============== End of predictions ===============");
234+
235+
// </Snippet31>
236+
}
237+
238+
// Saves the model we trained to a zip file.
239+
240+
private static void SaveModelAsFile(MLContext mlContext, ITransformer model)
241+
{
242+
// <Snippet24>
243+
using (var fs = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
244+
mlContext.Model.Save(model,fs);
245+
// </Snippet24>
246+
247+
Console.WriteLine("The model is saved to {0}", _modelPath);
165248
}
249+
166250
}
167251
}

machine-learning/tutorials/SentimentAnalysis/SentimentAnalysis.csproj

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414
</ItemGroup>
1515

1616
<ItemGroup>
17-
<PackageReference Include="Microsoft.ML" Version="0.6.0" />
17+
<PackageReference Include="Microsoft.ML" Version="0.7.0" />
1818
</ItemGroup>
1919

2020
<ItemGroup>
21+
<None Update="Data\wikipedia-detox-250-line-all.tsv">
22+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
23+
</None>
2124
<None Update="Data\wikipedia-detox-250-line-data.tsv">
2225
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
2326
</None>

machine-learning/tutorials/SentimentAnalysis/SentimentData.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@ public class SentimentData
1616
public class SentimentPrediction
1717
{
1818
[ColumnName("PredictedLabel")]
19-
public bool Sentiment;
19+
public bool Prediction { get; set; }
20+
21+
[ColumnName("Probability")]
22+
public float Probability { get; set; }
23+
24+
[ColumnName("Score")]
25+
public float Score { get; set; }
2026
}
2127
// </Snippet2>
22-
}
28+
}

0 commit comments

Comments
 (0)