Skip to content

Commit 0f49d3b

Browse files
authored
Merge pull request #831 from dotnet/master
Update live with master
2 parents d07255c + 1fcb13a commit 0f49d3b

File tree

3 files changed

+35
-64
lines changed

3 files changed

+35
-64
lines changed

machine-learning/tutorials/SentimentAnalysis/Program.cs

Lines changed: 34 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
using System.Collections.Generic;
44
using System.IO;
55
using System.Linq;
6-
using Microsoft.Data.DataView;
76
using Microsoft.ML;
87
using Microsoft.ML.Data;
8+
using static Microsoft.ML.DataOperationsCatalog;
99
using Microsoft.ML.Trainers;
1010
using Microsoft.ML.Transforms.Text;
1111
// </SnippetAddUsings>
@@ -22,14 +22,13 @@ class Program
2222
static void Main(string[] args)
2323
{
2424
// Create ML.NET context/local environment - allows you to add steps in order to keep everything together
25-
// during the learning process.
26-
//Create ML Context with seed for repeatable/deterministic results
25+
// as you discover the ML.NET trainers and transforms
2726
// <SnippetCreateMLContext>
2827
MLContext mlContext = new MLContext();
2928
// </SnippetCreateMLContext>
3029

3130
// <SnippetCallLoadData>
32-
TrainCatalogBase.TrainTestData splitDataView = LoadData(mlContext);
31+
TrainTestData splitDataView = LoadData(mlContext);
3332
// </SnippetCallLoadData>
3433

3534

@@ -45,26 +44,28 @@ static void Main(string[] args)
4544
UseModelWithSingleItem(mlContext, model);
4645
// </SnippetCallUseModelWithSingleItem>
4746

48-
// <SnippetCallUseLoadedModelWithBatchItems>
49-
UseLoadedModelWithBatchItems(mlContext);
50-
// </SnippetCallUseLoadedModelWithBatchItems>
47+
// <SnippetCallUseModelWithBatchItems>
48+
UseModelWithBatchItems(mlContext, model);
49+
// </SnippetCallUseModelWithBatchItems>
5150

5251
Console.WriteLine();
5352
Console.WriteLine("=============== End of process ===============");
5453
}
5554

56-
public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
55+
public static TrainTestData LoadData(MLContext mlContext)
5756
{
58-
59-
//Note that this case, loading your training data from a file,
60-
//is the easiest way to get started, but ML.NET also allows you
61-
//to load data from databases or in-memory collections.
57+
// Note that this case, loading your training data from a file,
58+
// is the easiest way to get started, but ML.NET also allows you
59+
// to load data from databases or in-memory collections.
6260
// <SnippetLoadData>
63-
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath,hasHeader:false);
61+
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);
6462
// </SnippetLoadData>
6563

64+
// You need both a training dataset to train the model and a test dataset to evaluate the model.
65+
// Split the loaded dataset into train and test datasets
66+
// Specify test dataset percentage with the `testFraction` parameter
6667
// <SnippetSplitData>
67-
TrainCatalogBase.TrainTestData splitDataView = mlContext.BinaryClassification.TrainTestSplit(dataView, testFraction: 0.2);
68+
TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
6869
// </SnippetSplitData>
6970

7071
// <SnippetReturnSplitData>
@@ -74,22 +75,21 @@ public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
7475

7576
public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView splitTrainSet)
7677
{
77-
7878
// Create a flexible pipeline (composed of a chain of estimators) for creating/training the model.
7979
// This is used to format and clean the data.
8080
// Convert the text column to numeric vectors (Features column)
8181
// <SnippetFeaturizeText>
82-
var pipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: DefaultColumnNames.Features, inputColumnName: nameof(SentimentData.SentimentText))
82+
var estimator = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(SentimentData.SentimentText))
8383
//</SnippetFeaturizeText>
84-
// Adds a FastTreeBinaryClassificationTrainer, the decision tree learner for this project
84+
// Append the machine learning task to the estimator
8585
// <SnippetAddTrainer>
86-
.Append(mlContext.BinaryClassification.Trainers.FastTree(numLeaves: 50, numTrees: 50, minDatapointsInLeaves: 20));
86+
.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features"));
8787
// </SnippetAddTrainer>
8888

8989
// Create and train the model based on the dataset that has been loaded, transformed.
9090
// <SnippetTrainModel>
9191
Console.WriteLine("=============== Create and Train the Model ===============");
92-
var model = pipeline.Fit(splitTrainSet);
92+
var model = estimator.Fit(splitTrainSet);
9393
Console.WriteLine("=============== End of training ===============");
9494
Console.WriteLine();
9595
// </SnippetTrainModel>
@@ -116,38 +116,32 @@ public static void Evaluate(MLContext mlContext, ITransformer model, IDataView s
116116
CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(predictions, "Label");
117117
// </SnippetEvaluate>
118118

119-
// The Accuracy metric gets the accuracy of a classifier, which is the proportion
119+
// The Accuracy metric gets the accuracy of a model, which is the proportion
120120
// of correct predictions in the test set.
121121

122-
// The Auc metric gets the area under the ROC curve.
123-
// The area under the ROC curve is equal to the probability that the classifier ranks
124-
// a randomly chosen positive instance higher than a randomly chosen negative one
125-
// (assuming 'positive' ranks higher than 'negative').
122+
// The AreaUnderRocCurve metric is an indicator of how confident the model is in
123+
// correctly classifying the positive and negative classes as such.
126124

127-
// The F1Score metric gets the classifier's F1 score.
128-
// The F1 score is the harmonic mean of precision and recall:
125+
// The F1Score metric gets the model's F1 score.
126+
// F1 is a measure of tradeoff between precision and recall.
129127
// 2 * precision * recall / (precision + recall).
130128

131129
// <SnippetDisplayMetrics>
132130
Console.WriteLine();
133131
Console.WriteLine("Model quality metrics evaluation");
134132
Console.WriteLine("--------------------------------");
135-
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
136-
Console.WriteLine($"Auc: {metrics.Auc:P2}");
137-
Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
133+
Console.WriteLine($" Accuracy: {metrics.Accuracy:P2}");
134+
Console.WriteLine($"Area Under Roc Curve: {metrics.AreaUnderRocCurve:P2}");
135+
Console.WriteLine($" F1Score: {metrics.F1Score:P2}");
138136
Console.WriteLine("=============== End of model evaluation ===============");
139137
//</SnippetDisplayMetrics>
140138

141-
// Save the new model to .ZIP file
142-
// <SnippetCallSaveModel>
143-
SaveModelAsFile(mlContext, model);
144-
// </SnippetCallSaveModel>
145139
}
146140

147141
private static void UseModelWithSingleItem(MLContext mlContext, ITransformer model)
148142
{
149143
// <SnippetCreatePredictionEngine1>
150-
PredictionEngine<SentimentData, SentimentPrediction> predictionFunction = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(mlContext);
144+
PredictionEngine<SentimentData, SentimentPrediction> predictionFunction = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);
151145
// </SnippetCreatePredictionEngine1>
152146

153147
// <SnippetCreateTestIssue1>
@@ -172,9 +166,9 @@ private static void UseModelWithSingleItem(MLContext mlContext, ITransformer mod
172166
// </SnippetOutputPrediction>
173167
}
174168

175-
public static void UseLoadedModelWithBatchItems(MLContext mlContext)
169+
public static void UseModelWithBatchItems(MLContext mlContext, ITransformer model)
176170
{
177-
// Adds some comments to test the trained model's predictions.
171+
// Adds some comments to test the trained model's data points.
178172
// <SnippetCreateTestIssues>
179173
IEnumerable<SentimentData> sentiments = new[]
180174
{
@@ -189,19 +183,11 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)
189183
};
190184
// </SnippetCreateTestIssues>
191185

192-
// <SnippetLoadModel>
193-
ITransformer loadedModel;
194-
using (var stream = new FileStream(_modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
195-
{
196-
loadedModel = mlContext.Model.Load(stream);
197-
}
198-
// </SnippetLoadModel>
199-
200-
// Load test data
186+
// Load batch comments just created
201187
// <SnippetPrediction>
202-
IDataView sentimentStreamingDataView = mlContext.Data.LoadFromEnumerable(sentiments);
188+
IDataView batchComments = mlContext.Data.LoadFromEnumerable(sentiments);
203189

204-
IDataView predictions = loadedModel.Transform(sentimentStreamingDataView);
190+
IDataView predictions = model.Transform(batchComments);
205191

206192
// Use model to predict whether comment data is Positive (1) or Negative (0).
207193
IEnumerable<SentimentPrediction> predictedResults = mlContext.Data.CreateEnumerable<SentimentPrediction>(predictions, reuseRowObject: false);
@@ -210,7 +196,7 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)
210196
// <SnippetAddInfoMessage>
211197
Console.WriteLine();
212198

213-
Console.WriteLine("=============== Prediction Test of loaded model with a multiple samples ===============");
199+
Console.WriteLine("=============== Prediction Test of loaded model with multiple samples ===============");
214200
// </SnippetAddInfoMessage>
215201

216202
Console.WriteLine();
@@ -227,21 +213,8 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)
227213

228214
}
229215
Console.WriteLine("=============== End of predictions ===============");
230-
231216
// </SnippetDisplayResults>
232217
}
233218

234-
// Saves the model we trained to a zip file.
235-
236-
private static void SaveModelAsFile(MLContext mlContext, ITransformer model)
237-
{
238-
// <SnippetSaveModel>
239-
using (var fs = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
240-
mlContext.Model.Save(model, fs);
241-
// </SnippetSaveModel>
242-
243-
Console.WriteLine("The model is saved to {0}", _modelPath);
244-
}
245-
246219
}
247220
}

machine-learning/tutorials/SentimentAnalysis/SentimentAnalysis.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
</PropertyGroup>
1111

1212
<ItemGroup>
13-
<PackageReference Include="Microsoft.ML" Version="0.11.0" />
13+
<PackageReference Include="Microsoft.ML" Version="1.0.0-preview" />
1414
</ItemGroup>
1515

1616
<ItemGroup>

machine-learning/tutorials/SentimentAnalysis/SentimentData.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@ public class SentimentPrediction
1919
[ColumnName("PredictedLabel")]
2020
public bool Prediction { get; set; }
2121

22-
// [ColumnName("Probability")]
2322
public float Probability { get; set; }
2423

25-
// [ColumnName("Score")]
2624
public float Score { get; set; }
2725
}
2826
// </SnippetDeclareTypes>

0 commit comments

Comments
 (0)