Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 34 additions & 61 deletions machine-learning/tutorials/SentimentAnalysis/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;
// </SnippetAddUsings>
Expand All @@ -22,14 +22,13 @@ class Program
static void Main(string[] args)
{
// Create ML.NET context/local environment - allows you to add steps in order to keep everything together
// during the learning process.
//Create ML Context with seed for repeatable/deterministic results
// as you discover the ML.NET trainers and transforms
// <SnippetCreateMLContext>
MLContext mlContext = new MLContext();
// </SnippetCreateMLContext>

// <SnippetCallLoadData>
TrainCatalogBase.TrainTestData splitDataView = LoadData(mlContext);
TrainTestData splitDataView = LoadData(mlContext);
// </SnippetCallLoadData>


Expand All @@ -45,26 +44,28 @@ static void Main(string[] args)
UseModelWithSingleItem(mlContext, model);
// </SnippetCallUseModelWithSingleItem>

// <SnippetCallUseLoadedModelWithBatchItems>
UseLoadedModelWithBatchItems(mlContext);
// </SnippetCallUseLoadedModelWithBatchItems>
// <SnippetCallUseModelWithBatchItems>
UseModelWithBatchItems(mlContext, model);
// </SnippetCallUseModelWithBatchItems>

Console.WriteLine();
Console.WriteLine("=============== End of process ===============");
}

public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
public static TrainTestData LoadData(MLContext mlContext)
{

//Note that this case, loading your training data from a file,
//is the easiest way to get started, but ML.NET also allows you
//to load data from databases or in-memory collections.
// Note that this case, loading your training data from a file,
// is the easiest way to get started, but ML.NET also allows you
// to load data from databases or in-memory collections.
// <SnippetLoadData>
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath,hasHeader:false);
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);
// </SnippetLoadData>

// You need both a training dataset to train the model and a test dataset to evaluate the model.
// Split the loaded dataset into train and test datasets
// Specify test dataset percentage with the `testFraction`parameter
// <SnippetSplitData>
TrainCatalogBase.TrainTestData splitDataView = mlContext.BinaryClassification.TrainTestSplit(dataView, testFraction: 0.2);
TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
// </SnippetSplitData>

// <SnippetReturnSplitData>
Expand All @@ -74,22 +75,21 @@ public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)

public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView splitTrainSet)
{

// Create a flexible pipeline (composed by a chain of estimators) for creating/training the model.
// This is used to format and clean the data.
// Convert the text column to numeric vectors (Features column)
// <SnippetFeaturizeText>
var pipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: DefaultColumnNames.Features, inputColumnName: nameof(SentimentData.SentimentText))
var estimator = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(SentimentData.SentimentText))
//</SnippetFeaturizeText>
// Adds a FastTreeBinaryClassificationTrainer, the decision tree learner for this project
// append the machine learning task to the estimator
// <SnippetAddTrainer>
.Append(mlContext.BinaryClassification.Trainers.FastTree(numLeaves: 50, numTrees: 50, minDatapointsInLeaves: 20));
.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features"));
// </SnippetAddTrainer>

// Create and train the model based on the dataset that has been loaded, transformed.
// <SnippetTrainModel>
Console.WriteLine("=============== Create and Train the Model ===============");
var model = pipeline.Fit(splitTrainSet);
var model = estimator.Fit(splitTrainSet);
Console.WriteLine("=============== End of training ===============");
Console.WriteLine();
// </SnippetTrainModel>
Expand All @@ -116,38 +116,32 @@ public static void Evaluate(MLContext mlContext, ITransformer model, IDataView s
CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(predictions, "Label");
// </SnippetEvaluate>

// The Accuracy metric gets the accuracy of a classifier, which is the proportion
// The Accuracy metric gets the accuracy of a model, which is the proportion
// of correct predictions in the test set.

// The Auc metric gets the area under the ROC curve.
// The area under the ROC curve is equal to the probability that the classifier ranks
// a randomly chosen positive instance higher than a randomly chosen negative one
// (assuming 'positive' ranks higher than 'negative').
// The AreaUnderRocCurve metric is an indicator of how confident the model is
// correctly classifying the positive and negative classes as such.

// The F1Score metric gets the classifier's F1 score.
// The F1 score is the harmonic mean of precision and recall:
// The F1Score metric gets the model's F1 score.
// F1 is a measure of tradeoff between precision and recall.
// 2 * precision * recall / (precision + recall).

// <SnippetDisplayMetrics>
Console.WriteLine();
Console.WriteLine("Model quality metrics evaluation");
Console.WriteLine("--------------------------------");
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"Auc: {metrics.Auc:P2}");
Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
Console.WriteLine($" Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"Area Under Roc Curve: {metrics.AreaUnderRocCurve:P2}");
Console.WriteLine($" F1Score: {metrics.F1Score:P2}");
Console.WriteLine("=============== End of model evaluation ===============");
//</SnippetDisplayMetrics>

// Save the new model to .ZIP file
// <SnippetCallSaveModel>
SaveModelAsFile(mlContext, model);
// </SnippetCallSaveModel>
}

private static void UseModelWithSingleItem(MLContext mlContext, ITransformer model)
{
// <SnippetCreatePredictionEngine1>
PredictionEngine<SentimentData, SentimentPrediction> predictionFunction = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(mlContext);
PredictionEngine<SentimentData, SentimentPrediction> predictionFunction = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);
// </SnippetCreatePredictionEngine1>

// <SnippetCreateTestIssue1>
Expand All @@ -172,9 +166,9 @@ private static void UseModelWithSingleItem(MLContext mlContext, ITransformer mod
// </SnippetOutputPrediction>
}

public static void UseLoadedModelWithBatchItems(MLContext mlContext)
public static void UseModelWithBatchItems(MLContext mlContext, ITransformer model)
{
// Adds some comments to test the trained model's predictions.
// Adds some comments to test the trained model's data points.
// <SnippetCreateTestIssues>
IEnumerable<SentimentData> sentiments = new[]
{
Expand All @@ -189,19 +183,11 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)
};
// </SnippetCreateTestIssues>

// <SnippetLoadModel>
ITransformer loadedModel;
using (var stream = new FileStream(_modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
{
loadedModel = mlContext.Model.Load(stream);
}
// </SnippetLoadModel>

// Load test data
// Load batch comments just created
// <SnippetPrediction>
IDataView sentimentStreamingDataView = mlContext.Data.LoadFromEnumerable(sentiments);
IDataView batchComments = mlContext.Data.LoadFromEnumerable(sentiments);

IDataView predictions = loadedModel.Transform(sentimentStreamingDataView);
IDataView predictions = model.Transform(batchComments);

// Use model to predict whether comment data is Positive (1) or Negative (0).
IEnumerable<SentimentPrediction> predictedResults = mlContext.Data.CreateEnumerable<SentimentPrediction>(predictions, reuseRowObject: false);
Expand All @@ -210,7 +196,7 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)
// <SnippetAddInfoMessage>
Console.WriteLine();

Console.WriteLine("=============== Prediction Test of loaded model with a multiple samples ===============");
Console.WriteLine("=============== Prediction Test of loaded model with multiple samples ===============");
// </SnippetAddInfoMessage>

Console.WriteLine();
Expand All @@ -227,21 +213,8 @@ public static void UseLoadedModelWithBatchItems(MLContext mlContext)

}
Console.WriteLine("=============== End of predictions ===============");

// </SnippetDisplayResults>
}

// Saves the model we trained to a zip file.

private static void SaveModelAsFile(MLContext mlContext, ITransformer model)
{
// <SnippetSaveModel>
using (var fs = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
mlContext.Model.Save(model, fs);
// </SnippetSaveModel>

Console.WriteLine("The model is saved to {0}", _modelPath);
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.ML" Version="0.11.0" />
<PackageReference Include="Microsoft.ML" Version="1.0.0-preview" />
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ public class SentimentPrediction
[ColumnName("PredictedLabel")]
public bool Prediction { get; set; }

// [ColumnName("Probability")]
public float Probability { get; set; }

// [ColumnName("Score")]
public float Score { get; set; }
}
// </SnippetDeclareTypes>
Expand Down