33using System . Collections . Generic ;
44using System . IO ;
55using System . Linq ;
6- using System . Threading . Tasks ;
7- using Microsoft . ML . Legacy ;
8- using Microsoft . ML . Legacy . Models ;
9- using Microsoft . ML . Legacy . Data ;
10- using Microsoft . ML . Legacy . Transforms ;
11- using Microsoft . ML . Legacy . Trainers ;
6+ using Microsoft . ML ;
7+ using Microsoft . ML . Core . Data ;
128using Microsoft . ML . Runtime . Api ;
9+ using Microsoft . ML . Runtime . Data ;
10+ using Microsoft . ML . Transforms . Text ;
1311// </Snippet1>
1412
1513namespace SentimentAnalysis
1614{
1715 class Program
1816 {
1917 // <Snippet2>
20- static readonly string _dataPath = Path . Combine ( Environment . CurrentDirectory , "Data" , "wikipedia-detox-250-line-data.tsv" ) ;
18+ static readonly string _trainDataPath = Path . Combine ( Environment . CurrentDirectory , "Data" , "wikipedia-detox-250-line-data.tsv" ) ;
2119 static readonly string _testDataPath = Path . Combine ( Environment . CurrentDirectory , "Data" , "wikipedia-detox-250-line-test.tsv" ) ;
22- static readonly string _modelpath = Path . Combine ( Environment . CurrentDirectory , "Data" , "Model.zip" ) ;
20+ static readonly string _modelPath = Path . Combine ( Environment . CurrentDirectory , "Data" , "Model.zip" ) ;
21+ static TextLoader _textLoader ;
2322 // </Snippet2>
2423
/// <summary>
/// Entry point. Creates the ML.NET context, configures the shared text loader,
/// then trains the model, evaluates it, predicts with the in-memory model and
/// finally predicts with the model reloaded from disk.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Create ML.NET context/local environment - allows you to add steps in order to keep everything together
    // during the learning process.
    // Create ML Context with seed for repeatable/deterministic results
    // <Snippet3>
    MLContext mlContext = new MLContext(seed: 0);
    // </Snippet3>

    // The TextLoader loads a dataset with comments and corresponding positive or negative sentiment.
    // When you create a loader, you specify the schema by listing the column names,
    // their types and their positions in the file. This is used to create the model, and train it.
    // Initialize our TextLoader. It is stored in a static field because Train and Evaluate
    // both read their data through it.
    // <Snippet4>
    _textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
    {
        Separator = "tab",
        HasHeader = true,
        Column = new[]
        {
            new TextLoader.Column("Label", DataKind.Bool, 0),
            new TextLoader.Column("SentimentText", DataKind.Text, 1)
        }
    });
    // </Snippet4>

    // <Snippet5>
    var model = Train(mlContext, _trainDataPath);
    // </Snippet5>

    // <Snippet11>
    Evaluate(mlContext, model);
    // </Snippet11>

    // <Snippet16>
    Predict(mlContext, model);
    // </Snippet16>

    // <Snippet25>
    PredictWithModelLoadedFromFile(mlContext);
    // </Snippet25>

    Console.WriteLine();
    Console.WriteLine("=============== End of process ===============");
}
70+
/// <summary>
/// Reads the training data through the shared text loader, builds the
/// featurization + FastTree pipeline and fits it.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the pipeline components.</param>
/// <param name="dataPath">Path of the training data file handed to the text loader.</param>
/// <returns>The fitted model returned by the pipeline's <c>Fit</c> call.</returns>
/// <remarks>
/// Relies on the static <c>_textLoader</c> field, which <c>Main</c> assigns before calling this method.
/// </remarks>
public static ITransformer Train(MLContext mlContext, string dataPath)
{
    // Note that this case, loading your training data from a file,
    // is the easiest way to get started, but ML.NET also allows you
    // to load data from databases or in-memory collections.
    // <Snippet6>
    IDataView dataView = _textLoader.Read(dataPath);
    // </Snippet6>

    // Create a flexible pipeline (composed by a chain of estimators) for creating/training the model.
    // This is used to format and clean the data.
    // Convert the text column to numeric vectors (Features column).
    // The input column name must match the "SentimentText" column declared in the
    // loader schema exactly - no surrounding whitespace.
    // <Snippet7>
    var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
    //</Snippet7>

    // Adds a FastTreeBinaryClassificationTrainer, the decision tree learner for this project
    // <Snippet8>
        .Append(mlContext.BinaryClassification.Trainers.FastTree(numLeaves: 50, numTrees: 50, minDatapointsInLeafs: 20));
    // </Snippet8>

    // Create and train the model based on the dataset that has been loaded, transformed.
    // <Snippet9>
    Console.WriteLine("=============== Create and Train the Model ===============");
    var model = pipeline.Fit(dataView);
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();
    // </Snippet9>

    // Returns the model we trained to use for evaluation.
    // <Snippet10>
    return model;
    // </Snippet10>
}
83105
84- public static void Evaluate ( PredictionModel < SentimentData , SentimentPrediction > model )
106+ public static void Evaluate ( MLContext mlContext , ITransformer model )
85107 {
86- // Evaluates.
108+ // Evaluate the model and show accuracy stats
109+ // Load evaluation/test data
110+ // <Snippet12>
111+ IDataView dataView = _textLoader . Read ( _testDataPath ) ;
112+ // </Snippet12>
113+
114+ //Take the data in, make transformations, output the data.
87115 // <Snippet13>
88- var testData = new TextLoader ( _testDataPath ) . CreateFrom < SentimentData > ( ) ;
116+ Console . WriteLine ( "=============== Evaluating Model accuracy with Test data===============" ) ;
117+ var predictions = model . Transform ( dataView ) ;
89118 // </Snippet13>
90119
91- // BinaryClassificationEvaluator computes the quality metrics for the PredictionModel
92- // using the specified data set .
120+ // BinaryClassificationContext.Evaluate returns a BinaryClassificationEvaluator.CalibratedResult
121+ // that contains the computed overall metrics .
93122 // <Snippet14>
94- var evaluator = new BinaryClassificationEvaluator ( ) ;
123+ var metrics = mlContext . BinaryClassification . Evaluate ( predictions , "Label" ) ;
95124 // </Snippet14>
96125
97- // BinaryClassificationMetrics contains the overall metrics computed by binary classification evaluators.
98- // <Snippet15>
99- BinaryClassificationMetrics metrics = evaluator . Evaluate ( model , testData ) ;
100- // </Snippet15>
101-
102126 // The Accuracy metric gets the accuracy of a classifier, which is the proportion
103127 // of correct predictions in the test set.
104128
@@ -111,57 +135,117 @@ public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction>
111135 // The F1 score is the harmonic mean of precision and recall:
112136 // 2 * precision * recall / (precision + recall).
113137
114- // <Snippet16 >
138+ // <Snippet15 >
115139 Console . WriteLine ( ) ;
116- Console . WriteLine ( "PredictionModel quality metrics evaluation" ) ;
117- Console . WriteLine ( "------------------------------------------ " ) ;
140+ Console . WriteLine ( "Model quality metrics evaluation" ) ;
141+ Console . WriteLine ( "--------------------------------" ) ;
118142 Console . WriteLine ( $ "Accuracy: { metrics . Accuracy : P2} ") ;
119143 Console . WriteLine ( $ "Auc: { metrics . Auc : P2} ") ;
120144 Console . WriteLine ( $ "F1Score: { metrics . F1Score : P2} ") ;
121- // </Snippet16>
145+ Console . WriteLine ( "=============== End of model evaluation ===============" ) ;
146+ //</Snippet15>
147+
148+ // Save the new model to .ZIP file
149+ // <Snippet23>
150+ SaveModelAsFile ( mlContext , model ) ;
151+ // </Snippet23>
122152 }
123153
/// <summary>
/// Scores a single hard-coded sample comment with the given model and writes
/// the text, the predicted label and the probability to the console.
/// </summary>
/// <param name="mlContext">ML.NET context passed to the prediction function factory.</param>
/// <param name="model">Trained model used to create the prediction function.</param>
private static void Predict(MLContext mlContext, ITransformer model)
{
    // <Snippet17>
    var scorer = model.MakePredictionFunction<SentimentData, SentimentPrediction>(mlContext);
    // </Snippet17>

    // <Snippet18>
    var sample = new SentimentData { SentimentText = "This is a very rude movie" };
    // </Snippet18>

    // <Snippet19>
    var outcome = scorer.Predict(sample);
    // </Snippet19>
    // <Snippet20>
    Console.WriteLine();
    Console.WriteLine("=============== Prediction Test of model with a single sample and test dataset ===============");

    Console.WriteLine();
    // A true prediction is reported as "Toxic", anything else as "Not Toxic".
    string verdict = Convert.ToBoolean(outcome.Prediction) ? "Toxic" : "Not Toxic";
    Console.WriteLine($"Sentiment: {sample.SentimentText} | Prediction: {verdict} | Probability: {outcome.Probability} ");
    Console.WriteLine("=============== End of Predictions ===============");
    Console.WriteLine();
    // </Snippet20>
}
180+
/// <summary>
/// Loads the model from the zip file at <c>_modelPath</c>, scores a small batch of
/// hard-coded comments with it and prints one line per comment.
/// </summary>
/// <param name="mlContext">ML.NET context used to load the model and wrap the samples.</param>
/// <remarks>
/// The file at <c>_modelPath</c> is written by <c>SaveModelAsFile</c>; opening it
/// fails if that file does not exist yet.
/// </remarks>
public static void PredictWithModelLoadedFromFile(MLContext mlContext)
{
    // Adds some comments to test the trained model's predictions.
    // <Snippet26>
    IEnumerable<SentimentData> sentiments = new[]
    {
        new SentimentData
        {
            SentimentText = "This is a very rude movie"
        },
        new SentimentData
        {
            SentimentText = "He is the best, and the article should say that."
        }
    };
    // </Snippet26>

    // <Snippet27>
    ITransformer loadedModel;
    using (var stream = new FileStream(_modelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        loadedModel = mlContext.Model.Load(stream);
    }
    // </Snippet27>

    // <Snippet28>
    // Create prediction engine
    var sentimentStreamingDataView = mlContext.CreateStreamingDataView(sentiments);
    var predictions = loadedModel.Transform(sentimentStreamingDataView);

    // Use the model to predict whether comment data is toxic (1) or nice (0).
    var predictedResults = predictions.AsEnumerable<SentimentPrediction>(mlContext, reuseRowObject: false);
    // </Snippet28>

    // <Snippet29>
    Console.WriteLine();

    Console.WriteLine("=============== Prediction Test of loaded model with a multiple samples ===============");
    // </Snippet29>

    Console.WriteLine();

    // Builds pairs of (sentiment, prediction)
    // <Snippet30>
    var sentimentsAndPredictions = sentiments.Zip(predictedResults, (sentiment, prediction) => (sentiment, prediction));
    // </Snippet30>

    // <Snippet31>
    foreach (var item in sentimentsAndPredictions)
    {
        // Labels are spelled exactly as in Predict so both code paths print the same text.
        Console.WriteLine($"Sentiment: {item.sentiment.SentimentText} | Prediction: {(Convert.ToBoolean(item.prediction.Prediction) ? "Toxic" : "Not Toxic")} | Probability: {item.prediction.Probability} ");
    }
    Console.WriteLine("=============== End of predictions ===============");

    // </Snippet31>
}
237+
/// <summary>
/// Saves the model we trained to a zip file at <c>_modelPath</c> and reports the path on the console.
/// </summary>
/// <param name="mlContext">ML.NET context whose model operations perform the save.</param>
/// <param name="model">Trained model to write out.</param>
private static void SaveModelAsFile(MLContext mlContext, ITransformer model)
{
    // <Snippet24>
    using (var modelStream = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        mlContext.Model.Save(model, modelStream);
    }
    // </Snippet24>

    Console.WriteLine("The model is saved to {0}", _modelPath);
}
249+
166250 }
167251}
0 commit comments