From c40a18cdb2521ed63341bc2b5569901e49547507 Mon Sep 17 00:00:00 2001
From: movelikeriver <mars.lenjoy@gmail.com>
Date: Mon, 8 Feb 2016 15:54:47 -0800
Subject: [PATCH 1/2] Refine naive Bayes example by checking model after
 loading it

---
 .../main/python/mllib/naive_bayes_example.py  | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index f5e120c678fc..1f17094b2770 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -17,9 +17,16 @@
 
 """
 NaiveBayes Example.
+
+Usage:
+  `spark-submit --master local[4] examples/src/main/python/mllib/naive_bayes_example.py`
 """
+
 from __future__ import print_function
 
+from os import path
+import shutil
+
 from pyspark import SparkContext
 # $example on$
 from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
@@ -38,8 +45,12 @@ def parseLine(line):
 
     sc = SparkContext(appName="PythonNaiveBayesExample")
 
+    WORK_DIR = './'
+
     # $example on$
-    data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
+    data = sc.textFile(path.join(WORK_DIR,
+                                 'data/mllib/sample_naive_bayes_data.txt')
+                       ).map(parseLine)
 
     # Split data aproximately into training (60%) and test (40%)
     training, test = data.randomSplit([0.6, 0.4], seed=0)
@@ -50,8 +61,17 @@ def parseLine(line):
     # Make prediction and test accuracy.
     predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
     accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    print('\n\tmodel accuracy %.4f\n' % accuracy)
 
     # Save and load model
-    model.save(sc, "target/tmp/myNaiveBayesModel")
-    sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
+    output_dir = '/tmp/myNaiveBayesModel'
+    shutil.rmtree(output_dir, ignore_errors=True)
+    model.save(sc, output_dir)
+    print('\n\tSaved to path %s\n' % output_dir)
+    sameModel = NaiveBayesModel.load(sc, output_dir)
+    print('\n\tLoaded from path %s\n' % output_dir)
+    predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
+    accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    print('\n\tsameModel accuracy %.4f\n' % accuracy)
+
     # $example off$

From 7e3ea32fdd51f2e5a631602b23576b6330d9f112 Mon Sep 17 00:00:00 2001
From: movelikeriver <mars.lenjoy@gmail.com>
Date: Thu, 11 Feb 2016 17:47:17 -0800
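Subject: [PATCH 2/2] Fix comments: simplify naive Bayes example paths and
 output

Remove the WORK_DIR / os.path indirection when reading the sample data,
save the model under target/tmp/myNaiveBayesModel instead of /tmp, drop
the "Saved to path" / "Loaded from path" prints, and print both accuracies
with str.format().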

---
 .../src/main/python/mllib/naive_bayes_example.py  | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 1f17094b2770..e7d5893d6741 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -24,7 +24,6 @@
 
 from __future__ import print_function
 
-from os import path
 import shutil
 
 from pyspark import SparkContext
@@ -45,12 +44,8 @@ def parseLine(line):
 
     sc = SparkContext(appName="PythonNaiveBayesExample")
 
-    WORK_DIR = './'
-
     # $example on$
-    data = sc.textFile(path.join(WORK_DIR,
-                                 'data/mllib/sample_naive_bayes_data.txt')
-                       ).map(parseLine)
+    data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine)
 
     # Split data aproximately into training (60%) and test (40%)
     training, test = data.randomSplit([0.6, 0.4], seed=0)
@@ -61,17 +56,15 @@ def parseLine(line):
     # Make prediction and test accuracy.
     predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
     accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
-    print('\n\tmodel accuracy %.4f\n' % accuracy)
+    print('model accuracy {}'.format(accuracy))
 
     # Save and load model
-    output_dir = '/tmp/myNaiveBayesModel'
+    output_dir = 'target/tmp/myNaiveBayesModel'
     shutil.rmtree(output_dir, ignore_errors=True)
     model.save(sc, output_dir)
-    print('\n\tSaved to path %s\n' % output_dir)
     sameModel = NaiveBayesModel.load(sc, output_dir)
-    print('\n\tLoaded from path %s\n' % output_dir)
     predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
     accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
-    print('\n\tsameModel accuracy %.4f\n' % accuracy)
+    print('sameModel accuracy {}'.format(accuracy))
 
     # $example off$