From f0894615cc0686aa9b13316a8127a9eedb3212d5 Mon Sep 17 00:00:00 2001
From: "jose.cambronero" <jose.cambronero@cloudera.com>
Date: Wed, 12 Aug 2015 17:42:06 -0700
Subject: [PATCH 1/3] added example for python kolmogorovSmirnovTest to docs,
 and cleaned up docs for scala example

---
 docs/mllib-statistics.md | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
index be04d0b4b53a8..c037bfdf69490 100644
--- a/docs/mllib-statistics.md
+++ b/docs/mllib-statistics.md
@@ -438,22 +438,42 @@ run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstra
 and interpret the hypothesis tests.
 
 {% highlight scala %}
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.stat.Statistics._
+import org.apache.spark.mllib.stat.Statistics
 
 val data: RDD[Double] = ... // an RDD of sample data
 
 // run a KS test for the sample versus a standard normal distribution
 val testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
 println(testResult) // summary of the test including the p-value, test statistic,
-                      // and null hypothesis
-                      // if our p-value indicates significance, we can reject the null hypothesis
+                    // and null hypothesis
+                    // if our p-value indicates significance, we can reject the null hypothesis
 
 // perform a KS test using a cumulative distribution function of our making
 val myCDF: Double => Double = ...
 val testResult2 = Statistics.kolmogorovSmirnovTest(data, myCDF)
 {% endhighlight %}
 </div>
+
+<div data-lang="python" markdown="1">
+[`Statistics`](api/python/index.html#org.apache.spark.mllib.stat.Statistics$) provides methods to
+run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstrates how to run
+and interpret the hypothesis tests.
+
+{% highlight python %}
+from pyspark.mllib.stat import Statistics
+
+localData = [1.0, 2.0, ... ] #a list of doubles
+parallelData =  sc.parallelize(localData) # an RDD of Double
+
+# run a KS test for the sample versus a standard normal distribution
+testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
+print(testResult) # summary of the test including the p-value, test statistic,
+                  # and null hypothesis
+                  # if our p-value indicates significance, we can reject the null hypothesis
+# Note: unlike the Scala API, the Python API does not support calling
+# Statistics.kolmogorovSmirnovTest with a custom function that computes the CDF
+{% endhighlight %}
+</div>
 </div>
 
 

From b914058d8349ca3de31a640ce9b2fbb9fc11b94c Mon Sep 17 00:00:00 2001
From: "jose.cambronero" <jose.cambronero@cloudera.com>
Date: Fri, 14 Aug 2015 15:07:00 -0700
Subject: [PATCH 2/3] fixed python docs link, incorporated PR feedback, and
 added Java example

---
 docs/mllib-statistics.md | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
index c037bfdf69490..6b1d5c29e8493 100644
--- a/docs/mllib-statistics.md
+++ b/docs/mllib-statistics.md
@@ -454,16 +454,37 @@ val testResult2 = Statistics.kolmogorovSmirnovTest(data, myCDF)
 {% endhighlight %}
 </div>
 
+<div data-lang="java" markdown="1">
+[`Statistics`](api/java/org/apache/spark/mllib/stat/Statistics.html) provides methods to
+run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstrates how to run
+and interpret the hypothesis tests.
+
+{% highlight java %}
+import com.google.common.collect.Lists;
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.stat.Statistics;
+import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
+
+JavaSparkContext jsc = ...
+JavaDoubleRDD data = jsc.parallelizeDoubles(Lists.newArrayList(0.2, 1.0, ...));
+KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
+// summary of the test including the p-value, test statistic,
+// and null hypothesis
+// if our p-value indicates significance, we can reject the null hypothesis
+System.out.println(testResult1);
+{% endhighlight %}
+</div>
+
 <div data-lang="python" markdown="1">
-[`Statistics`](api/python/index.html#org.apache.spark.mllib.stat.Statistics$) provides methods to
+[`Statistics`](api/python/index.html#pyspark.mllib.stat.Statistics$) provides methods to
 run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstrates how to run
 and interpret the hypothesis tests.
 
 {% highlight python %}
 from pyspark.mllib.stat import Statistics
 
-localData = [1.0, 2.0, ... ] #a list of doubles
-parallelData =  sc.parallelize(localData) # an RDD of Double
+parallelData = sc.parallelize([1.0, 2.0, ... ])
 
 # run a KS test for the sample versus a standard normal distribution
 testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)

From 77637129b146b647db6e09583f26664e9461cc79 Mon Sep 17 00:00:00 2001
From: "jose.cambronero" <jose.cambronero@cloudera.com>
Date: Mon, 17 Aug 2015 18:15:29 -0700
Subject: [PATCH 3/3] incorporated new PR feedback: changed import in example
 to Java core, fixed Python link, and fixed one additional typo

---
 docs/mllib-statistics.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
index 6b1d5c29e8493..a8a9be60bd340 100644
--- a/docs/mllib-statistics.md
+++ b/docs/mllib-statistics.md
@@ -460,24 +460,26 @@ run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstra
 and interpret the hypothesis tests.
 
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
 import org.apache.spark.api.java.JavaDoubleRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+
 import org.apache.spark.mllib.stat.Statistics;
 import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
 
 JavaSparkContext jsc = ...
-JavaDoubleRDD data = jsc.parallelizeDoubles(Lists.newArrayList(0.2, 1.0, ...));
+JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.2, 1.0, ...));
 KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
 // summary of the test including the p-value, test statistic,
 // and null hypothesis
 // if our p-value indicates significance, we can reject the null hypothesis
-System.out.println(testResult1);
+System.out.println(testResult);
 {% endhighlight %}
 </div>
 
 <div data-lang="python" markdown="1">
-[`Statistics`](api/python/index.html#pyspark.mllib.stat.Statistics$) provides methods to
+[`Statistics`](api/python/pyspark.mllib.html#pyspark.mllib.stat.Statistics) provides methods to
 run a 1-sample, 2-sided Kolmogorov-Smirnov test. The following example demonstrates how to run
 and interpret the hypothesis tests.