From ab6e4f1651ec09e576b8dcf8a611c9f2ea2169a5 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 25 May 2017 23:48:23 -0700
Subject: [PATCH 01/11] consolidated doc change for SQL aggregate functions

---
 R/pkg/R/functions.R | 398 +++++++++++++++++---------------------------
 R/pkg/R/generics.R  |  60 ++++---
 2 files changed, 194 insertions(+), 264 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 06a90192bb12..a9a341c22715 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -18,6 +18,21 @@
 #' @include generics.R column.R
 NULL
 
+#' Aggregate functions for Column operations
+#'
+#' Aggregate functions defined for \code{Column}.
+#'
+#' @param x Column to compute on.
+#' @param ... additional argument(s).
+#' @name column_aggregate_functions
+#' @rdname column_aggregate_functions
+#' @family aggregate functions
+#' @examples
+#' \dontrun{
+#' # Dataframe used throughout this doc
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))}
+NULL
+
 #' lit
 #'
 #' A new \linkS4class{Column} is created to represent the literal value.
@@ -85,17 +100,20 @@ setMethod("acos",
             column(jc)
           })
 
-#' Returns the approximate number of distinct items in a group
+#' @section Details:
+#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
 #'
-#' Returns the approximate number of distinct items in a group. This is a column
-#' aggregate function.
-#'
-#' @rdname approxCountDistinct
-#' @name approxCountDistinct
-#' @return the approximate number of distinct items in a group.
+#' @rdname column_aggregate_functions
 #' @export
-#' @aliases approxCountDistinct,Column-method
-#' @examples \dontrun{approxCountDistinct(df$c)}
+#' @aliases approxCountDistinct approxCountDistinct,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, approxCountDistinct(df$gear)))
+#' head(select(df, approxCountDistinct(df$gear, 0.02)))
+#' head(select(df, countDistinct(df$gear)))
+#' head(select(df, n_distinct(df$gear)))
+#' head(distinct(select(df, "gear")))}
 #' @note approxCountDistinct(Column) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
@@ -166,16 +184,24 @@ setMethod("atan",
             column(jc)
           })
 
-#' avg
-#'
-#' Aggregate function: returns the average of the values in a group.
+#' @section Details:
+#' \code{avg}: Returns the average of the values in a group.
 #'
-#' @rdname avg
-#' @name avg
-#' @family aggregate functions
+#' @rdname column_aggregate_functions
 #' @export
-#' @aliases avg,Column-method
-#' @examples \dontrun{avg(df$c)}
+#' @aliases avg avg,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, avg(df$mpg), mean(df$mpg), min(df$wt), max(df$qsec)))
+#'
+#' # metrics by num of cylinders
+#' tmp <- agg(groupBy(df, "cyl"), avg(df$mpg), avg(df$hp), avg(df$wt), avg(df$qsec))
+#' head(orderBy(tmp, "cyl"))
+#'
+#' # car with the max mpg
+#' mpg_max <- as.numeric(collect(agg(df, max(df$mpg))))
+#' head(where(df, df$mpg == mpg_max))}
 #' @note avg since 1.4.0
 setMethod("avg",
           signature(x = "Column"),
@@ -823,18 +849,16 @@ setMethod("isnan",
             column(jc)
           })
 
-#' kurtosis
+#' @section Details:
+#' \code{kurtosis}: Returns the kurtosis of the values in a group.
 #'
-#' Aggregate function: returns the kurtosis of the values in a group.
-#'
-#' @param x Column to compute on.
-#'
-#' @rdname kurtosis
-#' @name kurtosis
-#' @aliases kurtosis,Column-method
-#' @family aggregate functions
+#' @rdname column_aggregate_functions
+#' @aliases kurtosis kurtosis,Column-method
 #' @export
-#' @examples \dontrun{kurtosis(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, mean(df$mpg), sd(df$mpg), skewness(df$mpg), kurtosis(df$mpg)))}
 #' @note kurtosis since 1.6.0
 setMethod("kurtosis",
           signature(x = "Column"),
@@ -1040,18 +1064,11 @@ setMethod("ltrim",
             column(jc)
           })
 
-#' max
-#'
-#' Aggregate function: returns the maximum value of the expression in a group.
+#' @section Details:
+#' \code{max}: Returns the maximum value of the expression in a group.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname max
-#' @name max
-#' @family aggregate functions
-#' @aliases max,Column-method
-#' @export
-#' @examples \dontrun{max(df$c)}
+#' @rdname column_aggregate_functions
+#' @aliases max max,Column-method
 #' @note max since 1.5.0
 setMethod("max",
           signature(x = "Column"),
@@ -1081,19 +1098,12 @@ setMethod("md5",
             column(jc)
           })
 
-#' mean
-#'
-#' Aggregate function: returns the average of the values in a group.
-#' Alias for avg.
-#'
-#' @param x Column to compute on.
+#' @section Details:
+#' \code{mean}: Returns the average of the values in a group. Alias for avg.
 #'
-#' @rdname mean
-#' @name mean
-#' @family aggregate functions
-#' @aliases mean,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases mean mean,Column-method
 #' @export
-#' @examples \dontrun{mean(df$c)}
 #' @note mean since 1.5.0
 setMethod("mean",
           signature(x = "Column"),
@@ -1102,18 +1112,12 @@ setMethod("mean",
             column(jc)
           })
 
-#' min
-#'
-#' Aggregate function: returns the minimum value of the expression in a group.
-#'
-#' @param x Column to compute on.
+#' @section Details:
+#' \code{min}: Returns the minimum value of the expression in a group.
 #'
-#' @rdname min
-#' @name min
-#' @aliases min,Column-method
-#' @family aggregate functions
+#' @rdname column_aggregate_functions
+#' @aliases min min,Column-method
 #' @export
-#' @examples \dontrun{min(df$c)}
 #' @note min since 1.5.0
 setMethod("min",
           signature(x = "Column"),
@@ -1338,24 +1342,17 @@ setMethod("rtrim",
             column(jc)
           })
 
-#' sd
-#'
-#' Aggregate function: alias for \link{stddev_samp}
+
+#' @section Details:
+#' \code{sd}: Alias for \code{stddev_samp}.
 #'
-#' @param x Column to compute on.
-#' @param na.rm currently not used.
-#' @rdname sd
-#' @name sd
-#' @family aggregate functions
-#' @aliases sd,Column-method
-#' @seealso \link{stddev_pop}, \link{stddev_samp}
+#' @rdname column_aggregate_functions
+#' @aliases sd sd,Column-method
 #' @export
 #' @examples
-#'\dontrun{
-#'stddev(df$c)
-#'select(df, stddev(df$age))
-#'agg(df, sd(df$age))
-#'}
+#'
+#' \dontrun{
+#' head(select(df, sd(df$mpg), stddev(df$mpg), stddev_pop(df$wt), stddev_samp(df$qsec)))}
 #' @note sd since 1.6.0
 setMethod("sd",
           signature(x = "Column"),
@@ -1465,18 +1462,12 @@ setMethod("sinh",
             column(jc)
           })
 
-#' skewness
-#'
-#' Aggregate function: returns the skewness of the values in a group.
-#'
-#' @param x Column to compute on.
+#' @section Details:
+#' \code{skewness}: Returns the skewness of the values in a group.
 #'
-#' @rdname skewness
-#' @name skewness
-#' @family aggregate functions
-#' @aliases skewness,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases skewness skewness,Column-method
 #' @export
-#' @examples \dontrun{skewness(df$c)}
 #' @note skewness since 1.6.0
 setMethod("skewness",
           signature(x = "Column"),
@@ -1527,9 +1518,11 @@ setMethod("spark_partition_id",
             column(jc)
           })
 
-#' @rdname sd
-#' @aliases stddev,Column-method
-#' @name stddev
+#' @section Details:
+#' \code{stddev}: Alias for \code{std_dev}.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases stddev stddev,Column-method
 #' @note stddev since 1.6.0
 setMethod("stddev",
           signature(x = "Column"),
@@ -1538,19 +1531,12 @@ setMethod("stddev",
             column(jc)
           })
 
-#' stddev_pop
+#' @section Details:
+#' \code{stddev_pop}: Returns the population standard deviation of the expression in a group.
 #'
-#' Aggregate function: returns the population standard deviation of the expression in a group.
-#'
-#' @param x Column to compute on.
-#'
-#' @rdname stddev_pop
-#' @name stddev_pop
-#' @family aggregate functions
-#' @aliases stddev_pop,Column-method
-#' @seealso \link{sd}, \link{stddev_samp}
+#' @rdname column_aggregate_functions
+#' @aliases stddev_pop stddev_pop,Column-method
 #' @export
-#' @examples \dontrun{stddev_pop(df$c)}
 #' @note stddev_pop since 1.6.0
 setMethod("stddev_pop",
           signature(x = "Column"),
@@ -1559,19 +1545,12 @@ setMethod("stddev_pop",
             column(jc)
           })
 
-#' stddev_samp
+#' @section Details:
+#' \code{stddev_samp}: Returns the unbiased sample standard deviation of the expression in a group.
 #'
-#' Aggregate function: returns the unbiased sample standard deviation of the expression in a group.
-#'
-#' @param x Column to compute on.
-#'
-#' @rdname stddev_samp
-#' @name stddev_samp
-#' @family aggregate functions
-#' @aliases stddev_samp,Column-method
-#' @seealso \link{stddev_pop}, \link{sd}
+#' @rdname column_aggregate_functions
+#' @aliases stddev_samp stddev_samp,Column-method
 #' @export
-#' @examples \dontrun{stddev_samp(df$c)}
 #' @note stddev_samp since 1.6.0
 setMethod("stddev_samp",
           signature(x = "Column"),
@@ -1630,18 +1609,12 @@ setMethod("sqrt",
             column(jc)
           })
 
-#' sum
-#'
-#' Aggregate function: returns the sum of all values in the expression.
+#' @section Details:
+#' \code{sum}: Returns the sum of all values in the expression.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname sum
-#' @name sum
-#' @family aggregate functions
-#' @aliases sum,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases sum sum,Column-method
 #' @export
-#' @examples \dontrun{sum(df$c)}
 #' @note sum since 1.5.0
 setMethod("sum",
           signature(x = "Column"),
@@ -1650,18 +1623,17 @@ setMethod("sum",
             column(jc)
           })
 
-#' sumDistinct
-#'
-#' Aggregate function: returns the sum of distinct values in the expression.
+#' @section Details:
+#' \code{sumDistinct}: Returns the sum of distinct values in the expression.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname sumDistinct
-#' @name sumDistinct
-#' @family aggregate functions
-#' @aliases sumDistinct,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases sumDistinct sumDistinct,Column-method
 #' @export
-#' @examples \dontrun{sumDistinct(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, sumDistinct(df$gear)))
+#' head(distinct(select(df, "gear")))}
 #' @note sumDistinct since 1.4.0
 setMethod("sumDistinct",
           signature(x = "Column"),
@@ -1952,24 +1924,16 @@ setMethod("upper",
             column(jc)
           })
 
-#' var
+#' @section Details:
+#' \code{var}: Alias for \code{var_samp}.
 #'
-#' Aggregate function: alias for \link{var_samp}.
-#'
-#' @param x a Column to compute on.
-#' @param y,na.rm,use currently not used.
-#' @rdname var
-#' @name var
-#' @family aggregate functions
-#' @aliases var,Column-method
-#' @seealso \link{var_pop}, \link{var_samp}
+#' @rdname column_aggregate_functions
+#' @aliases var var,Column-method
 #' @export
 #' @examples
+#'
 #'\dontrun{
-#'variance(df$c)
-#'select(df, var_pop(df$age))
-#'agg(df, var(df$age))
-#'}
+#' head(agg(df, var(df$mpg), variance(df$mpg), var_pop(df$mpg), var_samp(df$mpg)))}
 #' @note var since 1.6.0
 setMethod("var",
           signature(x = "Column"),
@@ -1978,9 +1942,9 @@ setMethod("var",
             var_samp(x)
           })
 
-#' @rdname var
-#' @aliases variance,Column-method
-#' @name variance
+#' @rdname column_aggregate_functions
+#' @aliases variance variance,Column-method
+#' @export
 #' @note variance since 1.6.0
 setMethod("variance",
           signature(x = "Column"),
@@ -1989,19 +1953,12 @@ setMethod("variance",
             column(jc)
           })
 
-#' var_pop
-#'
-#' Aggregate function: returns the population variance of the values in a group.
+#' @section Details:
+#' \code{var_pop}: Returns the population variance of the values in a group.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname var_pop
-#' @name var_pop
-#' @family aggregate functions
-#' @aliases var_pop,Column-method
-#' @seealso \link{var}, \link{var_samp}
+#' @rdname column_aggregate_functions
+#' @aliases var_pop var_pop,Column-method
 #' @export
-#' @examples \dontrun{var_pop(df$c)}
 #' @note var_pop since 1.5.0
 setMethod("var_pop",
           signature(x = "Column"),
@@ -2010,19 +1967,12 @@ setMethod("var_pop",
             column(jc)
           })
 
-#' var_samp
-#'
-#' Aggregate function: returns the unbiased variance of the values in a group.
+#' @section Details:
+#' \code{var_samp}: Returns the unbiased variance of the values in a group.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname var_samp
-#' @name var_samp
-#' @aliases var_samp,Column-method
-#' @family aggregate functions
-#' @seealso \link{var_pop}, \link{var}
+#' @rdname column_aggregate_functions
+#' @aliases var_samp var_samp,Column-method
 #' @export
-#' @examples \dontrun{var_samp(df$c)}
 #' @note var_samp since 1.6.0
 setMethod("var_samp",
           signature(x = "Column"),
@@ -2235,17 +2185,11 @@ setMethod("pmod", signature(y = "Column"),
             column(jc)
           })
 
-
-#' @rdname approxCountDistinct
-#' @name approxCountDistinct
-#'
-#' @param x Column to compute on.
 #' @param rsd maximum estimation error allowed (default = 0.05)
-#' @param ... further arguments to be passed to or from other methods.
 #'
+#' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
 #' @export
-#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
@@ -2254,18 +2198,12 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' Count Distinct Values
+#' @section Details:
+#' \code{countDistinct}: Returns the number of distinct items in a group.
 #'
-#' @param x Column to compute on
-#' @param ... other columns
-#'
-#' @family aggregate functions
-#' @rdname countDistinct
-#' @name countDistinct
-#' @aliases countDistinct,Column-method
-#' @return the number of distinct items in a group.
+#' @rdname column_aggregate_functions
+#' @aliases countDistinct countDistinct,Column-method
 #' @export
-#' @examples \dontrun{countDistinct(df$c)}
 #' @note countDistinct since 1.4.0
 setMethod("countDistinct",
           signature(x = "Column"),
@@ -2384,15 +2322,12 @@ setMethod("sign", signature(x = "Column"),
             signum(x)
           })
 
-#' n_distinct
+#' @section Details:
+#' \code{n_distinct}: Returns the number of distinct items in a group.
 #'
-#' Aggregate function: returns the number of distinct items in a group.
-#'
-#' @rdname countDistinct
-#' @name n_distinct
-#' @aliases n_distinct,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases n_distinct n_distinct,Column-method
 #' @export
-#' @examples \dontrun{n_distinct(df$c)}
 #' @note n_distinct since 1.4.0
 setMethod("n_distinct", signature(x = "Column"),
           function(x, ...) {
@@ -3717,18 +3652,18 @@ setMethod("create_map",
             column(jc)
           })
 
-#' collect_list
-#'
-#' Creates a list of objects with duplicates.
-#'
-#' @param x Column to compute on
+#' @section Details:
+#' \code{collect_list}: Creates a list of objects with duplicates.
 #'
-#' @rdname collect_list
-#' @name collect_list
-#' @family aggregate functions
-#' @aliases collect_list,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases collect_list collect_list,Column-method
 #' @export
-#' @examples \dontrun{collect_list(df$x)}
+#' @examples
+#'
+#' \dontrun{
+#' df2 <- df[df$mpg > 20, ]
+#' collect(select(df2, collect_list(df2$gear)))
+#' collect(select(df2, collect_set(df2$gear)))}
 #' @note collect_list since 2.3.0
 setMethod("collect_list",
           signature(x = "Column"),
@@ -3737,18 +3672,12 @@ setMethod("collect_list",
             column(jc)
           })
 
-#' collect_set
+#' @section Details:
+#' \code{collect_set}: Creates a list of objects with duplicate elements eliminated.
 #'
-#' Creates a list of objects with duplicate elements eliminated.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname collect_set
-#' @name collect_set
-#' @family aggregate functions
-#' @aliases collect_set,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases collect_set collect_set,Column-method
 #' @export
-#' @examples \dontrun{collect_set(df$x)}
 #' @note collect_set since 2.3.0
 setMethod("collect_set",
           signature(x = "Column"),
@@ -3908,24 +3837,17 @@ setMethod("not",
             column(jc)
           })
 
-#' grouping_bit
-#'
-#' Indicates whether a specified column in a GROUP BY list is aggregated or not,
-#' returns 1 for aggregated or 0 for not aggregated in the result set.
-#'
-#' Same as \code{GROUPING} in SQL and \code{grouping} function in Scala.
-#'
-#' @param x Column to compute on
+#' @section Details:
+#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or not,
+#' returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING} in SQL
+#' and \code{grouping} function in Scala.
 #'
-#' @rdname grouping_bit
-#' @name grouping_bit
-#' @family aggregate functions
-#' @aliases grouping_bit,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases grouping_bit grouping_bit,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- createDataFrame(mtcars)
 #'
+#' \dontrun{
 #' # With cube
 #' agg(
 #'   cube(df, "cyl", "gear", "am"),
@@ -3938,8 +3860,7 @@ setMethod("not",
 #'   rollup(df, "cyl", "gear", "am"),
 #'   mean(df$mpg),
 #'   grouping_bit(df$cyl), grouping_bit(df$gear), grouping_bit(df$am)
-#' )
-#' }
+#' )}
 #' @note grouping_bit since 2.3.0
 setMethod("grouping_bit",
           signature(x = "Column"),
@@ -3948,26 +3869,18 @@ setMethod("grouping_bit",
             column(jc)
           })
 
-#' grouping_id
-#'
-#' Returns the level of grouping.
-#'
+#' @section Details:
+#' \code{grouping_id}: Returns the level of grouping.
 #' Equals to \code{
 #' grouping_bit(c1) * 2^(n - 1) + grouping_bit(c2) * 2^(n - 2)  + ... + grouping_bit(cn)
 #' }
 #'
-#' @param x Column to compute on
-#' @param ... additional Column(s) (optional).
-#'
-#' @rdname grouping_id
-#' @name grouping_id
-#' @family aggregate functions
-#' @aliases grouping_id,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases grouping_id grouping_id,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- createDataFrame(mtcars)
 #'
+#' \dontrun{
 #' # With cube
 #' agg(
 #'   cube(df, "cyl", "gear", "am"),
@@ -3980,8 +3893,7 @@ setMethod("grouping_bit",
 #'   rollup(df, "cyl", "gear", "am"),
 #'   mean(df$mpg),
 #'   grouping_id(df$cyl, df$gear, df$am)
-#' )
-#' }
+#' )}
 #' @note grouping_id since 2.3.0
 setMethod("grouping_id",
           signature(x = "Column"),
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 514ca99d45cd..d8950d4c2d27 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -907,8 +907,9 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
 #' @export
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 
-#' @rdname approxCountDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
 
 #' @rdname array_contains
@@ -919,10 +920,9 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
 #' @export
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
-#' @param x Column to compute on or a GroupedData object.
-#' @param ... additional argument(s) when \code{x} is a GroupedData object.
-#' @rdname avg
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })
 
 #' @rdname base64
@@ -949,12 +949,14 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
 #' @export
 setGeneric("ceil", function(x) { standardGeneric("ceil") })
 
-#' @rdname collect_list
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("collect_list", function(x) { standardGeneric("collect_list") })
 
-#' @rdname collect_set
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 
 #' @rdname column
@@ -973,8 +975,9 @@ setGeneric("concat_ws", function(sep, x, ...) { standardGeneric("concat_ws") })
 #' @export
 setGeneric("conv", function(x, fromBase, toBase) { standardGeneric("conv") })
 
-#' @rdname countDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct") })
 
 #' @rdname crc32
@@ -1071,12 +1074,14 @@ setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime")
 #' @export
 setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
 
-#' @rdname grouping_bit
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("grouping_bit", function(x) { standardGeneric("grouping_bit") })
 
-#' @rdname grouping_id
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("grouping_id", function(x, ...) { standardGeneric("grouping_id") })
 
 #' @rdname hex
@@ -1109,8 +1114,9 @@ setGeneric("instr", function(y, x) { standardGeneric("instr") })
 #' @export
 setGeneric("isnan", function(x) { standardGeneric("isnan") })
 
-#' @rdname kurtosis
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") })
 
 #' @rdname lag
@@ -1203,8 +1209,9 @@ setGeneric("next_day", function(y, x) { standardGeneric("next_day") })
 #' @export
 setGeneric("ntile", function(x) { standardGeneric("ntile") })
 
-#' @rdname countDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
 
 #' @param x empty. Should be used with no argument.
@@ -1274,8 +1281,9 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 #' @export
 setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
 
-#' @rdname sd
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") })
 
 #' @rdname second
@@ -1310,8 +1318,9 @@ setGeneric("signum", function(x) { standardGeneric("signum") })
 #' @export
 setGeneric("size", function(x) { standardGeneric("size") })
 
-#' @rdname skewness
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("skewness", function(x) { standardGeneric("skewness") })
 
 #' @rdname sort_array
@@ -1331,16 +1340,19 @@ setGeneric("soundex", function(x) { standardGeneric("soundex") })
 #' @export
 setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") })
 
-#' @rdname sd
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev", function(x) { standardGeneric("stddev") })
 
-#' @rdname stddev_pop
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev_pop", function(x) { standardGeneric("stddev_pop") })
 
-#' @rdname stddev_samp
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev_samp", function(x) { standardGeneric("stddev_samp") })
 
 #' @rdname struct
@@ -1351,8 +1363,9 @@ setGeneric("struct", function(x, ...) { standardGeneric("struct") })
 #' @export
 setGeneric("substring_index", function(x, delim, count) { standardGeneric("substring_index") })
 
-#' @rdname sumDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
 #' @rdname toDegrees
@@ -1403,20 +1416,25 @@ setGeneric("unix_timestamp", function(x, format) { standardGeneric("unix_timesta
 #' @export
 setGeneric("upper", function(x) { standardGeneric("upper") })
 
-#' @rdname var
+#' @rdname column_aggregate_functions
+#' @param y,na.rm,use currently not used.
 #' @export
+#' @name NULL
 setGeneric("var", function(x, y = NULL, na.rm = FALSE, use) { standardGeneric("var") })
 
-#' @rdname var
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("variance", function(x) { standardGeneric("variance") })
 
-#' @rdname var_pop
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("var_pop", function(x) { standardGeneric("var_pop") })
 
-#' @rdname var_samp
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("var_samp", function(x) { standardGeneric("var_samp") })
 
 #' @rdname weekofyear

From 0044b29853c949b0baac7c70ed35658ed6005943 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Wed, 31 May 2017 23:45:27 -0700
Subject: [PATCH 02/11] address comments

---
 R/pkg/R/functions.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a9a341c22715..3dc76a56a420 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -193,7 +193,7 @@ setMethod("atan",
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, avg(df$mpg), mean(df$mpg), min(df$wt), max(df$qsec)))
+#' head(select(df, avg(df$mpg), mean(df$mpg), sum(df$mpg), min(df$wt), max(df$qsec)))
 #'
 #' # metrics by num of cylinders
 #' tmp <- agg(groupBy(df, "cyl"), avg(df$mpg), avg(df$hp), avg(df$wt), avg(df$qsec))
@@ -1099,7 +1099,7 @@ setMethod("md5",
           })
 
 #' @section Details:
-#' \code{mean}: Returns the average of the values in a group. Alias for avg.
+#' \code{mean}: Returns the average of the values in a group. Alias for \code{avg}.
 #'
 #' @rdname column_aggregate_functions
 #' @aliases mean mean,Column-method

From 014b9f3069a6e2075cb8be307c5d74081dabe15a Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 15 Jun 2017 20:19:31 -0700
Subject: [PATCH 03/11] address comments

---
 R/pkg/R/functions.R | 49 +++++++++++++++++++++++----------------------
 R/pkg/R/generics.R  |  1 -
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 3dc76a56a420..f6bec04c5757 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -23,7 +23,8 @@ NULL
 #' Aggregate functions defined for \code{Column}.
 #'
 #' @param x Column to compute on.
-#' @param ... additional argument(s).
+#' @param y,na.rm,use currently not used.
+#' @param ... additional argument(s). For example, it could be used to pass additional Columns.
 #' @name column_aggregate_functions
 #' @rdname column_aggregate_functions
 #' @family aggregate functions
@@ -100,7 +101,7 @@ setMethod("acos",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -111,7 +112,7 @@ setMethod("acos",
 #' \dontrun{
 #' head(select(df, approxCountDistinct(df$gear)))
 #' head(select(df, approxCountDistinct(df$gear, 0.02)))
-#' head(select(df, countDistinct(df$gear)))
+#' head(select(df, countDistinct(df$gear, df$cyl)))
 #' head(select(df, n_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
 #' @note approxCountDistinct(Column) since 1.4.0
@@ -184,7 +185,7 @@ setMethod("atan",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{avg}: Returns the average of the values in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -849,7 +850,7 @@ setMethod("isnan",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{kurtosis}: Returns the kurtosis of the values in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1064,7 +1065,7 @@ setMethod("ltrim",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{max}: Returns the maximum value of the expression in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1098,7 +1099,7 @@ setMethod("md5",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{mean}: Returns the average of the values in a group. Alias for \code{avg}.
 #'
 #' @rdname column_aggregate_functions
@@ -1112,7 +1113,7 @@ setMethod("mean",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{min}: Returns the minimum value of the expression in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1343,7 +1344,7 @@ setMethod("rtrim",
           })
 
 
-#' @section Details:
+#' @details
 #' \code{sd}: Alias for \code{stddev_samp}.
 #'
 #' @rdname column_aggregate_functions
@@ -1462,7 +1463,7 @@ setMethod("sinh",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{skewness}: Returns the skewness of the values in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1518,7 +1519,7 @@ setMethod("spark_partition_id",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{stddev}: Alias for \code{std_dev}.
 #'
 #' @rdname column_aggregate_functions
@@ -1531,7 +1532,7 @@ setMethod("stddev",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{stddev_pop}: Returns the population standard deviation of the expression in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1545,7 +1546,7 @@ setMethod("stddev_pop",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{stddev_samp}: Returns the unbiased sample standard deviation of the expression in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1609,7 +1610,7 @@ setMethod("sqrt",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{sum}: Returns the sum of all values in the expression.
 #'
 #' @rdname column_aggregate_functions
@@ -1623,7 +1624,7 @@ setMethod("sum",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{sumDistinct}: Returns the sum of distinct values in the expression.
 #'
 #' @rdname column_aggregate_functions
@@ -1924,7 +1925,7 @@ setMethod("upper",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{var}: Alias for \code{var_samp}.
 #'
 #' @rdname column_aggregate_functions
@@ -1953,7 +1954,7 @@ setMethod("variance",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{var_pop}: Returns the population variance of the values in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -1967,7 +1968,7 @@ setMethod("var_pop",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{var_samp}: Returns the unbiased variance of the values in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -2198,7 +2199,7 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{countDistinct}: Returns the number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -2322,7 +2323,7 @@ setMethod("sign", signature(x = "Column"),
             signum(x)
           })
 
-#' @section Details:
+#' @details
 #' \code{n_distinct}: Returns the number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
@@ -3652,7 +3653,7 @@ setMethod("create_map",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{collect_list}: Creates a list of objects with duplicates.
 #'
 #' @rdname column_aggregate_functions
@@ -3672,7 +3673,7 @@ setMethod("collect_list",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{collect_set}: Creates a list of objects with duplicate elements eliminated.
 #'
 #' @rdname column_aggregate_functions
@@ -3837,7 +3838,7 @@ setMethod("not",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or not,
 #' returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING} in SQL
 #' and \code{grouping} function in Scala.
@@ -3869,7 +3870,7 @@ setMethod("grouping_bit",
             column(jc)
           })
 
-#' @section Details:
+#' @details
 #' \code{grouping_id}: Returns the level of grouping.
 #' Equals to \code{
 #' grouping_bit(c1) * 2^(n - 1) + grouping_bit(c2) * 2^(n - 2)  + ... + grouping_bit(cn)
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index d8950d4c2d27..802e76c6b09a 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1417,7 +1417,6 @@ setGeneric("unix_timestamp", function(x, format) { standardGeneric("unix_timesta
 setGeneric("upper", function(x) { standardGeneric("upper") })
 
 #' @rdname column_aggregate_functions
-#' @param y,na.rm,use currently not used.
 #' @export
 #' @name NULL
 setGeneric("var", function(x, y = NULL, na.rm = FALSE, use) { standardGeneric("var") })

From 36203dfa4a5be1603754130585adeb6c3f233b01 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 15 Jun 2017 20:43:24 -0700
Subject: [PATCH 04/11] pull avg into its own doc

---
 R/pkg/R/functions.R | 32 +++++++++++++++++---------------
 R/pkg/R/generics.R  |  4 ++--
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index f6bec04c5757..22f22f965d77 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -185,24 +185,14 @@ setMethod("atan",
             column(jc)
           })
 
-#' @details
-#' \code{avg}: Returns the average of the values in a group.
+#' avg
 #'
-#' @rdname column_aggregate_functions
+#' Returns the average of the values in a group.
+#'
+#' @rdname avg
 #' @export
 #' @aliases avg avg,Column-method
-#' @examples
-#'
-#' \dontrun{
-#' head(select(df, avg(df$mpg), mean(df$mpg), sum(df$mpg), min(df$wt), max(df$qsec)))
-#'
-#' # metrics by num of cylinders
-#' tmp <- agg(groupBy(df, "cyl"), avg(df$mpg), avg(df$hp), avg(df$wt), avg(df$qsec))
-#' head(orderBy(tmp, "cyl"))
-#'
-#' # car with the max mpg
-#' mpg_max <- as.numeric(collect(agg(df, max(df$mpg))))
-#' head(where(df, df$mpg == mpg_max))}
+#' @family aggregate functions
 #' @note avg since 1.4.0
 setMethod("avg",
           signature(x = "Column"),
@@ -1105,6 +1095,18 @@ setMethod("md5",
 #' @rdname column_aggregate_functions
 #' @aliases mean mean,Column-method
 #' @export
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, avg(df$mpg), mean(df$mpg), sum(df$mpg), min(df$wt), max(df$qsec)))
+#'
+#' # metrics by num of cylinders
+#' tmp <- agg(groupBy(df, "cyl"), avg(df$mpg), avg(df$hp), avg(df$wt), avg(df$qsec))
+#' head(orderBy(tmp, "cyl"))
+#'
+#' # car with the max mpg
+#' mpg_max <- as.numeric(collect(agg(df, max(df$mpg))))
+#' head(where(df, df$mpg == mpg_max))}
 #' @note mean since 1.5.0
 setMethod("mean",
           signature(x = "Column"),
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 802e76c6b09a..b17f9a119c9a 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -920,9 +920,9 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
 #' @export
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
-#' @rdname column_aggregate_functions
+#' @param x Column to compute on or a GroupedData object.
+#' @rdname avg
 #' @export
-#' @name NULL
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })
 
 #' @rdname base64

From 0a7f5fcac2e0295d92b82d8909c4f1b11c82f016 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 15 Jun 2017 20:45:31 -0700
Subject: [PATCH 05/11] add back avg example

---
 R/pkg/R/functions.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 22f22f965d77..e3c36493f01f 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -193,6 +193,7 @@ setMethod("atan",
 #' @export
 #' @aliases avg avg,Column-method
 #' @family aggregate functions
+#' @examples \dontrun{avg(df$c)}
 #' @note avg since 1.4.0
 setMethod("avg",
           signature(x = "Column"),

From 19d063c6995fa6bd780830a941f6b1f7c45c1bac Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 15 Jun 2017 20:48:35 -0700
Subject: [PATCH 06/11] revert avg

---
 R/pkg/R/functions.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index e3c36493f01f..0681b3c84684 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -187,12 +187,13 @@ setMethod("atan",
 
 #' avg
 #'
-#' Returns the average of the values in a group.
+#' Aggregate function: returns the average of the values in a group.
 #'
 #' @rdname avg
-#' @export
-#' @aliases avg avg,Column-method
+#' @name avg
 #' @family aggregate functions
+#' @export
+#' @aliases avg,Column-method
 #' @examples \dontrun{avg(df$c)}
 #' @note avg since 1.4.0
 setMethod("avg",

From 978e13b498b492495a2fa21e915c120791b59b9f Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Fri, 16 Jun 2017 13:00:31 -0700
Subject: [PATCH 07/11] fix issue in avg doc

---
 R/pkg/R/generics.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index b17f9a119c9a..a1774e73c4d1 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -921,6 +921,7 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
 #' @param x Column to compute on or a GroupedData object.
+#' @param ... additional argument(s) when \code{x} is a GroupedData object.
 #' @rdname avg
 #' @export
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })

From 875db0dc02e03fab1df57ba105033a6597d45249 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sat, 17 Jun 2017 11:58:52 -0700
Subject: [PATCH 08/11] update doc for corr cov functions

---
 R/pkg/R/functions.R | 42 ++++++++++++++++++++----------------------
 R/pkg/R/stats.R     | 20 +++++++-------------
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 0681b3c84684..ba671e503a39 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -361,10 +361,13 @@ setMethod("column",
 #'
 #' @rdname corr
 #' @name corr
-#' @family math functions
+#' @family aggregate functions
 #' @export
 #' @aliases corr,Column-method
-#' @examples \dontrun{corr(df$c, df$d)}
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#' head(select(df, corr(df$mpg, df$hp)))}
 #' @note corr since 1.6.0
 setMethod("corr", signature(x = "Column"),
           function(x, col2) {
@@ -375,20 +378,22 @@ setMethod("corr", signature(x = "Column"),
 
 #' cov
 #'
-#' Compute the sample covariance between two expressions.
+#' Compute the covariance between two expressions.
+#'
+#' @details
+#' \code{cov}: Compute the sample covariance between two expressions.
 #'
 #' @rdname cov
 #' @name cov
-#' @family math functions
+#' @family aggregate functions
 #' @export
 #' @aliases cov,characterOrColumn-method
 #' @examples
 #' \dontrun{
-#' cov(df$c, df$d)
-#' cov("c", "d")
-#' covar_samp(df$c, df$d)
-#' covar_samp("c", "d")
-#' }
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#' head(select(df, cov(df$mpg, df$hp), cov("mpg", "hp"),
+#'                 covar_samp(df$mpg, df$hp), covar_samp("mpg", "hp"),
+#'                 covar_pop(df$mpg, df$hp), covar_pop("mpg", "hp")))}
 #' @note cov since 1.6.0
 setMethod("cov", signature(x = "characterOrColumn"),
           function(x, col2) {
@@ -396,6 +401,9 @@ setMethod("cov", signature(x = "characterOrColumn"),
             covar_samp(x, col2)
           })
 
+#' @details
+#' \code{covar_samp}: Alias for \code{cov}.
+#'
 #' @rdname cov
 #'
 #' @param col1 the first Column.
@@ -414,23 +422,13 @@ setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterO
             column(jc)
           })
 
-#' covar_pop
-#'
-#' Compute the population covariance between two expressions.
-#'
-#' @param col1 First column to compute cov_pop.
-#' @param col2 Second column to compute cov_pop.
+#' @details
+#' \code{covar_pop}: Compute the population covariance between two expressions.
 #'
-#' @rdname covar_pop
+#' @rdname cov
 #' @name covar_pop
-#' @family math functions
 #' @export
 #' @aliases covar_pop,characterOrColumn,characterOrColumn-method
-#' @examples
-#' \dontrun{
-#' covar_pop(df$c, df$d)
-#' covar_pop("c", "d")
-#' }
 #' @note covar_pop since 2.0.0
 setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
           function(col1, col2) {
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index d78a10893f92..04cbc74fda28 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -52,22 +52,17 @@ setMethod("crosstab",
             collect(dataFrame(sct))
           })
 
-#' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
-#'
 #' @param colName1 the name of the first column
 #' @param colName2 the name of the second column
-#' @return The covariance of the two columns.
 #'
 #' @rdname cov
-#' @name cov
 #' @aliases cov,SparkDataFrame-method
 #' @family stat functions
 #' @export
 #' @examples
-#'\dontrun{
-#' df <- read.json("/path/to/file.json")
-#' cov <- cov(df, "title", "gender")
-#' }
+#'
+#' \dontrun{
+#' cov(df, "mpg", "hp")}
 #' @note cov since 1.6.0
 setMethod("cov",
           signature(x = "SparkDataFrame"),
@@ -93,11 +88,10 @@ setMethod("cov",
 #' @family stat functions
 #' @export
 #' @examples
-#'\dontrun{
-#' df <- read.json("/path/to/file.json")
-#' corr <- corr(df, "title", "gender")
-#' corr <- corr(df, "title", "gender", method = "pearson")
-#' }
+#'
+#' \dontrun{
+#' corr(df, "mpg", "hp")
+#' corr(df, "mpg", "hp", method = "pearson")}
 #' @note corr since 1.6.0
 setMethod("corr",
           signature(x = "SparkDataFrame"),

From 79d9fdf424cc24277673f30ec673ed6ae3eafeee Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sat, 17 Jun 2017 12:06:09 -0700
Subject: [PATCH 09/11] fix issue with covar_pop

---
 R/pkg/R/generics.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index a1774e73c4d1..0331483a3e83 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -479,7 +479,7 @@ setGeneric("corr", function(x, ...) {standardGeneric("corr") })
 #' @export
 setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
 
-#' @rdname covar_pop
+#' @rdname cov
 #' @export
 setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
 

From 6eae126398e4229aa84130728792f407c67a75e6 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Sun, 18 Jun 2017 20:54:58 -0700
Subject: [PATCH 10/11] add back return value in cov stat

---
 R/pkg/R/stats.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 04cbc74fda28..21422bfc8390 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -54,6 +54,7 @@ setMethod("crosstab",
 
 #' @param colName1 the name of the first column
 #' @param colName2 the name of the second column
+#' @return The covariance of the two columns.
 #'
 #' @rdname cov
 #' @aliases cov,SparkDataFrame-method

From 4cf5ab98771f19924e483ac716bd8a0618ba3f2e Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Mon, 19 Jun 2017 11:00:49 -0700
Subject: [PATCH 11/11] update cov method for SparkDataFrame

---
 R/pkg/R/stats.R | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 21422bfc8390..9a9fa84044ce 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -52,6 +52,10 @@ setMethod("crosstab",
             collect(dataFrame(sct))
           })
 
+#' @details
+#' \code{cov}: When applied to SparkDataFrame, this calculates the sample covariance of two numerical
+#' columns of \emph{one} SparkDataFrame.
+#'
 #' @param colName1 the name of the first column
 #' @param colName2 the name of the second column
 #' @return The covariance of the two columns.
@@ -63,7 +67,8 @@ setMethod("crosstab",
 #' @examples
 #'
 #' \dontrun{
-#' cov(df, "mpg", "hp")}
+#' cov(df, "mpg", "hp")
+#' cov(df, df$mpg, df$hp)}
 #' @note cov since 1.6.0
 setMethod("cov",
           signature(x = "SparkDataFrame"),