From 2c1f67353b6049e7679947d9c6c1e9901d7e1c9f Mon Sep 17 00:00:00 2001
From: Felix Cheung
Date: Fri, 27 Jan 2017 14:50:09 -0800
Subject: [PATCH 1/3] update doc

---
 R/pkg/R/DataFrame.R            | 13 ++++++++++++-
 R/pkg/R/mllib_classification.R |  2 +-
 R/pkg/R/mllib_clustering.R     |  6 +++---
 R/pkg/R/mllib_regression.R     |  4 ++--
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 523343ea9f4f..56e78345f279 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1831,6 +1831,8 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' Return subsets of SparkDataFrame according to given conditions
 #' @param x a SparkDataFrame.
 #' @param i,subset (Optional) a logical expression to filter on rows.
+#' For the extract operator [[ and replacement operator [[<-, the index of
+#' a single Column.
 #' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
 #' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
 #' Otherwise, a SparkDataFrame will always be returned.
@@ -1841,6 +1843,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @export
 #' @family SparkDataFrame functions
 #' @aliases subset,SparkDataFrame-method
+#' @seealso \link{rename} \link{withColumn}
 #' @rdname subset
 #' @name subset
 #' @family subsetting functions
@@ -1858,6 +1861,10 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' subset(df, df$age %in% c(19, 30), 1:2)
 #' subset(df, df$age %in% c(19), select = c(1,2))
 #' subset(df, select = c(1,2))
+#' # Columns can be selected and set
+#' df[["age"]] <- 23
+#' df[[1]] <- df$age
+#' df[[2]] <- NULL # drop column
 #' }
 #' @note subset since 1.5.0
 setMethod("subset", signature(x = "SparkDataFrame"),
@@ -1982,7 +1989,7 @@ setMethod("selectExpr",
 #' @aliases withColumn,SparkDataFrame,character-method
 #' @rdname withColumn
 #' @name withColumn
-#' @seealso \link{rename} \link{mutate}
+#' @seealso \link{rename} \link{mutate} \link{subset}
 #' @export
 #' @examples
 #'\dontrun{
@@ -1993,6 +2000,10 @@ setMethod("selectExpr",
 #' # Replace an existing column
 #' newDF2 <- withColumn(newDF, "newCol", newDF$col1)
 #' newDF3 <- withColumn(newDF, "newCol", 42)
+#' # Use the extract operator to set an existing or new column
+#' df[["age"]] <- 23
+#' df[[2]] <- df$col1
+#' df[[2]] <- NULL # drop column
 #' }
 #' @note withColumn since 1.4.0
 setMethod("withColumn",
diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 8da84499dfc0..fee4a4cc9ff2 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -41,7 +41,7 @@ setClass("NaiveBayesModel", representation(jobj = "jobj"))
 
 #' Logistic Regression Model
 #'
-#' Fits an logistic regression model against a Spark DataFrame. It supports "binomial": Binary logistic regression
+#' Fits a logistic regression model against a SparkDataFrame. It supports "binomial": Binary logistic regression
 #' with pivoting; "multinomial": Multinomial logistic (softmax) regression without pivoting, similar to glmnet.
 #' Users can print, make predictions on the produced model and save the model to the input path.
 #'
diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R
index 05bbab680d27..e384c7398b22 100644
--- a/R/pkg/R/mllib_clustering.R
+++ b/R/pkg/R/mllib_clustering.R
@@ -47,7 +47,7 @@ setClass("LDAModel", representation(jobj = "jobj"))
 
 #' Bisecting K-Means Clustering Model
 #'
-#' Fits a bisecting k-means clustering model against a Spark DataFrame.
+#' Fits a bisecting k-means clustering model against a SparkDataFrame.
 #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
@@ -189,7 +189,7 @@ setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "charact
 
 #' Multivariate Gaussian Mixture Model (GMM)
 #'
-#' Fits multivariate gaussian mixture model against a Spark DataFrame, similarly to R's
+#' Fits a multivariate Gaussian mixture model against a SparkDataFrame, similarly to R's
 #' mvnormalmixEM(). Users can call \code{summary} to print a summary of the fitted model,
 #' \code{predict} to make predictions on new data, and \code{write.ml}/\code{read.ml}
 #' to save/load fitted models.
@@ -314,7 +314,7 @@ setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "charact
 
 #' K-Means Clustering Model
 #'
-#' Fits a k-means clustering model against a Spark DataFrame, similarly to R's kmeans().
+#' Fits a k-means clustering model against a SparkDataFrame, similarly to R's kmeans().
 #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
diff --git a/R/pkg/R/mllib_regression.R b/R/pkg/R/mllib_regression.R
index 0e07d3bfd899..79086003463e 100644
--- a/R/pkg/R/mllib_regression.R
+++ b/R/pkg/R/mllib_regression.R
@@ -41,7 +41,7 @@ setClass("IsotonicRegressionModel", representation(jobj = "jobj"))
 
 #' Generalized Linear Models
 #'
-#' Fits generalized linear model against a Spark DataFrame.
+#' Fits a generalized linear model against a SparkDataFrame.
 #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
@@ -259,7 +259,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 
 #' Isotonic Regression Model
 #'
-#' Fits an Isotonic Regression model against a Spark DataFrame, similarly to R's isoreg().
+#' Fits an Isotonic Regression model against a SparkDataFrame, similarly to R's isoreg().
 #' Users can print, make predictions on the produced model and save the model to the input path.
 #'
 #' @param data SparkDataFrame for training.

From bff1e56af55fdbf5e216d49a5e673cee6085cc13 Mon Sep 17 00:00:00 2001
From: Felix Cheung
Date: Fri, 27 Jan 2017 14:58:00 -0800
Subject: [PATCH 2/3] vignettes error

---
 R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index 9b0ded3b8d38..36a78477dc26 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -923,9 +923,9 @@ The main method calls of actual computation happen in the Spark JVM of the drive
 Two kinds of RPCs are supported in the SparkR JVM backend: method invocation and creating new objects. Method invocation can be done in two ways.
 
-* `sparkR.invokeJMethod` takes a reference to an existing Java object and a list of arguments to be passed on to the method.
+* `sparkR.callJMethod` takes a reference to an existing Java object and a list of arguments to be passed on to the method.
 
-* `sparkR.invokeJStatic` takes a class name for static method and a list of arguments to be passed on to the method.
+* `sparkR.callJStatic` takes a class name for a static method and a list of arguments to be passed on to the method.
 
 The arguments are serialized using our custom wire format which is then deserialized on the JVM side. We then use Java reflection to invoke the appropriate method.

From de56852daf03de33fbc6dfa0280e1ea5f5f32cc7 Mon Sep 17 00:00:00 2001
From: Felix Cheung
Date: Fri, 27 Jan 2017 15:00:19 -0800
Subject: [PATCH 3/3] do not link to rename

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 56e78345f279..bfec3245cf52 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1843,7 +1843,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @export
 #' @family SparkDataFrame functions
 #' @aliases subset,SparkDataFrame-method
-#' @seealso \link{rename} \link{withColumn}
+#' @seealso \link{withColumn}
 #' @rdname subset
 #' @name subset
 #' @family subsetting functions
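
Taken together, the `[[`/`[[<-` behavior documented in PATCH 1/3 and the renamed RPC entry points from PATCH 2/3 can be exercised as below. This is a minimal illustrative sketch, not part of the patches: it assumes an active SparkR session, uses the base R `faithful` dataset purely as an arbitrary example, and uses `sparkR.newJObject`, the companion object-creation call behind the "creating new objects" RPC kind the vignette mentions.

```r
library(SparkR)
sparkR.session()

# Extract/replacement operators documented in PATCH 1/3
df <- as.DataFrame(faithful)
df[["waiting2"]] <- df$waiting * 2  # set a new column with [[<-
df[["eruptions"]] <- 0              # overwrite an existing column
df[[2]] <- NULL                     # drop a column

# Renamed RPC entry points from PATCH 2/3
sparkR.callJStatic("java.lang.Math", "floor", 5.9)  # static: evaluates java.lang.Math.floor(5.9) in the driver JVM
jmap <- sparkR.newJObject("java.util.HashMap")      # create a JVM object, get back a reference
sparkR.callJMethod(jmap, "put", "key", "value")     # invoke a method on that reference
sparkR.callJMethod(jmap, "get", "key")              # returns "value"

sparkR.session.stop()
```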