From 2e1d693a38c62d484decfeec1bc8b481caa7c4f0 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 4 Nov 2018 16:46:20 +0800 Subject: [PATCH 1/5] Add schema_of_json() and schema_of_csv() to R --- R/pkg/NAMESPACE | 2 + R/pkg/R/functions.R | 123 ++++++++++++++---- R/pkg/R/generics.R | 8 ++ R/pkg/tests/fulltests/test_sparkSQL.R | 20 ++- .../org/apache/spark/sql/api/r/SQLUtils.scala | 8 +- 5 files changed, 131 insertions(+), 30 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 9d4f05af75af..60fa322d816c 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -347,6 +347,8 @@ exportMethods("%<=>%", "row_number", "rpad", "rtrim", + "schema_of_csv", + "schema_of_json", "second", "sha1", "sha2", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 9292363d1ad2..b60ee6f838a6 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -202,14 +202,22 @@ NULL #' \itemize{ #' \item \code{from_json}: a structType object to use as the schema to use #' when parsing the JSON string. Since Spark 2.3, the DDL-formatted string is -#' also supported for the schema. -#' \item \code{from_csv}: a DDL-formatted string +#' also supported for the schema. Since Spark 3.0, \code{schema_of_json} or +#' a string literal can also be accepted. +#' \item \code{from_csv}: a structType object, DDL-formatted string or \code{schema_of_csv} +#' } +#' @param ... additional argument(s). +#' \itemize{ +#' \item \code{to_json}, \code{from_json} and \code{schema_of_json}: this contains +#' additional named properties to control how it is converted and accepts the +#' same options as the JSON data source. +#' \item \code{to_json}: it supports the "pretty" option which enables pretty +#' JSON generation. +#' \item \code{to_csv}, \code{from_csv} and \code{schema_of_csv}: this contains +#' additional named properties to control how it is converted and accepts the +#' same options as the CSV data source. +#' \item \code{arrays_zip}, this contains additional Columns of arrays to be merged. #' } -#' @param ... additional argument(s). In \code{to_json}, \code{to_csv} and \code{from_json}, -#' this contains additional named properties to control how it is converted, accepts -#' the same options as the JSON/CSV data source. Additionally \code{to_json} supports -#' the "pretty" option which enables pretty JSON generation. In \code{arrays_zip}, -#' this contains additional Columns of arrays to be merged. #' @name column_collection_functions #' @rdname column_collection_functions #' @family collection functions @@ -2188,6 +2196,8 @@ setMethod("date_format", signature(y = "Column", x = "character"), column(jc) }) +setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Column")) + #' @details #' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType} #' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set @@ -2195,7 +2205,7 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' #' @rdname column_collection_functions #' @param as.json.array indicating if input string is JSON array of objects or a single object. 
-#' @aliases from_json from_json,Column,characterOrstructType-method +#' @aliases from_json from_json,Column,characterOrstructTypeOrColumn-method #' @examples #' #' \dontrun{ @@ -2203,25 +2213,31 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy')) #' schema <- structType(structField("date", "string")) #' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy'))) - #' df2 <- sql("SELECT named_struct('name', 'Bob') as people") #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' schema <- structType(structField("name", "string")) #' head(select(df2, from_json(df2$people_json, schema))) -#' head(select(df2, from_json(df2$people_json, "name STRING")))} +#' head(select(df2, from_json(df2$people_json, "name STRING"))) +#' head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json))))} #' @note from_json since 2.2.0 -setMethod("from_json", signature(x = "Column", schema = "characterOrstructType"), +setMethod("from_json", signature(x = "Column", schema = "characterOrstructTypeOrColumn"), function(x, schema, as.json.array = FALSE, ...) { if (is.character(schema)) { - schema <- structType(schema) + jschema <- structType(schema)$jobj + } else if (class(schema) == "structType") { + jschema <- schema$jobj + } else { + jschema <- schema@jc } if (as.json.array) { - jschema <- callJStatic("org.apache.spark.sql.types.DataTypes", - "createArrayType", - schema$jobj) - } else { - jschema <- schema$jobj + # This case is R-specifically different. Unlike Scala and Python side, + # R side has 'as.json.array' option to indicate if the schema should be + # treated as struct or element type of array in order to make it more + # R-friendly. + jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", + "createArrayType", + jschema) } options <- varargsToStrEnv(...) jc <- callJStatic("org.apache.spark.sql.functions", @@ -2230,28 +2246,59 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType") column(jc) }) +#' @details +#' \code{schema_of_json}: Parses a JSON string and infers its schema in DDL format. +#' +#' @rdname column_collection_functions +#' @aliases schema_of_json schema_of_json,characterOrColumn-method +#' @examples +#' +#' \dontrun{ +#' json <- '{"name":"Bob"}' +#' df <- sql("SELECT * FROM range(1)") +#' head(select(df, schema_of_json(json)))} +#' @note schema_of_json since 3.0.0 +setMethod("schema_of_json", signature(x = "characterOrColumn"), + function(x, ...) { + if (class(x) == "character") { + col <- callJStatic("org.apache.spark.sql.functions", "lit", x) + } else { + col <- x@jc + } + options <- varargsToStrEnv(...) + jc <- callJStatic("org.apache.spark.sql.functions", + "schema_of_json", + col, options) + column(jc) + }) + #' @details #' \code{from_csv}: Parses a column containing a CSV string into a Column of \code{structType} #' with the specified \code{schema}. #' If the string is unparseable, the Column will contain the value NA. 
#'
#' @rdname column_collection_functions
-#' @aliases from_csv from_csv,Column,character-method
+#' @aliases from_csv from_csv,Column,characterOrstructTypeOrColumn-method
#' @examples
#'
#' \dontrun{
-#' df <- sql("SELECT 'Amsterdam,2018' as csv")
+#' csv <- "'Amsterdam,2018'"
+#' df <- sql(paste("SELECT", csv, "as csv"))
#' schema <- "city STRING, year INT"
-#' head(select(df, from_csv(df$csv, schema)))}
+#' head(select(df, from_csv(df$csv, schema)))
+#' head(select(df, from_csv(df$csv, structType(schema))))
+#' head(select(df, from_csv(df$csv, schema_of_csv(csv))))}
#' @note from_csv since 3.0.0
-setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"),
+setMethod("from_csv", signature(x = "Column", schema = "characterOrstructTypeOrColumn"),
          function(x, schema, ...) {
-            if (class(schema) == "Column") {
-              jschema <- schema@jc
-            } else if (is.character(schema)) {
+            if (class(schema) == "structType") {
+              schema <- callJMethod(schema$jobj, "toDDL")
+            }
+
+            if (is.character(schema)) {
              jschema <- callJStatic("org.apache.spark.sql.functions", "lit", schema)
            } else {
-              stop("schema argument should be a column or character")
+              jschema <- schema@jc
            }
            options <- varargsToStrEnv(...)
            jc <- callJStatic("org.apache.spark.sql.functions",
@@ -2260,6 +2307,32 @@ setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"),
            column(jc)
          })
 
+#' @details
+#' \code{schema_of_csv}: Parses a CSV string and infers its schema in DDL format.
+#'
+#' @rdname column_collection_functions
+#' @aliases schema_of_csv schema_of_csv,characterOrColumn-method
+#' @examples
+#'
+#' \dontrun{
+#' csv <- "'Amsterdam,2018'"
+#' df <- sql("SELECT * FROM range(1)")
+#' head(select(df, schema_of_csv(csv)))}
+#' @note schema_of_csv since 3.0.0
+setMethod("schema_of_csv", signature(x = "characterOrColumn"),
+          function(x, ...) {
+            if (class(x) == "character") {
+              col <- callJStatic("org.apache.spark.sql.functions", "lit", x)
+            } else {
+              col <- x@jc
+            }
+            options <- varargsToStrEnv(...)
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "schema_of_csv",
+                              col, options)
+            column(jc)
+          })
+
 #' @details
 #' \code{from_utc_timestamp}: This is a common function for databases supporting TIMESTAMP WITHOUT
 #' TIMEZONE. This function takes a timestamp which is timezone-agnostic, and interprets it as a
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 463102c780b5..bbf5f7dc3334 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1199,6 +1199,14 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
 #' @name NULL
 setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("schema_of_csv", function(x, ...) { standardGeneric("schema_of_csv") })
+
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("schema_of_json", function(x, ...) { standardGeneric("schema_of_json") })
+
 #' @rdname column_aggregate_functions
 #' @name NULL
 setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") })
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index faec387ce4ef..f50253a51d5a 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1620,14 +1620,20 @@ test_that("column functions", {
   expect_equal(collect(select(df, bround(df$x, 0)))[[1]][1], 2)
   expect_equal(collect(select(df, bround(df$x, 0)))[[1]][2], 4)
 
-  # Test from_csv()
+  # Test from_csv(), schema_of_csv()
   df <- as.DataFrame(list(list("col" = "1")))
   c <- collect(select(df, alias(from_csv(df$col, "a INT"), "csv")))
   expect_equal(c[[1]][[1]]$a, 1)
   c <- collect(select(df, alias(from_csv(df$col, lit("a INT")), "csv")))
   expect_equal(c[[1]][[1]]$a, 1)
+  c <- collect(select(df, alias(from_csv(df$col, structType("a INT")), "csv")))
+  expect_equal(c[[1]][[1]]$a, 1)
+  c <- collect(select(df, alias(from_csv(df$col, schema_of_csv("1")), "csv")))
+  expect_equal(c[[1]][[1]]$`_c0`, 1)
+  c <- collect(select(df, alias(from_csv(df$col, schema_of_csv(lit("1"))), "csv")))
+  expect_equal(c[[1]][[1]]$`_c0`, 1)
 
-  # Test to_json(), from_json()
+  # Test to_json(), from_json(), schema_of_json()
   df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
   j <- collect(select(df, alias(to_json(df$people), "json")))
   expect_equal(j[order(j$json), ][1], "[{\"name\":\"Bob\"},{\"name\":\"Alice\"}]")
@@ -1645,7 +1651,9 @@ test_that("column functions", {
   expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}")
   df <- as.DataFrame(j)
   schemas <- list(structType(structField("age", "integer"), structField("height", "double")),
-                  "age INT, height DOUBLE")
+                  "age INT, height DOUBLE",
+                  schema_of_json("{\"age\":16,\"height\":176.5}"),
+                  schema_of_json(lit("{\"age\":16,\"height\":176.5}")))
   for (schema in schemas) {
     s <- collect(select(df, alias(from_json(df$json, schema), "structcol")))
     expect_equal(ncol(s), 1)
@@ -1679,7 +1687,11 @@ test_that("column functions", {
   # check if array type in string is correctly supported.
jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]" df <- as.DataFrame(list(list("people" = jsonArr))) - for (schema in list(structType(structField("name", "string")), "name STRING")) { + schemas <- list(structType(structField("name", "string")), + "name STRING", + schema_of_json("{\"name\":\"Alice\"}"), + schema_of_json(lit("{\"name\":\"Bob\"}"))) + for (schema in schemas) { arr <- collect(select(df, alias(from_json(df$people, schema, as.json.array = TRUE), "arrcol"))) expect_equal(ncol(arr), 1) expect_equal(nrow(arr), 1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index af20764f9a96..97d991c1ee93 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -30,7 +30,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema +import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.command.ShowTablesCommand import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION @@ -225,4 +225,10 @@ private[sql] object SQLUtils extends Logging { } sparkSession.sessionState.catalog.listTables(db).map(_.table).toArray } + + def createArrayType(elementType: DataType): ArrayType = DataTypes.createArrayType(elementType) + + def createArrayType(elementType: Column): ArrayType = { + new ArrayType(ExprUtils.evalTypeExpr(elementType.expr), true) + } } From 3416ac747179e8063082676c79d1842709011c93 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 5 Nov 2018 11:04:58 +0800 Subject: [PATCH 2/5] Address comments --- R/pkg/R/functions.R | 54 +++++++------------ R/pkg/tests/fulltests/test_sparkSQL.R | 26 ++++----- .../org/apache/spark/sql/api/r/SQLUtils.scala | 8 +-- 3 files changed, 35 insertions(+), 53 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b60ee6f838a6..0bade9422e6d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -202,9 +202,8 @@ NULL #' \itemize{ #' \item \code{from_json}: a structType object to use as the schema to use #' when parsing the JSON string. Since Spark 2.3, the DDL-formatted string is -#' also supported for the schema. Since Spark 3.0, \code{schema_of_json} or -#' a string literal can also be accepted. -#' \item \code{from_csv}: a structType object, DDL-formatted string or \code{schema_of_csv} +#' also supported for the schema. +#' \item \code{from_csv}: a DDL-formatted string #' } #' @param ... additional argument(s). #' \itemize{ @@ -2196,8 +2195,6 @@ setMethod("date_format", signature(y = "Column", x = "character"), column(jc) }) -setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Column")) - #' @details #' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType} #' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set @@ -2205,7 +2202,7 @@ setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Col #' #' @rdname column_collection_functions #' @param as.json.array indicating if input string is JSON array of objects or a single object. 
-#' @aliases from_json from_json,Column,characterOrstructTypeOrColumn-method
+#' @aliases from_json from_json,Column,characterOrstructType-method
 #' @examples
 #'
 #' \dontrun{
@@ -2213,31 +2210,25 @@ setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Col
 #' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
 #' schema <- structType(structField("date", "string"))
 #' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
+
 #' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
 #' df2 <- mutate(df2, people_json = to_json(df2$people))
 #' schema <- structType(structField("name", "string"))
 #' head(select(df2, from_json(df2$people_json, schema)))
-#' head(select(df2, from_json(df2$people_json, "name STRING")))
-#' head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json))))}
+#' head(select(df2, from_json(df2$people_json, "name STRING")))}
 #' @note from_json since 2.2.0
-setMethod("from_json", signature(x = "Column", schema = "characterOrstructTypeOrColumn"),
+setMethod("from_json", signature(x = "Column", schema = "characterOrstructType"),
          function(x, schema, as.json.array = FALSE, ...) {
             if (is.character(schema)) {
-              jschema <- structType(schema)$jobj
-            } else if (class(schema) == "structType") {
-              jschema <- schema$jobj
-            } else {
-              jschema <- schema@jc
+              schema <- structType(schema)
             }
 
             if (as.json.array) {
-              # This case is R-specifically different. Unlike Scala and Python side,
-              # R side has 'as.json.array' option to indicate if the schema should be
-              # treated as struct or element type of array in order to make it more
-              # R-friendly.
-              jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
-                                     "createArrayType",
-                                     jschema)
+              jschema <- callJStatic("org.apache.spark.sql.types.DataTypes",
+                                     "createArrayType",
+                                     schema$jobj)
+            } else {
+              jschema <- schema$jobj
             }
             options <- varargsToStrEnv(...)
             jc <- callJStatic("org.apache.spark.sql.functions",
@@ -2278,27 +2269,22 @@ setMethod("schema_of_json", signature(x = "characterOrColumn"),
 #' If the string is unparseable, the Column will contain the value NA.
 #'
 #' @rdname column_collection_functions
-#' @aliases from_csv from_csv,Column,characterOrstructTypeOrColumn-method
+#' @aliases from_csv from_csv,Column,character-method
 #' @examples
 #'
 #' \dontrun{
-#' csv <- "'Amsterdam,2018'"
-#' df <- sql(paste("SELECT", csv, "as csv"))
+#' df <- sql("SELECT 'Amsterdam,2018' as csv")
 #' schema <- "city STRING, year INT"
-#' head(select(df, from_csv(df$csv, schema)))
-#' head(select(df, from_csv(df$csv, structType(schema))))
-#' head(select(df, from_csv(df$csv, schema_of_csv(csv))))}
+#' head(select(df, from_csv(df$csv, schema)))}
 #' @note from_csv since 3.0.0
-setMethod("from_csv", signature(x = "Column", schema = "characterOrstructTypeOrColumn"),
+setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"),
          function(x, schema, ...) {
-            if (class(schema) == "structType") {
-              schema <- callJMethod(schema$jobj, "toDDL")
-            }
-
-            if (is.character(schema)) {
+            if (class(schema) == "Column") {
+              jschema <- schema@jc
+            } else if (is.character(schema)) {
               jschema <- callJStatic("org.apache.spark.sql.functions", "lit", schema)
             } else {
-              jschema <- schema@jc
+              stop("schema argument should be a column or character")
            }
            options <- varargsToStrEnv(...)
jc <- callJStatic("org.apache.spark.sql.functions",
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index f50253a51d5a..b3a4dd72ac44 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1628,10 +1628,12 @@ test_that("column functions", {
   expect_equal(c[[1]][[1]]$a, 1)
   c <- collect(select(df, alias(from_csv(df$col, structType("a INT")), "csv")))
   expect_equal(c[[1]][[1]]$a, 1)
-  c <- collect(select(df, alias(from_csv(df$col, schema_of_csv("1")), "csv")))
-  expect_equal(c[[1]][[1]]$`_c0`, 1)
-  c <- collect(select(df, alias(from_csv(df$col, schema_of_csv(lit("1"))), "csv")))
-  expect_equal(c[[1]][[1]]$`_c0`, 1)
+
+  df <- as.DataFrame(list(list("col" = "1")))
+  c <- collect(select(df, schema_of_csv("Amsterdam,2018")))
+  expect_equal(c[[1]], "struct<_c0:string,_c1:int>")
+  c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018"))))
+  expect_equal(c[[1]], "struct<_c0:string,_c1:int>")
 
   # Test to_json(), from_json(), schema_of_json()
   df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
@@ -1651,9 +1653,7 @@ test_that("column functions", {
   expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}")
   df <- as.DataFrame(j)
   schemas <- list(structType(structField("age", "integer"), structField("height", "double")),
-                  "age INT, height DOUBLE",
-                  schema_of_json("{\"age\":16,\"height\":176.5}"),
-                  schema_of_json(lit("{\"age\":16,\"height\":176.5}")))
+                  "age INT, height DOUBLE")
   for (schema in schemas) {
     s <- collect(select(df, alias(from_json(df$json, schema), "structcol")))
     expect_equal(ncol(s), 1)
@@ -1662,6 +1662,12 @@ test_that("column functions", {
     expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 })))
   }
 
+  df <- as.DataFrame(list(list("col" = "1")))
+  c <- collect(select(df, schema_of_json('{"name":"Bob"}')))
+  expect_equal(c[[1]], "struct<name:string>")
+  c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}'))))
+  expect_equal(c[[1]], "struct<name:string>")
+
   # Test to_json() supports arrays of primitive types and arrays
   df <- sql("SELECT array(19, 42, 70) as age")
   j <- collect(select(df, alias(to_json(df$age), "json")))
@@ -1687,11 +1693,7 @@ test_that("column functions", {
   # check if array type in string is correctly supported.
jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]" df <- as.DataFrame(list(list("people" = jsonArr))) - schemas <- list(structType(structField("name", "string")), - "name STRING", - schema_of_json("{\"name\":\"Alice\"}"), - schema_of_json(lit("{\"name\":\"Bob\"}"))) - for (schema in schemas) { + for (schema in list(structType(structField("name", "string")), "name STRING")) { arr <- collect(select(df, alias(from_json(df$people, schema, as.json.array = TRUE), "arrcol"))) expect_equal(ncol(arr), 1) expect_equal(nrow(arr), 1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 97d991c1ee93..af20764f9a96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -30,7 +30,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.command.ShowTablesCommand import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION @@ -225,10 +225,4 @@ private[sql] object SQLUtils extends Logging { } sparkSession.sessionState.catalog.listTables(db).map(_.table).toArray } - - def createArrayType(elementType: DataType): ArrayType = DataTypes.createArrayType(elementType) - - def createArrayType(elementType: Column): ArrayType = { - new ArrayType(ExprUtils.evalTypeExpr(elementType.expr), true) - } } From c0a9384d292cdeff3a8799b20e166522f64ff50d Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 5 Nov 2018 11:06:23 +0800 Subject: [PATCH 3/5] Remove another test --- R/pkg/tests/fulltests/test_sparkSQL.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index b3a4dd72ac44..8e7d5434715e 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1626,8 +1626,6 @@ test_that("column functions", { expect_equal(c[[1]][[1]]$a, 1) c <- collect(select(df, alias(from_csv(df$col, lit("a INT")), "csv"))) expect_equal(c[[1]][[1]]$a, 1) - c <- collect(select(df, alias(from_csv(df$col, structType("a INT")), "csv"))) - expect_equal(c[[1]][[1]]$a, 1) df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) From c582757b8e5e885e4060695458361c4d779a2c52 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 5 Nov 2018 19:30:01 +0800 Subject: [PATCH 4/5] Address Felix's comments --- R/pkg/R/functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 0bade9422e6d..a9fc975155af 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2245,7 +2245,7 @@ setMethod("from_json", signature(x = "Column", schema = "characterOrstructType") #' @examples #' #' \dontrun{ -#' json <- '{"name":"Bob"}' +#' json <- "{\"name\":\"Bob\"}" #' df <- sql("SELECT * FROM range(1)") #' head(select(df, schema_of_json(json)))} #' @note schema_of_json since 3.0.0 @@ -2301,7 +2301,7 @@ setMethod("from_csv", signature(x = "Column", schema = "characterOrColumn"), #' @examples #' #' \dontrun{ -#' csv <- "'Amsterdam,2018'" +#' csv <- "Amsterdam,2018" #' df <- sql("SELECT * FROM 
range(1)") #' head(select(df, schema_of_csv(csv)))} #' @note schema_of_csv since 3.0.0 From 7c8e226a8591d6137c9f8f766080a4988e1e5612 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Wed, 7 Nov 2018 19:54:46 +0800 Subject: [PATCH 5/5] Address felix's comments --- R/pkg/R/functions.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a9fc975155af..5cae7f3e3332 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1734,12 +1734,16 @@ setMethod("to_date", #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' #' # Converts a map into a JSON object -#' df2 <- sql("SELECT map('name', 'Bob')) as people") +#' df2 <- sql("SELECT map('name', 'Bob') as people") #' df2 <- mutate(df2, people_json = to_json(df2$people)) #' #' # Converts an array of maps into a JSON array #' df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people") -#' df2 <- mutate(df2, people_json = to_json(df2$people))} +#' df2 <- mutate(df2, people_json = to_json(df2$people)) +#' +#' # Converts a map into a pretty JSON object +#' df2 <- sql("SELECT map('name', 'Bob') as people") +#' df2 <- mutate(df2, people_json = to_json(df2$people, pretty = TRUE))} #' @note to_json since 2.2.0 setMethod("to_json", signature(x = "Column"), function(x, ...) {