Patch summary (free text before the first "diff --git" header; patch tools
skip it):

  * Adds R/pkg/tests/fulltests/data/test_utils_utf.json, a JSON-lines fixture
    with four records whose "name" values are non-ASCII strings.
  * Rewrites the test "collect() support Unicode characters" in
    R/pkg/tests/fulltests/test_sparkSQL.R so that it
      - reads the fixture from under SPARK_HOME instead of writing inline
        string literals to a tempfile,
      - derives the expected names from the fixture text with a lookaround
        regex (first "name" value on each line), and
      - compares the collected rows against those values rather than against
        markUtf8() literals embedded in the R source.

diff --git a/R/pkg/tests/fulltests/data/test_utils_utf.json b/R/pkg/tests/fulltests/data/test_utils_utf.json
new file mode 100644
index 000000000000..b78352ee52ef
--- /dev/null
+++ b/R/pkg/tests/fulltests/data/test_utils_utf.json
@@ -0,0 +1,4 @@
+{"name": "안녕하세요"}
+{"name": "您好", "age": 30}
+{"name": "こんにちは", "age": 19}
+{"name": "Xin chào"}
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index d435a8b6d7c4..4fcc2baa0546 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -848,24 +848,31 @@ test_that("collect() and take() on a DataFrame return the same number of rows an
 })
 
 test_that("collect() support Unicode characters", {
-  lines <- c("{\"name\":\"안녕하세요\"}",
-             "{\"name\":\"您好\", \"age\":30}",
-             "{\"name\":\"こんにちは\", \"age\":19}",
-             "{\"name\":\"Xin chào\"}")
+  jsonPath <- file.path(
+    Sys.getenv("SPARK_HOME"),
+    "R", "pkg", "tests", "fulltests", "data",
+    "test_utils_utf.json"
+  )
+
+  lines <- readLines(jsonPath, encoding = "UTF-8")
 
-  jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-  writeLines(lines, jsonPath)
+  expected <- regmatches(lines, gregexpr('(?<="name": ").*?(?=")', lines, perl = TRUE))
 
   df <- read.df(jsonPath, "json")
   rdf <- collect(df)
   expect_true(is.data.frame(rdf))
-  expect_equal(rdf$name[1], markUtf8("안녕하세요"))
-  expect_equal(rdf$name[2], markUtf8("您好"))
-  expect_equal(rdf$name[3], markUtf8("こんにちは"))
-  expect_equal(rdf$name[4], markUtf8("Xin chào"))
+  expect_equal(rdf$name[1], expected[[1]])
+  expect_equal(rdf$name[2], expected[[2]])
+  expect_equal(rdf$name[3], expected[[3]])
+  expect_equal(rdf$name[4], expected[[4]])
 
   df1 <- createDataFrame(rdf)
-  expect_equal(collect(where(df1, df1$name == markUtf8("您好")))$name, markUtf8("您好"))
+  expect_equal(
+    collect(
+      where(df1, df1$name == expected[[2]])
+    )$name,
+    expected[[2]]
+  )
 })
 
 test_that("multiple pipeline transformations result in an RDD with the correct values", {