From 87bf625774727f63b82b756ab7881693bc7153f9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 24 Jan 2020 01:01:03 +0100 Subject: [PATCH] Move Unicode test data to external file --- .../tests/fulltests/data/test_utils_utf.json | 4 +++ R/pkg/tests/fulltests/test_sparkSQL.R | 29 ++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 R/pkg/tests/fulltests/data/test_utils_utf.json diff --git a/R/pkg/tests/fulltests/data/test_utils_utf.json b/R/pkg/tests/fulltests/data/test_utils_utf.json new file mode 100644 index 000000000000..b78352ee52ef --- /dev/null +++ b/R/pkg/tests/fulltests/data/test_utils_utf.json @@ -0,0 +1,4 @@ +{"name": "안녕하세요"} +{"name": "您好", "age": 30} +{"name": "こんにちは", "age": 19} +{"name": "Xin chào"} diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index d435a8b6d7c4..4fcc2baa0546 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -848,24 +848,31 @@ test_that("collect() and take() on a DataFrame return the same number of rows an }) test_that("collect() support Unicode characters", { - lines <- c("{\"name\":\"안녕하세요\"}", - "{\"name\":\"您好\", \"age\":30}", - "{\"name\":\"こんにちは\", \"age\":19}", - "{\"name\":\"Xin chào\"}") + jsonPath <- file.path( + Sys.getenv("SPARK_HOME"), + "R", "pkg", "tests", "fulltests", "data", + "test_utils_utf.json" + ) + + lines <- readLines(jsonPath, encoding = "UTF-8") - jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") - writeLines(lines, jsonPath) + expected <- regmatches(lines, gregexpr('(?<="name": ").*?(?=")', lines, perl = TRUE)) df <- read.df(jsonPath, "json") rdf <- collect(df) expect_true(is.data.frame(rdf)) - expect_equal(rdf$name[1], markUtf8("안녕하세요")) - expect_equal(rdf$name[2], markUtf8("您好")) - expect_equal(rdf$name[3], markUtf8("こんにちは")) - expect_equal(rdf$name[4], markUtf8("Xin chào")) + expect_equal(rdf$name[1], expected[[1]]) + expect_equal(rdf$name[2], expected[[2]]) + expect_equal(rdf$name[3], expected[[3]]) + expect_equal(rdf$name[4], expected[[4]]) df1 <- createDataFrame(rdf) - expect_equal(collect(where(df1, df1$name == markUtf8("您好")))$name, markUtf8("您好")) + expect_equal( + collect( + where(df1, df1$name == expected[[2]]) + )$name, + expected[[2]] + ) }) test_that("multiple pipeline transformations result in an RDD with the correct values", {