From 87bf625774727f63b82b756ab7881693bc7153f9 Mon Sep 17 00:00:00 2001
From: zero323 <mszymkiewicz@gmail.com>
Date: Fri, 24 Jan 2020 01:01:03 +0100
Subject: [PATCH] Move Unicode test data to external file

---
 .../tests/fulltests/data/test_utils_utf.json  |  4 +++
 R/pkg/tests/fulltests/test_sparkSQL.R         | 29 ++++++++++++-------
 2 files changed, 22 insertions(+), 11 deletions(-)
 create mode 100644 R/pkg/tests/fulltests/data/test_utils_utf.json

diff --git a/R/pkg/tests/fulltests/data/test_utils_utf.json b/R/pkg/tests/fulltests/data/test_utils_utf.json
new file mode 100644
index 000000000000..b78352ee52ef
--- /dev/null
+++ b/R/pkg/tests/fulltests/data/test_utils_utf.json
@@ -0,0 +1,4 @@
+{"name": "안녕하세요"}
+{"name": "您好", "age": 30}
+{"name": "こんにちは", "age": 19}
+{"name": "Xin chào"}
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index d435a8b6d7c4..4fcc2baa0546 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -848,24 +848,31 @@ test_that("collect() and take() on a DataFrame return the same number of rows an
 })
 
 test_that("collect() support Unicode characters", {
-  lines <- c("{\"name\":\"안녕하세요\"}",
-             "{\"name\":\"您好\", \"age\":30}",
-             "{\"name\":\"こんにちは\", \"age\":19}",
-             "{\"name\":\"Xin chào\"}")
+  jsonPath <- file.path(
+    Sys.getenv("SPARK_HOME"),  # fixture path is built relative to SPARK_HOME
+    "R", "pkg", "tests", "fulltests", "data",
+    "test_utils_utf.json"
+  )
+
+  lines <- readLines(jsonPath, encoding = "UTF-8")  # mark the input strings as UTF-8
 
-  jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
-  writeLines(lines, jsonPath)
+  expected <- regmatches(lines, gregexpr('(?<="name": ").*?(?=")', lines, perl = TRUE))
 
   df <- read.df(jsonPath, "json")
   rdf <- collect(df)
   expect_true(is.data.frame(rdf))
+  expect_equal(rdf$name[1], expected[[1]])  # expected[[i]]: "name" value matched on line i
+  expect_equal(rdf$name[2], expected[[2]])
+  expect_equal(rdf$name[3], expected[[3]])
+  expect_equal(rdf$name[4], expected[[4]])
 
   df1 <- createDataFrame(rdf)
+  expect_equal(
+    collect(
+      where(df1, df1$name == expected[[2]])  # filter on a non-ASCII value from the fixture
+    )$name,
+    expected[[2]]
+  )
 })
 
 test_that("multiple pipeline transformations result in an RDD with the correct values", {