diff --git a/unsafe/pom.xml b/unsafe/pom.xml
index 33782c6c66f9..0065961faf74 100644
--- a/unsafe/pom.xml
+++ b/unsafe/pom.xml
@@ -70,6 +70,10 @@
mockito-core
test
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+    </dependency>
target/scala-${scala.binary.version}/classes
diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala
new file mode 100644
index 000000000000..b0852bd812e6
--- /dev/null
+++ b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyChecks.scala
@@ -0,0 +1,133 @@
+package org.apache.spark.unsafe.types
+
+import org.scalacheck.{Arbitrary, Gen}
+import org.scalatest.prop.GeneratorDrivenPropertyChecks
+import org.scalatest.{FunSuite, Matchers}
+
+import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8}
+
+/**
+ * Property-based checks that compare [[UTF8String]] operations against the equivalent
+ * `java.lang.String` / Scala `StringOps` operations on ScalaCheck-generated inputs.
+ *
+ * Each test converts the generated `String` with `UTF8String.fromString` (imported as
+ * `toUTF8`) and asserts that both representations produce the same result.
+ */
+class UTF8StringPropertyChecks extends FunSuite with GeneratorDrivenPropertyChecks with Matchers {
+
+  test("toString") {
+    forAll { (s: String) =>
+      assert(s === toUTF8(s).toString())
+    }
+  }
+
+  test("numChars") {
+    forAll { (s: String) =>
+      // NOTE(review): s.length counts UTF-16 code units; this presumably relies on the
+      // generated strings containing no surrogate pairs -- confirm against the generator.
+      assert(toUTF8(s).numChars() === s.length)
+    }
+  }
+
+  test("startsWith") {
+    forAll { (s: String) =>
+      val utf8 = toUTF8(s)
+      assert(utf8.startsWith(utf8))
+      // Every shorter prefix of s, down to the empty string, must also match.
+      for (i <- 1 to s.length) {
+        assert(utf8.startsWith(toUTF8(s.dropRight(i))))
+      }
+    }
+  }
+
+  test("endsWith") {
+    forAll { (s: String) =>
+      val utf8 = toUTF8(s)
+      assert(utf8.endsWith(utf8))
+      // Every shorter suffix of s, down to the empty string, must also match.
+      for (i <- 1 to s.length) {
+        assert(utf8.endsWith(toUTF8(s.drop(i))))
+      }
+    }
+  }
+
+  test("toUpperCase") {
+    forAll { (s: String) =>
+      assert(s.toUpperCase === toUTF8(s).toUpperCase.toString)
+    }
+  }
+
+  test("toLowerCase") {
+    forAll { (s: String) =>
+      assert(s.toLowerCase === toUTF8(s).toLowerCase.toString)
+    }
+  }
+
+  test("compare") {
+    forAll { (s1: String, s2: String) =>
+      // Only the sign of compareTo is meaningful, so compare signs rather than raw values.
+      assert(Math.signum(s1.compareTo(s2)) === Math.signum(toUTF8(s1).compareTo(toUTF8(s2))))
+    }
+  }
+
+  test("substring") {
+    forAll { (s: String) =>
+      // String.substring throws when start > end, so only visit ranges with start <= end.
+      for (start <- 0 to s.length; end <- start to s.length) {
+        withClue(s"start=$start, end=$end") {
+          assert(s.substring(start, end) === toUTF8(s).substring(start, end).toString)
+        }
+      }
+    }
+  }
+
+  // TODO: substringSQL
+
+  test("contains") {
+    forAll { (s: String) =>
+      // String.substring throws when start > end, so only visit ranges with start <= end.
+      for (start <- 0 to s.length; end <- start to s.length) {
+        val substring = s.substring(start, end)
+        withClue(s"substring=$substring") {
+          assert(s.contains(substring) === toUTF8(s).contains(toUTF8(substring)))
+        }
+      }
+    }
+  }
+
+  // Generators for the trim test: runs of chars in 0x00..0x20 wrapped around an arbitrary string.
+  val whitespaceChar: Gen[Char] = Gen.choose(0x00, 0x20).map(_.toChar)
+  val whitespaceString: Gen[String] = Gen.listOf(whitespaceChar).map(_.mkString)
+  val randomString: Gen[String] = Arbitrary.arbString.arbitrary
+
+  test("trim, trimLeft, trimRight") {
+    // One-sided reference trims using the same rule as String.trim (chars <= ' ' are stripped).
+    // stripMargin must not be used here: it only removes leading whitespace that precedes a
+    // '|' margin character and leaves other lines untouched, so it is not a left trim.
+    // NOTE(review): assumes trimLeft/trimRight use the same whitespace rule as trim -- confirm.
+    def lTrim(s: String): String = s.dropWhile(_ <= ' ')
+    def rTrim(s: String): String = s.reverse.dropWhile(_ <= ' ').reverse
+
+    forAll(
+        whitespaceString,
+        randomString,
+        whitespaceString
+    ) { (start: String, middle: String, end: String) =>
+      val s = start + middle + end
+      assert(s.trim() === toUTF8(s).trim().toString)
+      assert(lTrim(s) === toUTF8(s).trimLeft().toString)
+      assert(rTrim(s) === toUTF8(s).trimRight().toString)
+    }
+  }
+
+  test("reverse") {
+    forAll { (s: String) =>
+      assert(s.reverse === toUTF8(s).reverse.toString)
+    }
+  }
+
+  // TODO: repeat
+  // TODO: indexOf
+  // TODO: lpad
+  // TODO: rpad
+
+  test("concat") {
+    forAll { (inputs: Seq[String]) =>
+      // TODO: test case where at least one of the inputs is null
+      assert(inputs.mkString === UTF8String.concat(inputs.map(toUTF8): _*).toString)
+    }
+  }
+
+  test("concatWs") {
+    forAll { (sep: String, inputs: Seq[String]) =>
+      // TODO: handle case where at least one of the inputs is null
+      assert(
+        inputs.mkString(sep) === UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*).toString)
+    }
+  }
+
+  // TODO: split
+
+  // TODO: levenshteinDistance that tests against StringUtils' implementation
+
+  // TODO: equals() and hashCode()
+
+}