Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
assert(collationId == 0)
val collateExpr = Collate(Literal("abc"), "UTF8_BINARY")
assert(collateExpr.dataType === StringType(collationId))
collateExpr.dataType.asInstanceOf[StringType].collationId == 0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix a missed check: the comparison on the line above was evaluated but never asserted.

assert(collateExpr.dataType.asInstanceOf[StringType].collationId == 0)
checkEvaluation(collateExpr, "abc")
}

test("collate against literal") {
val collateExpr = Collate(Literal("abc"), "UTF8_LCASE")
val collationId = CollationFactory.collationNameToId("UTF8_LCASE")
assert(collateExpr.dataType == StringType(collationId))
assert(collateExpr.dataType === StringType(collationId))
checkEvaluation(collateExpr, "abc")
}

Expand Down Expand Up @@ -67,16 +67,16 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
}

test("collation on non-explicit default collation") {
checkEvaluation(Collation(Literal("abc")).replacement, "UTF8_BINARY")
checkEvaluation(Collation(Literal("abc")), "UTF8_BINARY")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does `checkEvaluation` take care of `RuntimeReplaceable`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, as follows:

protected def checkEvaluation(
expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = {
// Make it as method to obtain fresh expression everytime.
def expr = prepareEvaluation(expression)

private def prepareEvaluation(expression: Expression): Expression = {
val serializer = new JavaSerializer(new SparkConf()).newInstance()
val resolver = ResolveTimeZone
val expr = resolver.resolveTimeZones(replace(expression))

`replace` then recurses into the replacement:

protected def replace(expr: Expression): Expression = expr match {
case r: RuntimeReplaceable => replace(r.replacement)

}

test("collation on explicitly collated string") {
checkEvaluation(
Collation(Literal.create("abc",
StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))).replacement,
StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))),
"UTF8_LCASE")
checkEvaluation(
Collation(Collate(Literal("abc"), "UTF8_LCASE")).replacement,
Collation(Collate(Literal("abc"), "UTF8_LCASE")),
"UTF8_LCASE")
}

Expand Down Expand Up @@ -212,7 +212,7 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
("sR_cYRl_sRb", "sr_Cyrl_SRB")
).foreach {
case (collation, normalized) =>
checkEvaluation(Collation(Literal.create("abc", StringType(collation))).replacement,
checkEvaluation(Collation(Literal.create("abc", StringType(collation))),
normalized)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

class CollationRegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

Expand All @@ -47,7 +48,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalH
// ILike
checkEvaluation(ILike(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Literal.create(t.regexLike, StringType), '\\').replacement, t.expectedILike)
Literal.create(t.regexLike, StringType), '\\'), t.expectedILike)
// RLike
checkEvaluation(RLike(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Expand Down Expand Up @@ -106,7 +107,7 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalH
// RegExpCount
checkEvaluation(RegExpCount(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Literal.create(t.r, StringType)).replacement, t.expectedCount)
Literal.create(t.r, StringType)), t.expectedCount)
// RegExpInStr
def expectedInStr(count: Any): Any = count match {
case null => null
Expand All @@ -120,50 +121,84 @@ class CollationRegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalH
}

test("MultiLikeBase regexp expressions with collated strings") {
val nullStr = Literal.create(null, StringType)
// Supported collations (StringTypeBinaryLcase)
val binaryCollation = StringType(CollationFactory.collationNameToId("UTF8_BINARY"))
val lowercaseCollation = StringType(CollationFactory.collationNameToId("UTF8_LCASE"))
// LikeAll
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following modifications are not directly related to this PR; they are made only to keep these checks consistent with the rest of the checks in this PR.

checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%oo"), true)
checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%bar%"), false)
checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", "%oo"), true)
checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", "%bar%"), false)
checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%oo"), true)
checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%bar%"), false)
checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", nullStr), null)
checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%feo%", nullStr), false)
checkEvaluation(Literal.create(null, binaryCollation).likeAll("%foo%", "%oo"), null)
case class LikeAllTestCase[R](l: String, p1: String, p2: String, collation: String,
expectedLikeAll: R)
val likeAllTestCases = Seq(
LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
LikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", true),
LikeAllTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", false),
LikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", true),
LikeAllTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", false),
LikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
LikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", false),
LikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
)
likeAllTestCases.foreach(t => {
checkEvaluation(LikeAll(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), t.expectedLikeAll)
})

// NotLikeAll
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", "%oo"), false)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", "%bar%"), true)
checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAll("%foo%", "%oo"), false)
checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAll("%goo%", "%bar%"), true)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", "%oo"), false)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", "%bar%"), true)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", nullStr), false)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%feo%", nullStr), null)
checkEvaluation(Literal.create(null, binaryCollation).notLikeAll("%foo%", "%oo"), null)
case class NotLikeAllTestCase[R](l: String, p1: String, p2: String, collation: String,
expectedNotLikeAll: R)
val notLikeAllTestCases = Seq(
NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
NotLikeAllTestCase("Foo", "%foo%", "%oo", "UTF8_LCASE", false),
NotLikeAllTestCase("Foo", "%goo%", "%bar%", "UTF8_LCASE", true),
NotLikeAllTestCase("foo", "%foo%", "%oo", "UTF8_BINARY", false),
NotLikeAllTestCase("foo", "%goo%", "%bar%", "UTF8_BINARY", true),
NotLikeAllTestCase("foo", "%foo%", null, "UTF8_BINARY", false),
NotLikeAllTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
NotLikeAllTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
)
notLikeAllTestCases.foreach(t => {
checkEvaluation(NotLikeAll(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), t.expectedNotLikeAll)
})

// LikeAny
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", "%hoo"), false)
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", "%bar%"), true)
checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%goo%", "%hoo"), false)
checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%foo%", "%bar%"), true)
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", "%hoo"), false)
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", "%bar%"), true)
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", nullStr), true)
checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%feo%", nullStr), null)
checkEvaluation(Literal.create(null, binaryCollation).likeAny("%foo%", "%oo"), null)
case class LikeAnyTestCase[R](l: String, p1: String, p2: String, collation: String,
expectedLikeAny: R)
val likeAnyTestCases = Seq(
LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
LikeAnyTestCase("Foo", "%goo%", "%hoo", "UTF8_LCASE", false),
LikeAnyTestCase("Foo", "%foo%", "%bar%", "UTF8_LCASE", true),
LikeAnyTestCase("foo", "%goo%", "%hoo", "UTF8_BINARY", false),
LikeAnyTestCase("foo", "%foo%", "%bar%", "UTF8_BINARY", true),
LikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", true),
LikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", null),
LikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
)
likeAnyTestCases.foreach(t => {
checkEvaluation(LikeAny(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), t.expectedLikeAny)
})

// NotLikeAny
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%hoo"), true)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%oo%"), false)
checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAny("%Foo%", "%hoo"), true)
checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAny("%foo%", "%oo%"), false)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%Foo%", "%hoo"), true)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%oo%"), false)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", nullStr), null)
checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%feo%", nullStr), true)
checkEvaluation(Literal.create(null, binaryCollation).notLikeAny("%foo%", "%oo"), null)
case class NotLikeAnyTestCase[R](l: String, p1: String, p2: String, collation: String,
expectedNotLikeAny: R)
val notLikeAnyTestCases = Seq(
NotLikeAnyTestCase("foo", "%foo%", "%hoo", "UTF8_BINARY", true),
NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
NotLikeAnyTestCase("Foo", "%Foo%", "%hoo", "UTF8_LCASE", true),
NotLikeAnyTestCase("Foo", "%foo%", "%oo%", "UTF8_LCASE", false),
NotLikeAnyTestCase("foo", "%Foo%", "%hoo", "UTF8_BINARY", true),
NotLikeAnyTestCase("foo", "%foo%", "%oo%", "UTF8_BINARY", false),
NotLikeAnyTestCase("foo", "%foo%", null, "UTF8_BINARY", null),
NotLikeAnyTestCase("foo", "%feo%", null, "UTF8_BINARY", true),
NotLikeAnyTestCase(null, "%foo%", "%oo", "UTF8_BINARY", null)
)
notLikeAnyTestCases.foreach(t => {
checkEvaluation(NotLikeAny(
Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))),
Seq(UTF8String.fromString(t.p1), UTF8String.fromString(t.p2))), t.expectedNotLikeAny)
})
}

}