Skip to content

Commit 8566bc6

Browse files
uros-db authored and MaxGekk committed
[SPARK-49265][SQL][TESTS] Add collation support unit tests for Upper, Lower, and InitCap
### What changes were proposed in this pull request?

Add collation support unit tests for:
- Upper
- Lower
- InitCap

This PR contains test-only changes, providing additional test coverage for cases such as:
- case and accent variation
- one-to-many case mapping
- conditional case mapping
- surrogate pairs
- etc.

### Why are the changes needed?

Improve collation support testing.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New unit tests in `CollationSupportSuite`.

### Was this patch authored or co-authored using generative AI tooling?

Yes.

Closes #47727 from uros-db/unit-tests-3.

Authored-by: Uros Bojanic <157381213+uros-db@users.noreply.github.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent 291647d commit 8566bc6

File tree

1 file changed: +193 additions, -123 deletions (lines changed)

common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java

Lines changed: 193 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -821,8 +821,12 @@ public void testStringSplitSQL() throws SparkException {
821821
assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE_CI", array_A_B);
822822
}
823823

824+
/**
825+
* Verify the behaviour of the `Upper` collation support class.
826+
*/
827+
824828
private void assertUpper(String target, String collationName, String expected)
825-
throws SparkException {
829+
throws SparkException {
826830
UTF8String target_utf8 = UTF8String.fromString(target);
827831
UTF8String expected_utf8 = UTF8String.fromString(expected);
828832
int collationId = CollationFactory.collationNameToId(collationName);
@@ -835,52 +839,57 @@ private void assertUpper(String target, String collationName, String expected)
835839

836840
@Test
837841
public void testUpper() throws SparkException {
838-
// Edge cases
839-
assertUpper("", "UTF8_BINARY", "");
840-
assertUpper("", "UTF8_LCASE", "");
841-
assertUpper("", "UNICODE", "");
842-
assertUpper("", "UNICODE_CI", "");
843-
// Basic tests
844-
assertUpper("abcde", "UTF8_BINARY", "ABCDE");
845-
assertUpper("abcde", "UTF8_LCASE", "ABCDE");
846-
assertUpper("abcde", "UNICODE", "ABCDE");
847-
assertUpper("abcde", "UNICODE_CI", "ABCDE");
848-
// Uppercase present
849-
assertUpper("AbCdE", "UTF8_BINARY", "ABCDE");
850-
assertUpper("aBcDe", "UTF8_BINARY", "ABCDE");
851-
assertUpper("AbCdE", "UTF8_LCASE", "ABCDE");
852-
assertUpper("aBcDe", "UTF8_LCASE", "ABCDE");
853-
assertUpper("AbCdE", "UNICODE", "ABCDE");
854-
assertUpper("aBcDe", "UNICODE", "ABCDE");
855-
assertUpper("AbCdE", "UNICODE_CI", "ABCDE");
856-
assertUpper("aBcDe", "UNICODE_CI", "ABCDE");
857-
// Accent letters
858-
assertUpper("aBćDe","UTF8_BINARY", "ABĆDE");
859-
assertUpper("aBćDe","UTF8_LCASE", "ABĆDE");
860-
assertUpper("aBćDe","UNICODE", "ABĆDE");
861-
assertUpper("aBćDe","UNICODE_CI", "ABĆDE");
862-
// Variable byte length characters
863-
assertUpper("ab世De", "UTF8_BINARY", "AB世DE");
864-
assertUpper("äbćδe", "UTF8_BINARY", "ÄBĆΔE");
865-
assertUpper("ab世De", "UTF8_LCASE", "AB世DE");
866-
assertUpper("äbćδe", "UTF8_LCASE", "ÄBĆΔE");
867-
assertUpper("ab世De", "UNICODE", "AB世DE");
868-
assertUpper("äbćδe", "UNICODE", "ÄBĆΔE");
869-
assertUpper("ab世De", "UNICODE_CI", "AB世DE");
870-
assertUpper("äbćδe", "UNICODE_CI", "ÄBĆΔE");
871-
// Case-variable character length
872-
assertUpper("i\u0307o", "UTF8_BINARY","I\u0307O");
873-
assertUpper("i\u0307o", "UTF8_LCASE","I\u0307O");
874-
assertUpper("i\u0307o", "UNICODE","I\u0307O");
875-
assertUpper("i\u0307o", "UNICODE_CI","I\u0307O");
876-
assertUpper("ß fi ffi ff st ῗ", "UTF8_BINARY","SS FI FFI FF ST \u0399\u0308\u0342");
877-
assertUpper("ß fi ffi ff st ῗ", "UTF8_LCASE","SS FI FFI FF ST \u0399\u0308\u0342");
878-
assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342");
879-
assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342");
842+
for (String collationName: testSupportedCollations) {
843+
// Empty strings.
844+
assertUpper("", collationName, "");
845+
// Basic tests.
846+
assertUpper("abcde", collationName, "ABCDE");
847+
assertUpper("AbCdE", collationName, "ABCDE");
848+
assertUpper("aBcDe", collationName, "ABCDE");
849+
assertUpper("ABCDE", collationName, "ABCDE");
850+
// Advanced tests.
851+
assertUpper("aBćDe", collationName, "ABĆDE");
852+
assertUpper("ab世De", collationName, "AB世DE");
853+
assertUpper("äbćδe", collationName, "ÄBĆΔE");
854+
assertUpper("AbĆdE", collationName, "ABĆDE");
855+
assertUpper("aB世De", collationName, "AB世DE");
856+
assertUpper("ÄBĆΔE", collationName, "ÄBĆΔE");
857+
// One-to-many case mapping (e.g. Turkish dotted I).
858+
assertUpper("İ", collationName, "İ");
859+
assertUpper("i\u0307", collationName,"I\u0307");
860+
assertUpper("İonic", collationName, "İONIC");
861+
assertUpper("i\u0307onic", collationName,"I\u0307ONIC");
862+
assertUpper("FIDELİO", collationName, "FIDELİO");
863+
// Conditional case mapping (e.g. Greek sigmas).
864+
assertUpper("σ", collationName, "Σ");
865+
assertUpper("σ", collationName, "Σ");
866+
assertUpper("ς", collationName, "Σ");
867+
assertUpper("Σ", collationName, "Σ");
868+
assertUpper("ΣΑΛΑΤΑ", collationName, "ΣΑΛΑΤΑ");
869+
assertUpper("σαλατα", collationName, "ΣΑΛΑΤΑ");
870+
assertUpper("ςαλατα", collationName, "ΣΑΛΑΤΑ");
871+
assertUpper("ΘΑΛΑΣΣΙΝΟΣ", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
872+
assertUpper("θαλασσινοσ", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
873+
assertUpper("θαλασσινος", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
874+
// Surrogate pairs.
875+
assertUpper("a🙃B🙃c", collationName, "A🙃B🙃C");
876+
assertUpper("😄 😆", collationName, "😄 😆");
877+
assertUpper("😀😆😃😄", collationName, "😀😆😃😄");
878+
assertUpper("𝔸", collationName, "𝔸");
879+
assertUpper("𐐅", collationName, "𐐅");
880+
assertUpper("𐐭", collationName, "𐐅");
881+
assertUpper("𐐭𝔸", collationName, "𐐅𝔸");
882+
// Ligatures.
883+
assertUpper("ß fi ffi ff st ῗ", collationName,"SS FI FFI FF ST \u0399\u0308\u0342");
884+
}
880885
}
881886

887+
/**
888+
* Verify the behaviour of the `Lower` collation support class.
889+
*/
890+
882891
private void assertLower(String target, String collationName, String expected)
883-
throws SparkException {
892+
throws SparkException {
884893
UTF8String target_utf8 = UTF8String.fromString(target);
885894
UTF8String expected_utf8 = UTF8String.fromString(expected);
886895
int collationId = CollationFactory.collationNameToId(collationName);
@@ -893,48 +902,56 @@ private void assertLower(String target, String collationName, String expected)
893902

894903
@Test
895904
public void testLower() throws SparkException {
896-
// Edge cases
897-
assertLower("", "UTF8_BINARY", "");
898-
assertLower("", "UTF8_LCASE", "");
899-
assertLower("", "UNICODE", "");
900-
assertLower("", "UNICODE_CI", "");
901-
// Basic tests
902-
assertLower("ABCDE", "UTF8_BINARY", "abcde");
903-
assertLower("ABCDE", "UTF8_LCASE", "abcde");
904-
assertLower("ABCDE", "UNICODE", "abcde");
905-
assertLower("ABCDE", "UNICODE_CI", "abcde");
906-
// Uppercase present
907-
assertLower("AbCdE", "UTF8_BINARY", "abcde");
908-
assertLower("aBcDe", "UTF8_BINARY", "abcde");
909-
assertLower("AbCdE", "UTF8_LCASE", "abcde");
910-
assertLower("aBcDe", "UTF8_LCASE", "abcde");
911-
assertLower("AbCdE", "UNICODE", "abcde");
912-
assertLower("aBcDe", "UNICODE", "abcde");
913-
assertLower("AbCdE", "UNICODE_CI", "abcde");
914-
assertLower("aBcDe", "UNICODE_CI", "abcde");
915-
// Accent letters
916-
assertLower("AbĆdE","UTF8_BINARY", "abćde");
917-
assertLower("AbĆdE","UTF8_LCASE", "abćde");
918-
assertLower("AbĆdE","UNICODE", "abćde");
919-
assertLower("AbĆdE","UNICODE_CI", "abćde");
920-
// Variable byte length characters
921-
assertLower("aB世De", "UTF8_BINARY", "ab世de");
922-
assertLower("ÄBĆΔE", "UTF8_BINARY", "äbćδe");
923-
assertLower("aB世De", "UTF8_LCASE", "ab世de");
924-
assertLower("ÄBĆΔE", "UTF8_LCASE", "äbćδe");
925-
assertLower("aB世De", "UNICODE", "ab世de");
926-
assertLower("ÄBĆΔE", "UNICODE", "äbćδe");
927-
assertLower("aB世De", "UNICODE_CI", "ab世de");
928-
assertLower("ÄBĆΔE", "UNICODE_CI", "äbćδe");
929-
// Case-variable character length
930-
assertLower("İo", "UTF8_BINARY","i\u0307o");
931-
assertLower("İo", "UTF8_LCASE","i\u0307o");
932-
assertLower("İo", "UNICODE","i\u0307o");
933-
assertLower("İo", "UNICODE_CI","i\u0307o");
905+
for (String collationName: testSupportedCollations) {
906+
// Empty strings.
907+
assertLower("", collationName, "");
908+
// Basic tests.
909+
assertLower("abcde", collationName, "abcde");
910+
assertLower("AbCdE", collationName, "abcde");
911+
assertLower("aBcDe", collationName, "abcde");
912+
assertLower("ABCDE", collationName, "abcde");
913+
// Advanced tests.
914+
assertUpper("aBćDe", collationName, "ABĆDE");
915+
assertUpper("ab世De", collationName, "AB世DE");
916+
assertUpper("äbćδe", collationName, "ÄBĆΔE");
917+
assertLower("AbĆdE", collationName, "abćde");
918+
assertLower("aB世De", collationName, "ab世de");
919+
assertLower("ÄBĆΔE", collationName, "äbćδe");
920+
// One-to-many case mapping (e.g. Turkish dotted I).
921+
assertLower("İ", collationName, "i\u0307");
922+
assertLower("I\u0307", collationName,"i\u0307");
923+
assertLower("İonic", collationName, "i\u0307onic");
924+
assertLower("i\u0307onic", collationName,"i\u0307onic");
925+
assertLower("FIDELİO", collationName, "fideli\u0307o");
926+
// Conditional case mapping (e.g. Greek sigmas).
927+
assertLower("σ", collationName, "σ");
928+
assertLower("ς", collationName, "ς");
929+
assertLower("Σ", collationName, "σ");
930+
assertLower("ΣΑΛΑΤΑ", collationName, "σαλατα");
931+
assertLower("σαλατα", collationName, "σαλατα");
932+
assertLower("ςαλατα", collationName, "ςαλατα");
933+
assertLower("ΘΑΛΑΣΣΙΝΟΣ", collationName, "θαλασσινος");
934+
assertLower("θαλασσινοσ", collationName, "θαλασσινοσ");
935+
assertLower("θαλασσινος", collationName, "θαλασσινος");
936+
// Surrogate pairs.
937+
assertLower("a🙃B🙃c", collationName, "a🙃b🙃c");
938+
assertLower("😄 😆", collationName, "😄 😆");
939+
assertLower("😀😆😃😄", collationName, "😀😆😃😄");
940+
assertLower("𝔸", collationName, "𝔸");
941+
assertLower("𐐅", collationName, "𐐭");
942+
assertLower("𐐭", collationName, "𐐭");
943+
assertLower("𐐭𝔸", collationName, "𐐭𝔸");
944+
// Ligatures.
945+
assertLower("ß fi ffi ff st ῗ", collationName,"ß fi ffi ff st ῗ");
946+
}
934947
}
935948

949+
/**
950+
* Verify the behaviour of the `InitCap` collation support class.
951+
*/
952+
936953
private void assertInitCap(String target, String collationName, String expected)
937-
throws SparkException {
954+
throws SparkException {
938955
UTF8String target_utf8 = UTF8String.fromString(target);
939956
UTF8String expected_utf8 = UTF8String.fromString(expected);
940957
int collationId = CollationFactory.collationNameToId(collationName);
@@ -947,49 +964,102 @@ private void assertInitCap(String target, String collationName, String expected)
947964

948965
@Test
949966
public void testInitCap() throws SparkException {
950-
// Edge cases
951-
assertInitCap("", "UTF8_BINARY", "");
952-
assertInitCap("", "UTF8_LCASE", "");
953-
assertInitCap("", "UNICODE", "");
954-
assertInitCap("", "UNICODE_CI", "");
955-
// Basic tests
956-
assertInitCap("ABCDE", "UTF8_BINARY", "Abcde");
957-
assertInitCap("ABCDE", "UTF8_LCASE", "Abcde");
958-
assertInitCap("ABCDE", "UNICODE", "Abcde");
959-
assertInitCap("ABCDE", "UNICODE_CI", "Abcde");
960-
// Uppercase present
961-
assertInitCap("AbCdE", "UTF8_BINARY", "Abcde");
962-
assertInitCap("aBcDe", "UTF8_BINARY", "Abcde");
963-
assertInitCap("AbCdE", "UTF8_LCASE", "Abcde");
964-
assertInitCap("aBcDe", "UTF8_LCASE", "Abcde");
965-
assertInitCap("AbCdE", "UNICODE", "Abcde");
966-
assertInitCap("aBcDe", "UNICODE", "Abcde");
967-
assertInitCap("AbCdE", "UNICODE_CI", "Abcde");
968-
assertInitCap("aBcDe", "UNICODE_CI", "Abcde");
969-
// Accent letters
970-
assertInitCap("AbĆdE", "UTF8_BINARY", "Abćde");
971-
assertInitCap("AbĆdE", "UTF8_LCASE", "Abćde");
972-
assertInitCap("AbĆdE", "UNICODE", "Abćde");
973-
assertInitCap("AbĆdE", "UNICODE_CI", "Abćde");
974-
// Variable byte length characters
975-
assertInitCap("aB 世 De", "UTF8_BINARY", "Ab 世 De");
967+
for (String collationName: testSupportedCollations) {
968+
// Empty strings.
969+
assertInitCap("", collationName, "");
970+
// Basic tests.
971+
assertInitCap("abcde", collationName, "Abcde");
972+
assertInitCap("AbCdE", collationName, "Abcde");
973+
assertInitCap("aBcDe", collationName, "Abcde");
974+
assertInitCap("ABCDE", collationName, "Abcde");
975+
// Conditional case mapping (e.g. Greek sigmas).
976+
assertInitCap("σ", collationName, "Σ");
977+
assertInitCap("ς", collationName, "Σ");
978+
assertInitCap("Σ", collationName, "Σ");
979+
assertInitCap("ΣΑΛΑΤΑ", collationName, "Σαλατα");
980+
assertInitCap("σαλατα", collationName, "Σαλατα");
981+
assertInitCap("ςαλατα", collationName, "Σαλατα");
982+
assertInitCap("ΘΑΛΑΣΣΙΝΟΣ", collationName, "Θαλασσινος");
983+
assertInitCap("θαλασσινοσ", collationName, "Θαλασσινοσ");
984+
assertInitCap("θαλασσινος", collationName, "Θαλασσινος");
985+
}
986+
// Advanced tests.
987+
assertInitCap("aBćDe", "UTF8_BINARY", "Abćde");
988+
assertInitCap("aBćDe", "UTF8_LCASE", "Abćde");
989+
assertInitCap("aBćDe", "UNICODE", "Abćde");
990+
assertInitCap("aBćDe", "UNICODE_CI", "Abćde");
991+
assertInitCap("ab世De", "UTF8_BINARY", "Ab世de");
992+
assertInitCap("ab世De", "UTF8_LCASE", "Ab世De");
993+
assertInitCap("ab世De", "UNICODE", "Ab世De");
994+
assertInitCap("ab世De", "UNICODE_CI", "Ab世De");
995+
assertInitCap("äbćδe", "UTF8_BINARY", "Äbćδe");
996+
assertInitCap("äbćδe", "UTF8_LCASE", "Äbćδe");
997+
assertInitCap("äbćδe", "UNICODE", "Äbćδe");
998+
assertInitCap("äbćδe", "UNICODE_CI", "Äbćδe");
976999
assertInitCap("ÄBĆΔE", "UTF8_BINARY", "Äbćδe");
977-
assertInitCap("aB 世 De", "UTF8_LCASE", "Ab 世 De");
9781000
assertInitCap("ÄBĆΔE", "UTF8_LCASE", "Äbćδe");
979-
assertInitCap("aB 世 De", "UNICODE", "Ab 世 De");
9801001
assertInitCap("ÄBĆΔE", "UNICODE", "Äbćδe");
981-
assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De");
9821002
assertInitCap("ÄBĆΔE", "UNICODE_CI", "Äbćδe");
983-
// Case-variable character length
984-
assertInitCap("İo", "UTF8_BINARY", "I\u0307o");
985-
assertInitCap("İo", "UTF8_LCASE", "İo");
986-
assertInitCap("İo", "UNICODE", "İo");
987-
assertInitCap("İo", "UNICODE_CI", "İo");
988-
assertInitCap("i\u0307o", "UTF8_BINARY", "I\u0307o");
989-
assertInitCap("i\u0307o", "UTF8_LCASE", "I\u0307o");
990-
assertInitCap("i\u0307o", "UNICODE", "I\u0307o");
991-
assertInitCap("i\u0307o", "UNICODE_CI", "I\u0307o");
992-
// Different possible word boundaries
1003+
assertInitCap("aB 世 de", "UTF8_BINARY", "Ab 世 De");
1004+
assertInitCap("aB 世 de", "UTF8_LCASE", "Ab 世 De");
1005+
assertInitCap("aB 世 de", "UNICODE", "Ab 世 De");
1006+
assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De");
1007+
// One-to-many case mapping (e.g. Turkish dotted I).
1008+
assertInitCap("İ", "UTF8_BINARY", "I\u0307");
1009+
assertInitCap("İ", "UTF8_LCASE", "İ");
1010+
assertInitCap("İ", "UNICODE", "İ");
1011+
assertInitCap("İ", "UNICODE_CI", "İ");
1012+
assertInitCap("I\u0307", "UTF8_BINARY","I\u0307");
1013+
assertInitCap("I\u0307", "UTF8_LCASE","I\u0307");
1014+
assertInitCap("I\u0307", "UNICODE","I\u0307");
1015+
assertInitCap("I\u0307", "UNICODE_CI","I\u0307");
1016+
assertInitCap("İonic", "UTF8_BINARY", "I\u0307onic");
1017+
assertInitCap("İonic", "UTF8_LCASE", "İonic");
1018+
assertInitCap("İonic", "UNICODE", "İonic");
1019+
assertInitCap("İonic", "UNICODE_CI", "İonic");
1020+
assertInitCap("i\u0307onic", "UTF8_BINARY","I\u0307onic");
1021+
assertInitCap("i\u0307onic", "UTF8_LCASE","I\u0307onic");
1022+
assertInitCap("i\u0307onic", "UNICODE","I\u0307onic");
1023+
assertInitCap("i\u0307onic", "UNICODE_CI","I\u0307onic");
1024+
assertInitCap("FIDELİO", "UTF8_BINARY", "Fideli\u0307o");
1025+
assertInitCap("FIDELİO", "UTF8_LCASE", "Fideli\u0307o");
1026+
assertInitCap("FIDELİO", "UNICODE", "Fideli\u0307o");
1027+
assertInitCap("FIDELİO", "UNICODE_CI", "Fideli\u0307o");
1028+
// Surrogate pairs.
1029+
assertInitCap("a🙃B🙃c", "UTF8_BINARY", "A🙃b🙃c");
1030+
assertInitCap("a🙃B🙃c", "UTF8_LCASE", "A🙃B🙃C");
1031+
assertInitCap("a🙃B🙃c", "UNICODE", "A🙃B🙃C");
1032+
assertInitCap("a🙃B🙃c", "UNICODE_CI", "A🙃B🙃C");
1033+
assertInitCap("😄 😆", "UTF8_BINARY", "😄 😆");
1034+
assertInitCap("😄 😆", "UTF8_LCASE", "😄 😆");
1035+
assertInitCap("😄 😆", "UNICODE", "😄 😆");
1036+
assertInitCap("😄 😆", "UNICODE_CI", "😄 😆");
1037+
assertInitCap("😀😆😃😄", "UTF8_BINARY", "😀😆😃😄");
1038+
assertInitCap("😀😆😃😄", "UTF8_LCASE", "😀😆😃😄");
1039+
assertInitCap("😀😆😃😄", "UNICODE", "😀😆😃😄");
1040+
assertInitCap("😀😆😃😄", "UNICODE_CI", "😀😆😃😄");
1041+
assertInitCap("𝔸", "UTF8_BINARY", "𝔸");
1042+
assertInitCap("𝔸", "UTF8_LCASE", "𝔸");
1043+
assertInitCap("𝔸", "UNICODE", "𝔸");
1044+
assertInitCap("𝔸", "UNICODE_CI", "𝔸");
1045+
assertInitCap("𐐅", "UTF8_BINARY", "𐐭");
1046+
assertInitCap("𐐅", "UTF8_LCASE", "𐐅");
1047+
assertInitCap("𐐅", "UNICODE", "𐐅");
1048+
assertInitCap("𐐅", "UNICODE_CI", "𐐅");
1049+
assertInitCap("𐐭", "UTF8_BINARY", "𐐭");
1050+
assertInitCap("𐐭", "UTF8_LCASE", "𐐅");
1051+
assertInitCap("𐐭", "UNICODE", "𐐅");
1052+
assertInitCap("𐐭", "UNICODE_CI", "𐐅");
1053+
assertInitCap("𐐭𝔸", "UTF8_BINARY", "𐐭𝔸");
1054+
assertInitCap("𐐭𝔸", "UTF8_LCASE", "𐐅𝔸");
1055+
assertInitCap("𐐭𝔸", "UNICODE", "𐐅𝔸");
1056+
assertInitCap("𐐭𝔸", "UNICODE_CI", "𐐅𝔸");
1057+
// Ligatures.
1058+
assertInitCap("ß fi ffi ff st ῗ", "UTF8_BINARY","ß fi ffi ff st ῗ");
1059+
assertInitCap("ß fi ffi ff st ῗ", "UTF8_LCASE","Ss Fi Ffi Ff St \u0399\u0308\u0342");
1060+
assertInitCap("ß fi ffi ff st ῗ", "UNICODE","Ss Fi Ffi Ff St \u0399\u0308\u0342");
1061+
assertInitCap("ß fi ffi ff st ῗ", "UNICODE","Ss Fi Ffi Ff St \u0399\u0308\u0342");
1062+
// Different possible word boundaries.
9931063
assertInitCap("a b c", "UTF8_BINARY", "A B C");
9941064
assertInitCap("a b c", "UNICODE", "A B C");
9951065
assertInitCap("a b c", "UTF8_LCASE", "A B C");
@@ -1006,7 +1076,7 @@ public void testInitCap() throws SparkException {
10061076
assertInitCap("a?b世c", "UNICODE", "A?B世C");
10071077
assertInitCap("a?b世c", "UTF8_LCASE", "A?B世C");
10081078
assertInitCap("a?b世c", "UNICODE_CI", "A?B世C");
1009-
// Titlecase characters that are different from uppercase characters
1079+
// Titlecase characters that are different from uppercase characters.
10101080
assertInitCap("dzDZDz", "UTF8_BINARY", "Dzdzdz");
10111081
assertInitCap("dzDZDz", "UNICODE", "Dzdzdz");
10121082
assertInitCap("dzDZDz", "UTF8_LCASE", "Dzdzdz");

0 commit comments

Comments
 (0)