@@ -821,8 +821,12 @@ public void testStringSplitSQL() throws SparkException {
821821 assertStringSplitSQL ("A𐐭B" , "𐐅" , "UNICODE_CI" , array_A_B );
822822 }
823823
824+ /**
825+ * Verify the behaviour of the `Upper` collation support class.
826+ */
827+
824828 private void assertUpper (String target , String collationName , String expected )
825- throws SparkException {
829+ throws SparkException {
826830 UTF8String target_utf8 = UTF8String .fromString (target );
827831 UTF8String expected_utf8 = UTF8String .fromString (expected );
828832 int collationId = CollationFactory .collationNameToId (collationName );
@@ -835,52 +839,57 @@ private void assertUpper(String target, String collationName, String expected)
835839
836840 @ Test
837841 public void testUpper () throws SparkException {
838- // Edge cases
839- assertUpper ("" , "UTF8_BINARY" , "" );
840- assertUpper ("" , "UTF8_LCASE" , "" );
841- assertUpper ("" , "UNICODE" , "" );
842- assertUpper ("" , "UNICODE_CI" , "" );
843- // Basic tests
844- assertUpper ("abcde" , "UTF8_BINARY" , "ABCDE" );
845- assertUpper ("abcde" , "UTF8_LCASE" , "ABCDE" );
846- assertUpper ("abcde" , "UNICODE" , "ABCDE" );
847- assertUpper ("abcde" , "UNICODE_CI" , "ABCDE" );
848- // Uppercase present
849- assertUpper ("AbCdE" , "UTF8_BINARY" , "ABCDE" );
850- assertUpper ("aBcDe" , "UTF8_BINARY" , "ABCDE" );
851- assertUpper ("AbCdE" , "UTF8_LCASE" , "ABCDE" );
852- assertUpper ("aBcDe" , "UTF8_LCASE" , "ABCDE" );
853- assertUpper ("AbCdE" , "UNICODE" , "ABCDE" );
854- assertUpper ("aBcDe" , "UNICODE" , "ABCDE" );
855- assertUpper ("AbCdE" , "UNICODE_CI" , "ABCDE" );
856- assertUpper ("aBcDe" , "UNICODE_CI" , "ABCDE" );
857- // Accent letters
858- assertUpper ("aBćDe" ,"UTF8_BINARY" , "ABĆDE" );
859- assertUpper ("aBćDe" ,"UTF8_LCASE" , "ABĆDE" );
860- assertUpper ("aBćDe" ,"UNICODE" , "ABĆDE" );
861- assertUpper ("aBćDe" ,"UNICODE_CI" , "ABĆDE" );
862- // Variable byte length characters
863- assertUpper ("ab世De" , "UTF8_BINARY" , "AB世DE" );
864- assertUpper ("äbćδe" , "UTF8_BINARY" , "ÄBĆΔE" );
865- assertUpper ("ab世De" , "UTF8_LCASE" , "AB世DE" );
866- assertUpper ("äbćδe" , "UTF8_LCASE" , "ÄBĆΔE" );
867- assertUpper ("ab世De" , "UNICODE" , "AB世DE" );
868- assertUpper ("äbćδe" , "UNICODE" , "ÄBĆΔE" );
869- assertUpper ("ab世De" , "UNICODE_CI" , "AB世DE" );
870- assertUpper ("äbćδe" , "UNICODE_CI" , "ÄBĆΔE" );
871- // Case-variable character length
872- assertUpper ("i\u0307 o" , "UTF8_BINARY" ,"I\u0307 O" );
873- assertUpper ("i\u0307 o" , "UTF8_LCASE" ,"I\u0307 O" );
874- assertUpper ("i\u0307 o" , "UNICODE" ,"I\u0307 O" );
875- assertUpper ("i\u0307 o" , "UNICODE_CI" ,"I\u0307 O" );
876- assertUpper ("ß fi ffi ff st ῗ" , "UTF8_BINARY" ,"SS FI FFI FF ST \u0399 \u0308 \u0342 " );
877- assertUpper ("ß fi ffi ff st ῗ" , "UTF8_LCASE" ,"SS FI FFI FF ST \u0399 \u0308 \u0342 " );
878- assertUpper ("ß fi ffi ff st ῗ" , "UNICODE" ,"SS FI FFI FF ST \u0399 \u0308 \u0342 " );
879- assertUpper ("ß fi ffi ff st ῗ" , "UNICODE" ,"SS FI FFI FF ST \u0399 \u0308 \u0342 " );
842+ for (String collationName : testSupportedCollations ) {
843+ // Empty strings.
844+ assertUpper ("" , collationName , "" );
845+ // Basic tests.
846+ assertUpper ("abcde" , collationName , "ABCDE" );
847+ assertUpper ("AbCdE" , collationName , "ABCDE" );
848+ assertUpper ("aBcDe" , collationName , "ABCDE" );
849+ assertUpper ("ABCDE" , collationName , "ABCDE" );
850+ // Advanced tests.
851+ assertUpper ("aBćDe" , collationName , "ABĆDE" );
852+ assertUpper ("ab世De" , collationName , "AB世DE" );
853+ assertUpper ("äbćδe" , collationName , "ÄBĆΔE" );
854+ assertUpper ("AbĆdE" , collationName , "ABĆDE" );
855+ assertUpper ("aB世De" , collationName , "AB世DE" );
856+ assertUpper ("ÄBĆΔE" , collationName , "ÄBĆΔE" );
857+ // One-to-many case mapping (e.g. Turkish dotted I).
858+ assertUpper ("İ" , collationName , "İ" );
859+ assertUpper ("i\u0307 " , collationName ,"I\u0307 " );
860+ assertUpper ("İonic" , collationName , "İONIC" );
861+ assertUpper ("i\u0307 onic" , collationName ,"I\u0307 ONIC" );
862+ assertUpper ("FIDELİO" , collationName , "FIDELİO" );
863+ // Conditional case mapping (e.g. Greek sigmas).
864+ assertUpper ("σ" , collationName , "Σ" );
865+ assertUpper ("σ" , collationName , "Σ" );
866+ assertUpper ("ς" , collationName , "Σ" );
867+ assertUpper ("Σ" , collationName , "Σ" );
868+ assertUpper ("ΣΑΛΑΤΑ" , collationName , "ΣΑΛΑΤΑ" );
869+ assertUpper ("σαλατα" , collationName , "ΣΑΛΑΤΑ" );
870+ assertUpper ("ςαλατα" , collationName , "ΣΑΛΑΤΑ" );
871+ assertUpper ("ΘΑΛΑΣΣΙΝΟΣ" , collationName , "ΘΑΛΑΣΣΙΝΟΣ" );
872+ assertUpper ("θαλασσινοσ" , collationName , "ΘΑΛΑΣΣΙΝΟΣ" );
873+ assertUpper ("θαλασσινος" , collationName , "ΘΑΛΑΣΣΙΝΟΣ" );
874+ // Surrogate pairs.
875+ assertUpper ("a🙃B🙃c" , collationName , "A🙃B🙃C" );
876+ assertUpper ("😄 😆" , collationName , "😄 😆" );
877+ assertUpper ("😀😆😃😄" , collationName , "😀😆😃😄" );
878+ assertUpper ("𝔸" , collationName , "𝔸" );
879+ assertUpper ("𐐅" , collationName , "𐐅" );
880+ assertUpper ("𐐭" , collationName , "𐐅" );
881+ assertUpper ("𐐭𝔸" , collationName , "𐐅𝔸" );
882+ // Ligatures.
883+ assertUpper ("ß fi ffi ff st ῗ" , collationName ,"SS FI FFI FF ST \u0399 \u0308 \u0342 " );
884+ }
880885 }
881886
887+ /**
888+ * Verify the behaviour of the `Lower` collation support class.
889+ */
890+
882891 private void assertLower (String target , String collationName , String expected )
883- throws SparkException {
892+ throws SparkException {
884893 UTF8String target_utf8 = UTF8String .fromString (target );
885894 UTF8String expected_utf8 = UTF8String .fromString (expected );
886895 int collationId = CollationFactory .collationNameToId (collationName );
@@ -893,48 +902,56 @@ private void assertLower(String target, String collationName, String expected)
893902
894903 @ Test
895904 public void testLower () throws SparkException {
896- // Edge cases
897- assertLower ("" , "UTF8_BINARY" , "" );
898- assertLower ("" , "UTF8_LCASE" , "" );
899- assertLower ("" , "UNICODE" , "" );
900- assertLower ("" , "UNICODE_CI" , "" );
901- // Basic tests
902- assertLower ("ABCDE" , "UTF8_BINARY" , "abcde" );
903- assertLower ("ABCDE" , "UTF8_LCASE" , "abcde" );
904- assertLower ("ABCDE" , "UNICODE" , "abcde" );
905- assertLower ("ABCDE" , "UNICODE_CI" , "abcde" );
906- // Uppercase present
907- assertLower ("AbCdE" , "UTF8_BINARY" , "abcde" );
908- assertLower ("aBcDe" , "UTF8_BINARY" , "abcde" );
909- assertLower ("AbCdE" , "UTF8_LCASE" , "abcde" );
910- assertLower ("aBcDe" , "UTF8_LCASE" , "abcde" );
911- assertLower ("AbCdE" , "UNICODE" , "abcde" );
912- assertLower ("aBcDe" , "UNICODE" , "abcde" );
913- assertLower ("AbCdE" , "UNICODE_CI" , "abcde" );
914- assertLower ("aBcDe" , "UNICODE_CI" , "abcde" );
915- // Accent letters
916- assertLower ("AbĆdE" ,"UTF8_BINARY" , "abćde" );
917- assertLower ("AbĆdE" ,"UTF8_LCASE" , "abćde" );
918- assertLower ("AbĆdE" ,"UNICODE" , "abćde" );
919- assertLower ("AbĆdE" ,"UNICODE_CI" , "abćde" );
920- // Variable byte length characters
921- assertLower ("aB世De" , "UTF8_BINARY" , "ab世de" );
922- assertLower ("ÄBĆΔE" , "UTF8_BINARY" , "äbćδe" );
923- assertLower ("aB世De" , "UTF8_LCASE" , "ab世de" );
924- assertLower ("ÄBĆΔE" , "UTF8_LCASE" , "äbćδe" );
925- assertLower ("aB世De" , "UNICODE" , "ab世de" );
926- assertLower ("ÄBĆΔE" , "UNICODE" , "äbćδe" );
927- assertLower ("aB世De" , "UNICODE_CI" , "ab世de" );
928- assertLower ("ÄBĆΔE" , "UNICODE_CI" , "äbćδe" );
929- // Case-variable character length
930- assertLower ("İo" , "UTF8_BINARY" ,"i\u0307 o" );
931- assertLower ("İo" , "UTF8_LCASE" ,"i\u0307 o" );
932- assertLower ("İo" , "UNICODE" ,"i\u0307 o" );
933- assertLower ("İo" , "UNICODE_CI" ,"i\u0307 o" );
905+ for (String collationName : testSupportedCollations ) {
906+ // Empty strings.
907+ assertLower ("" , collationName , "" );
908+ // Basic tests.
909+ assertLower ("abcde" , collationName , "abcde" );
910+ assertLower ("AbCdE" , collationName , "abcde" );
911+ assertLower ("aBcDe" , collationName , "abcde" );
912+ assertLower ("ABCDE" , collationName , "abcde" );
913+ // Advanced tests.
914+ assertUpper ("aBćDe" , collationName , "ABĆDE" );
915+ assertUpper ("ab世De" , collationName , "AB世DE" );
916+ assertUpper ("äbćδe" , collationName , "ÄBĆΔE" );
917+ assertLower ("AbĆdE" , collationName , "abćde" );
918+ assertLower ("aB世De" , collationName , "ab世de" );
919+ assertLower ("ÄBĆΔE" , collationName , "äbćδe" );
920+ // One-to-many case mapping (e.g. Turkish dotted I).
921+ assertLower ("İ" , collationName , "i\u0307 " );
922+ assertLower ("I\u0307 " , collationName ,"i\u0307 " );
923+ assertLower ("İonic" , collationName , "i\u0307 onic" );
924+ assertLower ("i\u0307 onic" , collationName ,"i\u0307 onic" );
925+ assertLower ("FIDELİO" , collationName , "fideli\u0307 o" );
926+ // Conditional case mapping (e.g. Greek sigmas).
927+ assertLower ("σ" , collationName , "σ" );
928+ assertLower ("ς" , collationName , "ς" );
929+ assertLower ("Σ" , collationName , "σ" );
930+ assertLower ("ΣΑΛΑΤΑ" , collationName , "σαλατα" );
931+ assertLower ("σαλατα" , collationName , "σαλατα" );
932+ assertLower ("ςαλατα" , collationName , "ςαλατα" );
933+ assertLower ("ΘΑΛΑΣΣΙΝΟΣ" , collationName , "θαλασσινος" );
934+ assertLower ("θαλασσινοσ" , collationName , "θαλασσινοσ" );
935+ assertLower ("θαλασσινος" , collationName , "θαλασσινος" );
936+ // Surrogate pairs.
937+ assertLower ("a🙃B🙃c" , collationName , "a🙃b🙃c" );
938+ assertLower ("😄 😆" , collationName , "😄 😆" );
939+ assertLower ("😀😆😃😄" , collationName , "😀😆😃😄" );
940+ assertLower ("𝔸" , collationName , "𝔸" );
941+ assertLower ("𐐅" , collationName , "𐐭" );
942+ assertLower ("𐐭" , collationName , "𐐭" );
943+ assertLower ("𐐭𝔸" , collationName , "𐐭𝔸" );
944+ // Ligatures.
945+ assertLower ("ß fi ffi ff st ῗ" , collationName ,"ß fi ffi ff st ῗ" );
946+ }
934947 }
935948
949+ /**
950+ * Verify the behaviour of the `InitCap` collation support class.
951+ */
952+
936953 private void assertInitCap (String target , String collationName , String expected )
937- throws SparkException {
954+ throws SparkException {
938955 UTF8String target_utf8 = UTF8String .fromString (target );
939956 UTF8String expected_utf8 = UTF8String .fromString (expected );
940957 int collationId = CollationFactory .collationNameToId (collationName );
@@ -947,49 +964,102 @@ private void assertInitCap(String target, String collationName, String expected)
947964
948965 @ Test
949966 public void testInitCap () throws SparkException {
950- // Edge cases
951- assertInitCap ("" , "UTF8_BINARY" , "" );
952- assertInitCap ("" , "UTF8_LCASE" , "" );
953- assertInitCap ("" , "UNICODE" , "" );
954- assertInitCap ("" , "UNICODE_CI" , "" );
955- // Basic tests
956- assertInitCap ("ABCDE" , "UTF8_BINARY" , "Abcde" );
957- assertInitCap ("ABCDE" , "UTF8_LCASE" , "Abcde" );
958- assertInitCap ("ABCDE" , "UNICODE" , "Abcde" );
959- assertInitCap ("ABCDE" , "UNICODE_CI" , "Abcde" );
960- // Uppercase present
961- assertInitCap ("AbCdE" , "UTF8_BINARY" , "Abcde" );
962- assertInitCap ("aBcDe" , "UTF8_BINARY" , "Abcde" );
963- assertInitCap ("AbCdE" , "UTF8_LCASE" , "Abcde" );
964- assertInitCap ("aBcDe" , "UTF8_LCASE" , "Abcde" );
965- assertInitCap ("AbCdE" , "UNICODE" , "Abcde" );
966- assertInitCap ("aBcDe" , "UNICODE" , "Abcde" );
967- assertInitCap ("AbCdE" , "UNICODE_CI" , "Abcde" );
968- assertInitCap ("aBcDe" , "UNICODE_CI" , "Abcde" );
969- // Accent letters
970- assertInitCap ("AbĆdE" , "UTF8_BINARY" , "Abćde" );
971- assertInitCap ("AbĆdE" , "UTF8_LCASE" , "Abćde" );
972- assertInitCap ("AbĆdE" , "UNICODE" , "Abćde" );
973- assertInitCap ("AbĆdE" , "UNICODE_CI" , "Abćde" );
974- // Variable byte length characters
975- assertInitCap ("aB 世 De" , "UTF8_BINARY" , "Ab 世 De" );
967+ for (String collationName : testSupportedCollations ) {
968+ // Empty strings.
969+ assertInitCap ("" , collationName , "" );
970+ // Basic tests.
971+ assertInitCap ("abcde" , collationName , "Abcde" );
972+ assertInitCap ("AbCdE" , collationName , "Abcde" );
973+ assertInitCap ("aBcDe" , collationName , "Abcde" );
974+ assertInitCap ("ABCDE" , collationName , "Abcde" );
975+ // Conditional case mapping (e.g. Greek sigmas).
976+ assertInitCap ("σ" , collationName , "Σ" );
977+ assertInitCap ("ς" , collationName , "Σ" );
978+ assertInitCap ("Σ" , collationName , "Σ" );
979+ assertInitCap ("ΣΑΛΑΤΑ" , collationName , "Σαλατα" );
980+ assertInitCap ("σαλατα" , collationName , "Σαλατα" );
981+ assertInitCap ("ςαλατα" , collationName , "Σαλατα" );
982+ assertInitCap ("ΘΑΛΑΣΣΙΝΟΣ" , collationName , "Θαλασσινος" );
983+ assertInitCap ("θαλασσινοσ" , collationName , "Θαλασσινοσ" );
984+ assertInitCap ("θαλασσινος" , collationName , "Θαλασσινος" );
985+ }
986+ // Advanced tests.
987+ assertInitCap ("aBćDe" , "UTF8_BINARY" , "Abćde" );
988+ assertInitCap ("aBćDe" , "UTF8_LCASE" , "Abćde" );
989+ assertInitCap ("aBćDe" , "UNICODE" , "Abćde" );
990+ assertInitCap ("aBćDe" , "UNICODE_CI" , "Abćde" );
991+ assertInitCap ("ab世De" , "UTF8_BINARY" , "Ab世de" );
992+ assertInitCap ("ab世De" , "UTF8_LCASE" , "Ab世De" );
993+ assertInitCap ("ab世De" , "UNICODE" , "Ab世De" );
994+ assertInitCap ("ab世De" , "UNICODE_CI" , "Ab世De" );
995+ assertInitCap ("äbćδe" , "UTF8_BINARY" , "Äbćδe" );
996+ assertInitCap ("äbćδe" , "UTF8_LCASE" , "Äbćδe" );
997+ assertInitCap ("äbćδe" , "UNICODE" , "Äbćδe" );
998+ assertInitCap ("äbćδe" , "UNICODE_CI" , "Äbćδe" );
976999 assertInitCap ("ÄBĆΔE" , "UTF8_BINARY" , "Äbćδe" );
977- assertInitCap ("aB 世 De" , "UTF8_LCASE" , "Ab 世 De" );
9781000 assertInitCap ("ÄBĆΔE" , "UTF8_LCASE" , "Äbćδe" );
979- assertInitCap ("aB 世 De" , "UNICODE" , "Ab 世 De" );
9801001 assertInitCap ("ÄBĆΔE" , "UNICODE" , "Äbćδe" );
981- assertInitCap ("aB 世 de" , "UNICODE_CI" , "Ab 世 De" );
9821002 assertInitCap ("ÄBĆΔE" , "UNICODE_CI" , "Äbćδe" );
983- // Case-variable character length
984- assertInitCap ("İo" , "UTF8_BINARY" , "I\u0307 o" );
985- assertInitCap ("İo" , "UTF8_LCASE" , "İo" );
986- assertInitCap ("İo" , "UNICODE" , "İo" );
987- assertInitCap ("İo" , "UNICODE_CI" , "İo" );
988- assertInitCap ("i\u0307 o" , "UTF8_BINARY" , "I\u0307 o" );
989- assertInitCap ("i\u0307 o" , "UTF8_LCASE" , "I\u0307 o" );
990- assertInitCap ("i\u0307 o" , "UNICODE" , "I\u0307 o" );
991- assertInitCap ("i\u0307 o" , "UNICODE_CI" , "I\u0307 o" );
992- // Different possible word boundaries
1003+ assertInitCap ("aB 世 de" , "UTF8_BINARY" , "Ab 世 De" );
1004+ assertInitCap ("aB 世 de" , "UTF8_LCASE" , "Ab 世 De" );
1005+ assertInitCap ("aB 世 de" , "UNICODE" , "Ab 世 De" );
1006+ assertInitCap ("aB 世 de" , "UNICODE_CI" , "Ab 世 De" );
1007+ // One-to-many case mapping (e.g. Turkish dotted I).
1008+ assertInitCap ("İ" , "UTF8_BINARY" , "I\u0307 " );
1009+ assertInitCap ("İ" , "UTF8_LCASE" , "İ" );
1010+ assertInitCap ("İ" , "UNICODE" , "İ" );
1011+ assertInitCap ("İ" , "UNICODE_CI" , "İ" );
1012+ assertInitCap ("I\u0307 " , "UTF8_BINARY" ,"I\u0307 " );
1013+ assertInitCap ("I\u0307 " , "UTF8_LCASE" ,"I\u0307 " );
1014+ assertInitCap ("I\u0307 " , "UNICODE" ,"I\u0307 " );
1015+ assertInitCap ("I\u0307 " , "UNICODE_CI" ,"I\u0307 " );
1016+ assertInitCap ("İonic" , "UTF8_BINARY" , "I\u0307 onic" );
1017+ assertInitCap ("İonic" , "UTF8_LCASE" , "İonic" );
1018+ assertInitCap ("İonic" , "UNICODE" , "İonic" );
1019+ assertInitCap ("İonic" , "UNICODE_CI" , "İonic" );
1020+ assertInitCap ("i\u0307 onic" , "UTF8_BINARY" ,"I\u0307 onic" );
1021+ assertInitCap ("i\u0307 onic" , "UTF8_LCASE" ,"I\u0307 onic" );
1022+ assertInitCap ("i\u0307 onic" , "UNICODE" ,"I\u0307 onic" );
1023+ assertInitCap ("i\u0307 onic" , "UNICODE_CI" ,"I\u0307 onic" );
1024+ assertInitCap ("FIDELİO" , "UTF8_BINARY" , "Fideli\u0307 o" );
1025+ assertInitCap ("FIDELİO" , "UTF8_LCASE" , "Fideli\u0307 o" );
1026+ assertInitCap ("FIDELİO" , "UNICODE" , "Fideli\u0307 o" );
1027+ assertInitCap ("FIDELİO" , "UNICODE_CI" , "Fideli\u0307 o" );
1028+ // Surrogate pairs.
1029+ assertInitCap ("a🙃B🙃c" , "UTF8_BINARY" , "A🙃b🙃c" );
1030+ assertInitCap ("a🙃B🙃c" , "UTF8_LCASE" , "A🙃B🙃C" );
1031+ assertInitCap ("a🙃B🙃c" , "UNICODE" , "A🙃B🙃C" );
1032+ assertInitCap ("a🙃B🙃c" , "UNICODE_CI" , "A🙃B🙃C" );
1033+ assertInitCap ("😄 😆" , "UTF8_BINARY" , "😄 😆" );
1034+ assertInitCap ("😄 😆" , "UTF8_LCASE" , "😄 😆" );
1035+ assertInitCap ("😄 😆" , "UNICODE" , "😄 😆" );
1036+ assertInitCap ("😄 😆" , "UNICODE_CI" , "😄 😆" );
1037+ assertInitCap ("😀😆😃😄" , "UTF8_BINARY" , "😀😆😃😄" );
1038+ assertInitCap ("😀😆😃😄" , "UTF8_LCASE" , "😀😆😃😄" );
1039+ assertInitCap ("😀😆😃😄" , "UNICODE" , "😀😆😃😄" );
1040+ assertInitCap ("😀😆😃😄" , "UNICODE_CI" , "😀😆😃😄" );
1041+ assertInitCap ("𝔸" , "UTF8_BINARY" , "𝔸" );
1042+ assertInitCap ("𝔸" , "UTF8_LCASE" , "𝔸" );
1043+ assertInitCap ("𝔸" , "UNICODE" , "𝔸" );
1044+ assertInitCap ("𝔸" , "UNICODE_CI" , "𝔸" );
1045+ assertInitCap ("𐐅" , "UTF8_BINARY" , "𐐭" );
1046+ assertInitCap ("𐐅" , "UTF8_LCASE" , "𐐅" );
1047+ assertInitCap ("𐐅" , "UNICODE" , "𐐅" );
1048+ assertInitCap ("𐐅" , "UNICODE_CI" , "𐐅" );
1049+ assertInitCap ("𐐭" , "UTF8_BINARY" , "𐐭" );
1050+ assertInitCap ("𐐭" , "UTF8_LCASE" , "𐐅" );
1051+ assertInitCap ("𐐭" , "UNICODE" , "𐐅" );
1052+ assertInitCap ("𐐭" , "UNICODE_CI" , "𐐅" );
1053+ assertInitCap ("𐐭𝔸" , "UTF8_BINARY" , "𐐭𝔸" );
1054+ assertInitCap ("𐐭𝔸" , "UTF8_LCASE" , "𐐅𝔸" );
1055+ assertInitCap ("𐐭𝔸" , "UNICODE" , "𐐅𝔸" );
1056+ assertInitCap ("𐐭𝔸" , "UNICODE_CI" , "𐐅𝔸" );
1057+ // Ligatures.
1058+ assertInitCap ("ß fi ffi ff st ῗ" , "UTF8_BINARY" ,"ß fi ffi ff st ῗ" );
1059+ assertInitCap ("ß fi ffi ff st ῗ" , "UTF8_LCASE" ,"Ss Fi Ffi Ff St \u0399 \u0308 \u0342 " );
1060+ assertInitCap ("ß fi ffi ff st ῗ" , "UNICODE" ,"Ss Fi Ffi Ff St \u0399 \u0308 \u0342 " );
1061+ assertInitCap ("ß fi ffi ff st ῗ" , "UNICODE" ,"Ss Fi Ffi Ff St \u0399 \u0308 \u0342 " );
1062+ // Different possible word boundaries.
9931063 assertInitCap ("a b c" , "UTF8_BINARY" , "A B C" );
9941064 assertInitCap ("a b c" , "UNICODE" , "A B C" );
9951065 assertInitCap ("a b c" , "UTF8_LCASE" , "A B C" );
@@ -1006,7 +1076,7 @@ public void testInitCap() throws SparkException {
10061076 assertInitCap ("a?b世c" , "UNICODE" , "A?B世C" );
10071077 assertInitCap ("a?b世c" , "UTF8_LCASE" , "A?B世C" );
10081078 assertInitCap ("a?b世c" , "UNICODE_CI" , "A?B世C" );
1009- // Titlecase characters that are different from uppercase characters
1079+ // Titlecase characters that are different from uppercase characters.
10101080 assertInitCap ("dzDZDz" , "UTF8_BINARY" , "Dzdzdz" );
10111081 assertInitCap ("dzDZDz" , "UNICODE" , "Dzdzdz" );
10121082 assertInitCap ("dzDZDz" , "UTF8_LCASE" , "Dzdzdz" );
0 commit comments