@@ -859,16 +859,30 @@ def test_ignore_case(self):
859
859
self .assertEqual (re .match (r"((a)\s(abc|a))" , "a a" , re .I ).group (1 ), "a a" )
860
860
self .assertEqual (re .match (r"((a)\s(abc|a)*)" , "a aa" , re .I ).group (1 ), "a aa" )
861
861
862
- assert '\u212a ' .lower () == 'k' # 'K'
862
+ # Two different characters have the same lowercase.
863
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
863
864
self .assertTrue (re .match (r'K' , '\u212a ' , re .I ))
864
865
self .assertTrue (re .match (r'k' , '\u212a ' , re .I ))
865
866
self .assertTrue (re .match (r'\u212a' , 'K' , re .I ))
866
867
self .assertTrue (re .match (r'\u212a' , 'k' , re .I ))
867
- assert '\u017f ' .upper () == 'S' # 'ſ'
868
+
869
+ # Two different characters have the same uppercase.
870
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
868
871
self .assertTrue (re .match (r'S' , '\u017f ' , re .I ))
869
872
self .assertTrue (re .match (r's' , '\u017f ' , re .I ))
870
873
self .assertTrue (re .match (r'\u017f' , 'S' , re .I ))
871
874
self .assertTrue (re .match (r'\u017f' , 's' , re .I ))
875
+
876
+ # Two different characters have the same uppercase. Unicode 9.0+.
877
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
878
+ self .assertTrue (re .match (r'\u0412' , '\u0432 ' , re .I ))
879
+ self .assertTrue (re .match (r'\u0412' , '\u1c80 ' , re .I ))
880
+ self .assertTrue (re .match (r'\u0432' , '\u0412 ' , re .I ))
881
+ self .assertTrue (re .match (r'\u0432' , '\u1c80 ' , re .I ))
882
+ self .assertTrue (re .match (r'\u1c80' , '\u0412 ' , re .I ))
883
+ self .assertTrue (re .match (r'\u1c80' , '\u0432 ' , re .I ))
884
+
885
+ # Two different characters have the same multicharacter uppercase.
872
886
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
873
887
self .assertTrue (re .match (r'\ufb05' , '\ufb06 ' , re .I ))
874
888
self .assertTrue (re .match (r'\ufb06' , '\ufb05 ' , re .I ))
@@ -882,16 +896,31 @@ def test_ignore_case_set(self):
882
896
self .assertTrue (re .match (br'[19a]' , b'a' , re .I ))
883
897
self .assertTrue (re .match (br'[19a]' , b'A' , re .I ))
884
898
self .assertTrue (re .match (br'[19A]' , b'a' , re .I ))
885
- assert '\u212a ' .lower () == 'k' # 'K'
899
+
900
+ # Two different characters have the same lowercase.
901
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
886
902
self .assertTrue (re .match (r'[19K]' , '\u212a ' , re .I ))
887
903
self .assertTrue (re .match (r'[19k]' , '\u212a ' , re .I ))
888
904
self .assertTrue (re .match (r'[19\u212a]' , 'K' , re .I ))
889
905
self .assertTrue (re .match (r'[19\u212a]' , 'k' , re .I ))
890
- assert '\u017f ' .upper () == 'S' # 'ſ'
906
+
907
+ # Two different characters have the same uppercase.
908
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
891
909
self .assertTrue (re .match (r'[19S]' , '\u017f ' , re .I ))
892
910
self .assertTrue (re .match (r'[19s]' , '\u017f ' , re .I ))
893
911
self .assertTrue (re .match (r'[19\u017f]' , 'S' , re .I ))
894
912
self .assertTrue (re .match (r'[19\u017f]' , 's' , re .I ))
913
+
914
+ # Two different characters have the same uppercase. Unicode 9.0+.
915
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
916
+ self .assertTrue (re .match (r'[19\u0412]' , '\u0432 ' , re .I ))
917
+ self .assertTrue (re .match (r'[19\u0412]' , '\u1c80 ' , re .I ))
918
+ self .assertTrue (re .match (r'[19\u0432]' , '\u0412 ' , re .I ))
919
+ self .assertTrue (re .match (r'[19\u0432]' , '\u1c80 ' , re .I ))
920
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0412 ' , re .I ))
921
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0432 ' , re .I ))
922
+
923
+ # Two different characters have the same multicharacter uppercase.
895
924
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
896
925
self .assertTrue (re .match (r'[19\ufb05]' , '\ufb06 ' , re .I ))
897
926
self .assertTrue (re .match (r'[19\ufb06]' , '\ufb05 ' , re .I ))
@@ -915,16 +944,30 @@ def test_ignore_case_range(self):
915
944
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010428 ' , re .I ))
916
945
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010400 ' , re .I ))
917
946
918
- assert '\u212a ' .lower () == 'k' # 'K'
947
+ # Two different characters have the same lowercase.
948
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
919
949
self .assertTrue (re .match (r'[J-M]' , '\u212a ' , re .I ))
920
950
self .assertTrue (re .match (r'[j-m]' , '\u212a ' , re .I ))
921
951
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'K' , re .I ))
922
952
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'k' , re .I ))
923
- assert '\u017f ' .upper () == 'S' # 'ſ'
953
+
954
+ # Two different characters have the same uppercase.
955
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
924
956
self .assertTrue (re .match (r'[R-T]' , '\u017f ' , re .I ))
925
957
self .assertTrue (re .match (r'[r-t]' , '\u017f ' , re .I ))
926
958
self .assertTrue (re .match (r'[\u017e-\u0180]' , 'S' , re .I ))
927
959
self .assertTrue (re .match (r'[\u017e-\u0180]' , 's' , re .I ))
960
+
961
+ # Two different characters have the same uppercase. Unicode 9.0+.
962
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
963
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u0432 ' , re .I ))
964
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u1c80 ' , re .I ))
965
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u0412 ' , re .I ))
966
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u1c80 ' , re .I ))
967
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0412 ' , re .I ))
968
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0432 ' , re .I ))
969
+
970
+ # Two different characters have the same multicharacter uppercase.
928
971
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
929
972
self .assertTrue (re .match (r'[\ufb04-\ufb05]' , '\ufb06 ' , re .I ))
930
973
self .assertTrue (re .match (r'[\ufb06-\ufb07]' , '\ufb05 ' , re .I ))
0 commit comments