@@ -2870,6 +2870,53 @@ def test_asutf8andsize(self):
2870
2870
self .assertEqual (unicode_asutf8andsize (nonbmp ), (b'\xf4 \x8f \xbf \xbf ' , 4 ))
2871
2871
self .assertRaises (UnicodeEncodeError , unicode_asutf8andsize , 'a\ud800 b\udfff c' )
2872
2872
2873
@support.cpython_only
def test_decodeutf8(self):
    """Test PyUnicode_DecodeUTF8()"""
    import _testcapi
    decodeutf8 = _testcapi.unicode_decodeutf8

    # Well-formed input covering 1-, 2-, 3- and 4-byte UTF-8 sequences
    # must round-trip, both with the errors argument omitted and with
    # an explicit 'strict' handler.
    for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
        b = s.encode('utf-8')
        self.assertEqual(decodeutf8(b), s)
        self.assertEqual(decodeutf8(b, 'strict'), s)

    # Malformed input: a lone continuation byte, two bytes that can
    # never start a valid sequence, and a 4-byte sequence cut short.
    self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80')
    self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0')
    self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff')
    self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f')
    # With 'replace' the whole truncated sequence collapses into a single
    # U+FFFD, whether it ends the input or is followed by more data.
    self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd')
    self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')

    # An unknown error handler name is only looked up once a decoding
    # error actually occurs, hence the invalid byte in the input.
    self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo')
    # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
    # negative size.
2894
+
2895
@support.cpython_only
def test_decodeutf8stateful(self):
    """Test PyUnicode_DecodeUTF8Stateful()"""
    import _testcapi
    decodeutf8stateful = _testcapi.unicode_decodeutf8stateful

    # Well-formed input covering 1-, 2-, 3- and 4-byte UTF-8 sequences
    # decodes completely: the helper returns the string together with
    # the number of bytes consumed, which must equal the input length.
    for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']:
        b = s.encode('utf-8')
        self.assertEqual(decodeutf8stateful(b), (s, len(b)))
        self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b)))

    # Bytes that are invalid on their own are errors, as in the
    # non-stateful decoder.
    self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80')
    self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0')
    self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff')
    # A truncated sequence at the very end of the input is not an error:
    # decoding stops before it and only the leading 'a' is consumed,
    # regardless of the error handler.
    self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1))
    self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1))
    # The same truncated sequence followed by more data is a real error:
    # strict raises, 'replace' substitutes one U+FFFD and consumes all
    # four bytes.
    self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb')
    self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))

    # An unknown error handler name is only looked up once a decoding
    # error actually occurs, hence the invalid byte in the input.
    self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo')
    # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
    # negative size.
    # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
    # "consumed".
2919
+
2873
2920
# Test PyUnicode_FindChar()
2874
2921
@support .cpython_only
2875
2922
def test_findchar (self ):
0 commit comments