@@ -358,29 +358,50 @@ fn eu_iterator_specializations() {
358
358
359
359
#[ test]
360
360
fn test_decode_utf8 ( ) {
361
- use core:: char:: * ;
362
- use core:: iter:: FromIterator ;
363
-
364
- for & ( str, bs) in [ ( "" , & [ ] as & [ u8 ] ) ,
365
- ( "A" , & [ 0x41u8 ] as & [ u8 ] ) ,
366
- ( "�" , & [ 0xC1u8 , 0x81u8 ] as & [ u8 ] ) ,
367
- ( "♥" , & [ 0xE2u8 , 0x99u8 , 0xA5u8 ] ) ,
368
- ( "♥A" , & [ 0xE2u8 , 0x99u8 , 0xA5u8 , 0x41u8 ] as & [ u8 ] ) ,
369
- ( "�" , & [ 0xE2u8 , 0x99u8 ] as & [ u8 ] ) ,
370
- ( "�A" , & [ 0xE2u8 , 0x99u8 , 0x41u8 ] as & [ u8 ] ) ,
371
- ( "�" , & [ 0xC0u8 ] as & [ u8 ] ) ,
372
- ( "�A" , & [ 0xC0u8 , 0x41u8 ] as & [ u8 ] ) ,
373
- ( "�" , & [ 0x80u8 ] as & [ u8 ] ) ,
374
- ( "�A" , & [ 0x80u8 , 0x41u8 ] as & [ u8 ] ) ,
375
- ( "�" , & [ 0xFEu8 ] as & [ u8 ] ) ,
376
- ( "�A" , & [ 0xFEu8 , 0x41u8 ] as & [ u8 ] ) ,
377
- ( "�" , & [ 0xFFu8 ] as & [ u8 ] ) ,
378
- ( "�A" , & [ 0xFFu8 , 0x41u8 ] as & [ u8 ] ) ] . into_iter ( ) {
379
- assert ! ( Iterator :: eq( str . chars( ) ,
380
- decode_utf8( bs. into_iter( ) . map( |& b|b) )
381
- . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) ) ) ,
382
- "chars = {}, bytes = {:?}, decoded = {:?}" , str , bs,
383
- Vec :: from_iter( decode_utf8( bs. into_iter( ) . map( |& b|b) )
384
- . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) ) ) ) ;
361
+ macro_rules! assert_decode_utf8 {
362
+ ( $input_bytes: expr, $expected_str: expr) => {
363
+ let input_bytes: & [ u8 ] = & $input_bytes;
364
+ let s = char :: decode_utf8( input_bytes. iter( ) . cloned( ) )
365
+ . map( |r_b| r_b. unwrap_or( '\u{FFFD}' ) )
366
+ . collect:: <String >( ) ;
367
+ assert_eq!( s, $expected_str,
368
+ "input bytes: {:?}, expected str: {:?}, result: {:?}" ,
369
+ input_bytes, $expected_str, s) ;
370
+ assert_eq!( String :: from_utf8_lossy( & $input_bytes) , $expected_str) ;
371
+ }
385
372
}
373
+
374
+ assert_decode_utf8 ! ( [ ] , "" ) ;
375
+ assert_decode_utf8 ! ( [ 0x41 ] , "A" ) ;
376
+ assert_decode_utf8 ! ( [ 0xC1 , 0x81 ] , "��" ) ;
377
+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0xA5 ] , "♥" ) ;
378
+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0xA5 , 0x41 ] , "♥A" ) ;
379
+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 ] , "�" ) ;
380
+ assert_decode_utf8 ! ( [ 0xE2 , 0x99 , 0x41 ] , "�A" ) ;
381
+ assert_decode_utf8 ! ( [ 0xC0 ] , "�" ) ;
382
+ assert_decode_utf8 ! ( [ 0xC0 , 0x41 ] , "�A" ) ;
383
+ assert_decode_utf8 ! ( [ 0x80 ] , "�" ) ;
384
+ assert_decode_utf8 ! ( [ 0x80 , 0x41 ] , "�A" ) ;
385
+ assert_decode_utf8 ! ( [ 0xFE ] , "�" ) ;
386
+ assert_decode_utf8 ! ( [ 0xFE , 0x41 ] , "�A" ) ;
387
+ assert_decode_utf8 ! ( [ 0xFF ] , "�" ) ;
388
+ assert_decode_utf8 ! ( [ 0xFF , 0x41 ] , "�A" ) ;
389
+ assert_decode_utf8 ! ( [ 0xC0 , 0x80 ] , "��" ) ;
390
+
391
+ // Surrogates
392
+ assert_decode_utf8 ! ( [ 0xED , 0x9F , 0xBF ] , "\u{D7FF}" ) ;
393
+ assert_decode_utf8 ! ( [ 0xED , 0xA0 , 0x80 ] , "���" ) ;
394
+ assert_decode_utf8 ! ( [ 0xED , 0xBF , 0x80 ] , "���" ) ;
395
+ assert_decode_utf8 ! ( [ 0xEE , 0x80 , 0x80 ] , "\u{E000}" ) ;
396
+
397
+ // char::MAX
398
+ assert_decode_utf8 ! ( [ 0xF4 , 0x8F , 0xBF , 0xBF ] , "\u{10FFFF}" ) ;
399
+ assert_decode_utf8 ! ( [ 0xF4 , 0x8F , 0xBF , 0x41 ] , "�A" ) ;
400
+ assert_decode_utf8 ! ( [ 0xF4 , 0x90 , 0x80 , 0x80 ] , "����" ) ;
401
+
402
+ // 5- and 6-byte sequences
403
+ // Part of the original design of UTF-8,
404
+ // but invalid now that UTF-8 is artificially restricted to match the range of UTF-16.
405
+ assert_decode_utf8 ! ( [ 0xF8 , 0x80 , 0x80 , 0x80 , 0x80 ] , "�����" ) ;
406
+ assert_decode_utf8 ! ( [ 0xFC , 0x80 , 0x80 , 0x80 , 0x80 , 0x80 ] , "������" ) ;
386
407
}
0 commit comments