@@ -20,11 +20,19 @@ use iter::Iterator;
20
20
use mem;
21
21
use option:: { Option , Some , None } ;
22
22
use slice:: { ImmutableVector , MutableVector , Vector } ;
23
- use str:: { Str , StrAllocating , StrSlice } ;
23
+ use str:: { Str , StrSlice } ;
24
+ use str;
24
25
use string:: String ;
25
26
use to_string:: IntoStr ;
26
27
use vec:: Vec ;
27
28
29
+ #[ deprecated="this trait has been renamed to `AsciiExt`" ]
30
+ pub use StrAsciiExt = self :: AsciiExt ;
31
+
32
+ #[ deprecated="this trait has been renamed to `OwnedAsciiExt`" ]
33
+ pub use OwnedStrAsciiExt = self :: OwnedAsciiExt ;
34
+
35
+
28
36
/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
29
37
#[ deriving( Clone , PartialEq , PartialOrd , Ord , Eq , Hash ) ]
30
38
pub struct Ascii { chr : u8 }
@@ -366,108 +374,133 @@ impl IntoBytes for Vec<Ascii> {
366
374
}
367
375
}
368
376
377
+
369
378
/// Extension methods for ASCII-subset only operations on owned strings
370
- pub trait OwnedStrAsciiExt {
379
+ pub trait OwnedAsciiExt {
371
380
/// Convert the string to ASCII upper case:
372
381
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
373
382
/// but non-ASCII letters are unchanged.
374
- fn into_ascii_upper ( self ) -> String ;
383
+ fn into_ascii_upper ( self ) -> Self ;
375
384
376
385
/// Convert the string to ASCII lower case:
377
386
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
378
387
/// but non-ASCII letters are unchanged.
379
- fn into_ascii_lower ( self ) -> String ;
388
+ fn into_ascii_lower ( self ) -> Self ;
380
389
}
381
390
382
391
/// Extension methods for ASCII-subset only operations on string slices
383
- pub trait StrAsciiExt {
392
+ pub trait AsciiExt < T > {
384
393
/// Makes a copy of the string in ASCII upper case:
385
394
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
386
395
/// but non-ASCII letters are unchanged.
387
- fn to_ascii_upper ( & self ) -> String ;
396
+ fn to_ascii_upper ( & self ) -> T ;
388
397
389
398
/// Makes a copy of the string in ASCII lower case:
390
399
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
391
400
/// but non-ASCII letters are unchanged.
392
- fn to_ascii_lower ( & self ) -> String ;
401
+ fn to_ascii_lower ( & self ) -> T ;
393
402
394
403
/// Check that two strings are an ASCII case-insensitive match.
395
404
/// Same as `to_ascii_lower(a) == to_ascii_lower(b)`,
396
405
/// but without allocating and copying temporary strings.
397
- fn eq_ignore_ascii_case ( & self , other : & str ) -> bool ;
406
+ fn eq_ignore_ascii_case ( & self , other : Self ) -> bool ;
398
407
}
399
408
400
- impl < ' a > StrAsciiExt for & ' a str {
409
+ impl < ' a > AsciiExt < String > for & ' a str {
401
410
#[ inline]
402
411
fn to_ascii_upper ( & self ) -> String {
403
- unsafe { str_copy_map_bytes ( * self , ASCII_UPPER_MAP ) }
412
+ // Vec<u8>::to_ascii_upper() preserves the UTF-8 invariant.
413
+ unsafe { str:: raw:: from_utf8_owned ( self . as_bytes ( ) . to_ascii_upper ( ) ) }
404
414
}
405
415
406
416
#[ inline]
407
417
fn to_ascii_lower ( & self ) -> String {
408
- unsafe { str_copy_map_bytes ( * self , ASCII_LOWER_MAP ) }
418
+ // Vec<u8>::to_ascii_lower() preserves the UTF-8 invariant.
419
+ unsafe { str:: raw:: from_utf8_owned ( self . as_bytes ( ) . to_ascii_lower ( ) ) }
409
420
}
410
421
411
422
#[ inline]
412
423
fn eq_ignore_ascii_case ( & self , other : & str ) -> bool {
413
- self . len ( ) == other. len ( ) &&
414
- self . as_bytes ( ) . iter ( ) . zip ( other. as_bytes ( ) . iter ( ) ) . all (
415
- |( byte_self, byte_other) | {
416
- ASCII_LOWER_MAP [ * byte_self as uint ] ==
417
- ASCII_LOWER_MAP [ * byte_other as uint ]
418
- } )
424
+ self . as_bytes ( ) . eq_ignore_ascii_case ( other. as_bytes ( ) )
419
425
}
420
426
}
421
427
422
- impl OwnedStrAsciiExt for String {
428
+ impl OwnedAsciiExt for String {
423
429
#[ inline]
424
430
fn into_ascii_upper ( self ) -> String {
425
- unsafe { str_map_bytes ( self , ASCII_UPPER_MAP ) }
431
+ // Vec<u8>::into_ascii_upper() preserves the UTF-8 invariant.
432
+ unsafe { str:: raw:: from_utf8_owned ( self . into_bytes ( ) . into_ascii_upper ( ) ) }
426
433
}
427
434
428
435
#[ inline]
429
436
fn into_ascii_lower ( self ) -> String {
430
- unsafe { str_map_bytes ( self , ASCII_LOWER_MAP ) }
437
+ // Vec<u8>::into_ascii_lower() preserves the UTF-8 invariant.
438
+ unsafe { str:: raw:: from_utf8_owned ( self . into_bytes ( ) . into_ascii_lower ( ) ) }
431
439
}
432
440
}
433
441
434
- #[ inline]
435
- unsafe fn str_map_bytes ( string : String , map : & ' static [ u8 ] ) -> String {
436
- let mut bytes = string. into_bytes ( ) ;
442
+ impl < ' a > AsciiExt < Vec < u8 > > for & ' a [ u8 ] {
443
+ #[ inline]
444
+ fn to_ascii_upper ( & self ) -> Vec < u8 > {
445
+ self . iter ( ) . map ( |& byte| ASCII_UPPER_MAP [ byte as uint ] ) . collect ( )
446
+ }
437
447
438
- for b in bytes. mut_iter ( ) {
439
- * b = map[ * b as uint ] ;
448
+ #[ inline]
449
+ fn to_ascii_lower ( & self ) -> Vec < u8 > {
450
+ self . iter ( ) . map ( |& byte| ASCII_LOWER_MAP [ byte as uint ] ) . collect ( )
440
451
}
441
452
442
- String :: from_utf8 ( bytes) . unwrap ( )
453
+ #[ inline]
454
+ fn eq_ignore_ascii_case ( & self , other : & [ u8 ] ) -> bool {
455
+ self . len ( ) == other. len ( ) &&
456
+ self . iter ( ) . zip ( other. iter ( ) ) . all (
457
+ |( byte_self, byte_other) | {
458
+ ASCII_LOWER_MAP [ * byte_self as uint ] ==
459
+ ASCII_LOWER_MAP [ * byte_other as uint ]
460
+ } )
461
+ }
443
462
}
444
463
445
- #[ inline]
446
- unsafe fn str_copy_map_bytes ( string : & str , map : & ' static [ u8 ] ) -> String {
447
- let mut s = String :: from_str ( string) ;
448
- for b in s. as_mut_bytes ( ) . mut_iter ( ) {
449
- * b = map[ * b as uint ] ;
464
+ impl OwnedAsciiExt for Vec < u8 > {
465
+ #[ inline]
466
+ fn into_ascii_upper ( mut self ) -> Vec < u8 > {
467
+ for byte in self . mut_iter ( ) {
468
+ * byte = ASCII_UPPER_MAP [ * byte as uint ] ;
469
+ }
470
+ self
471
+ }
472
+
473
+ #[ inline]
474
+ fn into_ascii_lower ( mut self ) -> Vec < u8 > {
475
+ for byte in self . mut_iter ( ) {
476
+ * byte = ASCII_LOWER_MAP [ * byte as uint ] ;
477
+ }
478
+ self
450
479
}
451
- s. into_string ( )
452
480
}
453
481
454
- static ASCII_LOWER_MAP : & ' static [ u8 ] = & [
482
+
483
+ pub static ASCII_LOWER_MAP : [ u8 , ..256 ] = [
455
484
0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 ,
456
485
0x08 , 0x09 , 0x0a , 0x0b , 0x0c , 0x0d , 0x0e , 0x0f ,
457
486
0x10 , 0x11 , 0x12 , 0x13 , 0x14 , 0x15 , 0x16 , 0x17 ,
458
487
0x18 , 0x19 , 0x1a , 0x1b , 0x1c , 0x1d , 0x1e , 0x1f ,
459
- 0x20 , 0x21 , 0x22 , 0x23 , 0x24 , 0x25 , 0x26 , 0x27 ,
460
- 0x28 , 0x29 , 0x2a , 0x2b , 0x2c , 0x2d , 0x2e , 0x2f ,
461
- 0x30 , 0x31 , 0x32 , 0x33 , 0x34 , 0x35 , 0x36 , 0x37 ,
462
- 0x38 , 0x39 , 0x3a , 0x3b , 0x3c , 0x3d , 0x3e , 0x3f ,
463
- 0x40 , 0x61 , 0x62 , 0x63 , 0x64 , 0x65 , 0x66 , 0x67 ,
464
- 0x68 , 0x69 , 0x6a , 0x6b , 0x6c , 0x6d , 0x6e , 0x6f ,
465
- 0x70 , 0x71 , 0x72 , 0x73 , 0x74 , 0x75 , 0x76 , 0x77 ,
466
- 0x78 , 0x79 , 0x7a , 0x5b , 0x5c , 0x5d , 0x5e , 0x5f ,
467
- 0x60 , 0x61 , 0x62 , 0x63 , 0x64 , 0x65 , 0x66 , 0x67 ,
468
- 0x68 , 0x69 , 0x6a , 0x6b , 0x6c , 0x6d , 0x6e , 0x6f ,
469
- 0x70 , 0x71 , 0x72 , 0x73 , 0x74 , 0x75 , 0x76 , 0x77 ,
470
- 0x78 , 0x79 , 0x7a , 0x7b , 0x7c , 0x7d , 0x7e , 0x7f ,
488
+ b' ' , b'!' , b'"' , b'#' , b'$' , b'%' , b'&' , b'\'' ,
489
+ b'(' , b')' , b'*' , b'+' , b',' , b'-' , b'.' , b'/' ,
490
+ b'0' , b'1' , b'2' , b'3' , b'4' , b'5' , b'6' , b'7' ,
491
+ b'8' , b'9' , b':' , b';' , b'<' , b'=' , b'>' , b'?' ,
492
+ b'@' ,
493
+
494
+ b'a' , b'b' , b'c' , b'd' , b'e' , b'f' , b'g' ,
495
+ b'h' , b'i' , b'j' , b'k' , b'l' , b'm' , b'n' , b'o' ,
496
+ b'p' , b'q' , b'r' , b's' , b't' , b'u' , b'v' , b'w' ,
497
+ b'x' , b'y' , b'z' ,
498
+
499
+ b'[' , b'\\' , b']' , b'^' , b'_' ,
500
+ b'`' , b'a' , b'b' , b'c' , b'd' , b'e' , b'f' , b'g' ,
501
+ b'h' , b'i' , b'j' , b'k' , b'l' , b'm' , b'n' , b'o' ,
502
+ b'p' , b'q' , b'r' , b's' , b't' , b'u' , b'v' , b'w' ,
503
+ b'x' , b'y' , b'z' , b'{' , b'|' , b'}' , b'~' , 0x7f ,
471
504
0x80 , 0x81 , 0x82 , 0x83 , 0x84 , 0x85 , 0x86 , 0x87 ,
472
505
0x88 , 0x89 , 0x8a , 0x8b , 0x8c , 0x8d , 0x8e , 0x8f ,
473
506
0x90 , 0x91 , 0x92 , 0x93 , 0x94 , 0x95 , 0x96 , 0x97 ,
@@ -486,23 +519,27 @@ static ASCII_LOWER_MAP: &'static [u8] = &[
486
519
0xf8 , 0xf9 , 0xfa , 0xfb , 0xfc , 0xfd , 0xfe , 0xff ,
487
520
] ;
488
521
489
- static ASCII_UPPER_MAP : & ' static [ u8 ] = & [
522
+ pub static ASCII_UPPER_MAP : [ u8 , .. 256 ] = [
490
523
0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 ,
491
524
0x08 , 0x09 , 0x0a , 0x0b , 0x0c , 0x0d , 0x0e , 0x0f ,
492
525
0x10 , 0x11 , 0x12 , 0x13 , 0x14 , 0x15 , 0x16 , 0x17 ,
493
526
0x18 , 0x19 , 0x1a , 0x1b , 0x1c , 0x1d , 0x1e , 0x1f ,
494
- 0x20 , 0x21 , 0x22 , 0x23 , 0x24 , 0x25 , 0x26 , 0x27 ,
495
- 0x28 , 0x29 , 0x2a , 0x2b , 0x2c , 0x2d , 0x2e , 0x2f ,
496
- 0x30 , 0x31 , 0x32 , 0x33 , 0x34 , 0x35 , 0x36 , 0x37 ,
497
- 0x38 , 0x39 , 0x3a , 0x3b , 0x3c , 0x3d , 0x3e , 0x3f ,
498
- 0x40 , 0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 ,
499
- 0x48 , 0x49 , 0x4a , 0x4b , 0x4c , 0x4d , 0x4e , 0x4f ,
500
- 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 ,
501
- 0x58 , 0x59 , 0x5a , 0x5b , 0x5c , 0x5d , 0x5e , 0x5f ,
502
- 0x60 , 0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 ,
503
- 0x48 , 0x49 , 0x4a , 0x4b , 0x4c , 0x4d , 0x4e , 0x4f ,
504
- 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 ,
505
- 0x58 , 0x59 , 0x5a , 0x7b , 0x7c , 0x7d , 0x7e , 0x7f ,
527
+ b' ' , b'!' , b'"' , b'#' , b'$' , b'%' , b'&' , b'\'' ,
528
+ b'(' , b')' , b'*' , b'+' , b',' , b'-' , b'.' , b'/' ,
529
+ b'0' , b'1' , b'2' , b'3' , b'4' , b'5' , b'6' , b'7' ,
530
+ b'8' , b'9' , b':' , b';' , b'<' , b'=' , b'>' , b'?' ,
531
+ b'@' , b'A' , b'B' , b'C' , b'D' , b'E' , b'F' , b'G' ,
532
+ b'H' , b'I' , b'J' , b'K' , b'L' , b'M' , b'N' , b'O' ,
533
+ b'P' , b'Q' , b'R' , b'S' , b'T' , b'U' , b'V' , b'W' ,
534
+ b'X' , b'Y' , b'Z' , b'[' , b'\\' , b']' , b'^' , b'_' ,
535
+ b'`' ,
536
+
537
+ b'A' , b'B' , b'C' , b'D' , b'E' , b'F' , b'G' ,
538
+ b'H' , b'I' , b'J' , b'K' , b'L' , b'M' , b'N' , b'O' ,
539
+ b'P' , b'Q' , b'R' , b'S' , b'T' , b'U' , b'V' , b'W' ,
540
+ b'X' , b'Y' , b'Z' ,
541
+
542
+ b'{' , b'|' , b'}' , b'~' , 0x7f ,
506
543
0x80 , 0x81 , 0x82 , 0x83 , 0x84 , 0x85 , 0x86 , 0x87 ,
507
544
0x88 , 0x89 , 0x8a , 0x8b , 0x8c , 0x8d , 0x8e , 0x8f ,
508
545
0x90 , 0x91 , 0x92 , 0x93 , 0x94 , 0x95 , 0x96 , 0x97 ,
0 commit comments