Skip to content

Commit 692077b

Browse files
committed Jul 30, 2014
auto merge of rust-lang#15777 : SimonSapin/rust/pub-ascii-maps, r=alexcrichton
When dealing with HTTP requests or responses, many tokens are case-insensitive in the ASCII range, but the bytes from the network are not necessarily valid UTF-8. **[breaking-change]** Rather than adding new, very similar traits, this re-uses the `std::ascii::OwnedStrAsciiExt` and `std::ascii::StrAsciiExt` traits, but renames them to `OwnedAsciiExt` and `AsciiExt`, removing `Str` since that does not apply to bytes. This PR also makes `std::ascii::ASCII_UPPER_MAP` and `std::ascii::ASCII_LOWER_MAP`, the lookup tables all these methods are based on, public. In case there is something else related to ASCII case that we haven’t thought of yet, it can be implemented outside of libstd without duplicating the tables. Although this is a breaking change, I thought this could do without an RFC since the relevant traits are not in the prelude. r? @alexcrichton
2 parents 774d5eb + 235bb3f commit 692077b

File tree

4 files changed

+98
-61
lines changed

4 files changed

+98
-61
lines changed
 

‎src/liblog/directive.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use std::ascii::StrAsciiExt;
11+
use std::ascii::AsciiExt;
1212
use std::cmp;
1313

1414
#[deriving(Show, Clone)]

‎src/librustc/lint/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#![macro_escape]
3232

3333
use std::hash;
34-
use std::ascii::StrAsciiExt;
34+
use std::ascii::AsciiExt;
3535
use syntax::codemap::Span;
3636
use syntax::visit::FnKind;
3737
use syntax::ast;

‎src/libstd/ascii.rs

+95-58
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,19 @@ use iter::Iterator;
2020
use mem;
2121
use option::{Option, Some, None};
2222
use slice::{ImmutableVector, MutableVector, Vector};
23-
use str::{Str, StrAllocating, StrSlice};
23+
use str::{Str, StrSlice};
24+
use str;
2425
use string::String;
2526
use to_string::IntoStr;
2627
use vec::Vec;
2728

29+
#[deprecated="this trait has been renamed to `AsciiExt`"]
30+
pub use StrAsciiExt = self::AsciiExt;
31+
32+
#[deprecated="this trait has been renamed to `OwnedAsciiExt`"]
33+
pub use OwnedStrAsciiExt = self::OwnedAsciiExt;
34+
35+
2836
/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
2937
#[deriving(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
3038
pub struct Ascii { chr: u8 }
@@ -366,108 +374,133 @@ impl IntoBytes for Vec<Ascii> {
366374
}
367375
}
368376

377+
369378
/// Extension methods for ASCII-subset only operations on owned strings
370-
pub trait OwnedStrAsciiExt {
379+
pub trait OwnedAsciiExt {
371380
/// Convert the string to ASCII upper case:
372381
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
373382
/// but non-ASCII letters are unchanged.
374-
fn into_ascii_upper(self) -> String;
383+
fn into_ascii_upper(self) -> Self;
375384

376385
/// Convert the string to ASCII lower case:
377386
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
378387
/// but non-ASCII letters are unchanged.
379-
fn into_ascii_lower(self) -> String;
388+
fn into_ascii_lower(self) -> Self;
380389
}
381390

382391
/// Extension methods for ASCII-subset only operations on string slices
383-
pub trait StrAsciiExt {
392+
pub trait AsciiExt<T> {
384393
/// Makes a copy of the string in ASCII upper case:
385394
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
386395
/// but non-ASCII letters are unchanged.
387-
fn to_ascii_upper(&self) -> String;
396+
fn to_ascii_upper(&self) -> T;
388397

389398
/// Makes a copy of the string in ASCII lower case:
390399
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
391400
/// but non-ASCII letters are unchanged.
392-
fn to_ascii_lower(&self) -> String;
401+
fn to_ascii_lower(&self) -> T;
393402

394403
/// Check that two strings are an ASCII case-insensitive match.
395404
/// Same as `to_ascii_lower(a) == to_ascii_lower(b)`,
396405
/// but without allocating and copying temporary strings.
397-
fn eq_ignore_ascii_case(&self, other: &str) -> bool;
406+
fn eq_ignore_ascii_case(&self, other: Self) -> bool;
398407
}
399408

400-
impl<'a> StrAsciiExt for &'a str {
409+
impl<'a> AsciiExt<String> for &'a str {
401410
#[inline]
402411
fn to_ascii_upper(&self) -> String {
403-
unsafe { str_copy_map_bytes(*self, ASCII_UPPER_MAP) }
412+
// Vec<u8>::to_ascii_upper() preserves the UTF-8 invariant.
413+
unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_upper()) }
404414
}
405415

406416
#[inline]
407417
fn to_ascii_lower(&self) -> String {
408-
unsafe { str_copy_map_bytes(*self, ASCII_LOWER_MAP) }
418+
// Vec<u8>::to_ascii_lower() preserves the UTF-8 invariant.
419+
unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_lower()) }
409420
}
410421

411422
#[inline]
412423
fn eq_ignore_ascii_case(&self, other: &str) -> bool {
413-
self.len() == other.len() &&
414-
self.as_bytes().iter().zip(other.as_bytes().iter()).all(
415-
|(byte_self, byte_other)| {
416-
ASCII_LOWER_MAP[*byte_self as uint] ==
417-
ASCII_LOWER_MAP[*byte_other as uint]
418-
})
424+
self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
419425
}
420426
}
421427

422-
impl OwnedStrAsciiExt for String {
428+
impl OwnedAsciiExt for String {
423429
#[inline]
424430
fn into_ascii_upper(self) -> String {
425-
unsafe { str_map_bytes(self, ASCII_UPPER_MAP) }
431+
// Vec<u8>::into_ascii_upper() preserves the UTF-8 invariant.
432+
unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_upper()) }
426433
}
427434

428435
#[inline]
429436
fn into_ascii_lower(self) -> String {
430-
unsafe { str_map_bytes(self, ASCII_LOWER_MAP) }
437+
// Vec<u8>::into_ascii_lower() preserves the UTF-8 invariant.
438+
unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_lower()) }
431439
}
432440
}
433441

434-
#[inline]
435-
unsafe fn str_map_bytes(string: String, map: &'static [u8]) -> String {
436-
let mut bytes = string.into_bytes();
442+
impl<'a> AsciiExt<Vec<u8>> for &'a [u8] {
443+
#[inline]
444+
fn to_ascii_upper(&self) -> Vec<u8> {
445+
self.iter().map(|&byte| ASCII_UPPER_MAP[byte as uint]).collect()
446+
}
437447

438-
for b in bytes.mut_iter() {
439-
*b = map[*b as uint];
448+
#[inline]
449+
fn to_ascii_lower(&self) -> Vec<u8> {
450+
self.iter().map(|&byte| ASCII_LOWER_MAP[byte as uint]).collect()
440451
}
441452

442-
String::from_utf8(bytes).unwrap()
453+
#[inline]
454+
fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
455+
self.len() == other.len() &&
456+
self.iter().zip(other.iter()).all(
457+
|(byte_self, byte_other)| {
458+
ASCII_LOWER_MAP[*byte_self as uint] ==
459+
ASCII_LOWER_MAP[*byte_other as uint]
460+
})
461+
}
443462
}
444463

445-
#[inline]
446-
unsafe fn str_copy_map_bytes(string: &str, map: &'static [u8]) -> String {
447-
let mut s = String::from_str(string);
448-
for b in s.as_mut_bytes().mut_iter() {
449-
*b = map[*b as uint];
464+
impl OwnedAsciiExt for Vec<u8> {
465+
#[inline]
466+
fn into_ascii_upper(mut self) -> Vec<u8> {
467+
for byte in self.mut_iter() {
468+
*byte = ASCII_UPPER_MAP[*byte as uint];
469+
}
470+
self
471+
}
472+
473+
#[inline]
474+
fn into_ascii_lower(mut self) -> Vec<u8> {
475+
for byte in self.mut_iter() {
476+
*byte = ASCII_LOWER_MAP[*byte as uint];
477+
}
478+
self
450479
}
451-
s.into_string()
452480
}
453481

454-
static ASCII_LOWER_MAP: &'static [u8] = &[
482+
483+
pub static ASCII_LOWER_MAP: [u8, ..256] = [
455484
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
456485
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
457486
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
458487
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
459-
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
460-
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
461-
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
462-
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
463-
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
464-
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
465-
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
466-
0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
467-
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
468-
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
469-
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
470-
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
488+
b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
489+
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
490+
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
491+
b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
492+
b'@',
493+
494+
b'a', b'b', b'c', b'd', b'e', b'f', b'g',
495+
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
496+
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
497+
b'x', b'y', b'z',
498+
499+
b'[', b'\\', b']', b'^', b'_',
500+
b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
501+
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
502+
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
503+
b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
471504
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
472505
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
473506
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
@@ -486,23 +519,27 @@ static ASCII_LOWER_MAP: &'static [u8] = &[
486519
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
487520
];
488521

489-
static ASCII_UPPER_MAP: &'static [u8] = &[
522+
pub static ASCII_UPPER_MAP: [u8, ..256] = [
490523
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
491524
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
492525
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
493526
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
494-
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
495-
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
496-
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
497-
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
498-
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
499-
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
500-
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
501-
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
502-
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
503-
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
504-
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
505-
0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
527+
b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
528+
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
529+
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
530+
b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
531+
b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
532+
b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
533+
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
534+
b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
535+
b'`',
536+
537+
b'A', b'B', b'C', b'D', b'E', b'F', b'G',
538+
b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
539+
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
540+
b'X', b'Y', b'Z',
541+
542+
b'{', b'|', b'}', b'~', 0x7f,
506543
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
507544
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
508545
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,

‎src/test/run-pass/issue-10683.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use std::ascii::StrAsciiExt;
11+
use std::ascii::AsciiExt;
1212

1313
static NAME: &'static str = "hello world";
1414

0 commit comments

Comments
 (0)