Skip to content

Commit

Permalink
rustc: Remove &str indexing from the language.
Browse files Browse the repository at this point in the history
Being able to index into the bytes of a string encourages
poor UTF-8 hygiene. To get a view of `&[u8]` from either
a `String` or `&str` slice, use the `as_bytes()` method.

Closes rust-lang#12710.

[breaking-change]
  • Loading branch information
brson committed Jul 2, 2014
1 parent 44ec28c commit d21336e
Show file tree
Hide file tree
Showing 26 changed files with 101 additions and 87 deletions.
4 changes: 2 additions & 2 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1569,8 +1569,8 @@ mod tests {
let n2: uint = v.len();
assert_eq!(n1, n2);
while i < n1 {
let a: u8 = s1.as_slice()[i];
let b: u8 = s2.as_slice()[i];
let a: u8 = s1.as_bytes()[i];
let b: u8 = s2.as_bytes()[i];
debug!("{}", a);
debug!("{}", b);
assert_eq!(a, b);
Expand Down
2 changes: 1 addition & 1 deletion src/libcollections/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ impl String {
return None
}

let byte = self.as_slice()[len - 1];
let byte = self.as_bytes()[len - 1];
self.vec.set_len(len - 1);
Some(byte)
}
Expand Down
30 changes: 16 additions & 14 deletions src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,7 @@ impl<'a> StrSlice<'a> for &'a str {
fn lines_any(&self) -> AnyLines<'a> {
self.lines().map(|line| {
let l = line.len();
if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
if l > 0 && line.as_bytes()[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
else { line }
})
}
Expand Down Expand Up @@ -1867,26 +1867,26 @@ impl<'a> StrSlice<'a> for &'a str {
fn is_char_boundary(&self, index: uint) -> bool {
if index == self.len() { return true; }
if index > self.len() { return false; }
let b = self[index];
let b = self.as_bytes()[index];
return b < 128u8 || b >= 192u8;
}

#[inline]
fn char_range_at(&self, i: uint) -> CharRange {
if self[i] < 128u8 {
return CharRange {ch: self[i] as char, next: i + 1 };
if self.as_bytes()[i] < 128u8 {
return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
}

// Multibyte case is a fn to allow char_range_at to inline cleanly
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
let mut val = s[i] as u32;
let mut val = s.as_bytes()[i] as u32;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
assert!((w != 0));

val = utf8_first_byte!(val, w);
val = utf8_acc_cont_byte!(val, s[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }

return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
}
Expand All @@ -1899,23 +1899,25 @@ impl<'a> StrSlice<'a> for &'a str {
let mut prev = start;

prev = prev.saturating_sub(1);
if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
if self.as_bytes()[prev] < 128 {
return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
}

// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
// step backwards while the byte at `i` is a continuation byte (bit pattern 10xxxxxx)
while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
i -= 1u;
}

let mut val = s[i] as u32;
let mut val = s.as_bytes()[i] as u32;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
assert!((w != 0));

val = utf8_first_byte!(val, w);
val = utf8_acc_cont_byte!(val, s[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }

return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
}
Expand Down
4 changes: 2 additions & 2 deletions src/libgetopts/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ impl Matches {
}

fn is_arg(arg: &str) -> bool {
arg.len() > 1 && arg[0] == '-' as u8
arg.len() > 1 && arg.as_bytes()[0] == '-' as u8
}

fn find_opt(opts: &[Opt], nm: Name) -> Option<uint> {
Expand Down Expand Up @@ -553,7 +553,7 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
} else {
let mut names;
let mut i_arg = None;
if cur.as_slice()[1] == '-' as u8 {
if cur.as_bytes()[1] == '-' as u8 {
let tail = cur.as_slice().slice(2, curlen);
let tail_eq: Vec<&str> = tail.split('=').collect();
if tail_eq.len() <= 1 {
Expand Down
10 changes: 5 additions & 5 deletions src/librustc/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,8 +657,8 @@ pub fn sanitize(s: &str) -> String {

// Underscore-qualify anything that didn't start as an ident.
if result.len() > 0u &&
result.as_slice()[0] != '_' as u8 &&
! char::is_XID_start(result.as_slice()[0] as char) {
result.as_bytes()[0] != '_' as u8 &&
! char::is_XID_start(result.as_bytes()[0] as char) {
return format!("_{}", result.as_slice());
}

Expand Down Expand Up @@ -737,9 +737,9 @@ pub fn mangle_exported_name(ccx: &CrateContext, path: PathElems,
let extra2 = id % EXTRA_CHARS.len();
let id = id / EXTRA_CHARS.len();
let extra3 = id % EXTRA_CHARS.len();
hash.push_char(EXTRA_CHARS[extra1] as char);
hash.push_char(EXTRA_CHARS[extra2] as char);
hash.push_char(EXTRA_CHARS[extra3] as char);
hash.push_char(EXTRA_CHARS.as_bytes()[extra1] as char);
hash.push_char(EXTRA_CHARS.as_bytes()[extra2] as char);
hash.push_char(EXTRA_CHARS.as_bytes()[extra3] as char);

exported_name(path,
hash.as_slice(),
Expand Down
6 changes: 3 additions & 3 deletions src/librustc/metadata/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ fn item_sized(item: ebml::Doc) -> ast::Sized {
fn item_method_sort(item: ebml::Doc) -> char {
let mut ret = 'r';
reader::tagged_docs(item, tag_item_trait_method_sort, |doc| {
ret = doc.as_str_slice()[0] as char;
ret = doc.as_str_slice().as_bytes()[0] as char;
false
});
ret
Expand Down Expand Up @@ -757,13 +757,13 @@ fn get_explicit_self(item: ebml::Doc) -> ast::ExplicitSelf_ {
let explicit_self_doc = reader::get_doc(item, tag_item_trait_method_explicit_self);
let string = explicit_self_doc.as_str_slice();

let explicit_self_kind = string[0];
let explicit_self_kind = string.as_bytes()[0];
match explicit_self_kind as char {
's' => ast::SelfStatic,
'v' => ast::SelfValue,
'~' => ast::SelfUniq,
// FIXME(#4846) expl. region
'&' => ast::SelfRegion(None, get_mutability(string[1])),
'&' => ast::SelfRegion(None, get_mutability(string.as_bytes()[1])),
_ => fail!("unknown self type code: `{}`", explicit_self_kind as char)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/middle/dead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ struct DeadVisitor<'a> {
impl<'a> DeadVisitor<'a> {
fn should_warn_about_field(&mut self, node: &ast::StructField_) -> bool {
let (is_named, has_leading_underscore) = match node.ident() {
Some(ref ident) => (true, token::get_ident(*ident).get()[0] == ('_' as u8)),
Some(ref ident) => (true, token::get_ident(*ident).get().as_bytes()[0] == ('_' as u8)),
_ => (false, false)
};
let field_type = ty::node_id_to_type(self.tcx, node.id);
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/middle/liveness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ impl<'a> Liveness<'a> {

fn should_warn(&self, var: Variable) -> Option<String> {
let name = self.ir.variable_name(var);
if name.len() == 0 || name.as_slice()[0] == ('_' as u8) {
if name.len() == 0 || name.as_bytes()[0] == ('_' as u8) {
None
} else {
Some(name)
Expand Down
7 changes: 1 addition & 6 deletions src/librustc/middle/mem_categorization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ pub enum FieldName {
#[deriving(Clone, PartialEq, Eq, Hash)]
pub enum ElementKind {
VecElement,
StrElement,
OtherElement,
}

Expand Down Expand Up @@ -794,7 +793,7 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
//! - `derefs`: the deref number to be used for
//! the implicit index deref, if any (see above)

let element_ty = match ty::index(base_cmt.ty) {
let element_ty = match ty::array_element_ty(base_cmt.ty) {
Some(ref mt) => mt.ty,
None => {
self.tcx().sess.span_bug(
Expand Down Expand Up @@ -1137,9 +1136,6 @@ impl<'t,TYPER:Typer> MemCategorizationContext<'t,TYPER> {
cat_interior(_, InteriorElement(VecElement)) => {
"vec content".to_string()
}
cat_interior(_, InteriorElement(StrElement)) => {
"str content".to_string()
}
cat_interior(_, InteriorElement(OtherElement)) => {
"indexed content".to_string()
}
Expand Down Expand Up @@ -1320,7 +1316,6 @@ fn element_kind(t: ty::t) -> ElementKind {
ty::ty_rptr(_, ty::mt{ty:ty, ..}) |
ty::ty_uniq(ty) => match ty::get(ty).sty {
ty::ty_vec(_, None) => VecElement,
ty::ty_str => StrElement,
_ => OtherElement
},
ty::ty_vec(..) => VecElement,
Expand Down
15 changes: 15 additions & 0 deletions src/librustc/middle/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,21 @@ pub fn deref(t: t, explicit: bool) -> Option<mt> {

// Returns the type of `t[i]`, or `None` when `t` has no indexable element type.
//
// Two shapes produce an element type:
//   * a `ty_vec(mt, Some(_))` matched directly, and
//   * a `ty_vec(mt, None)` reached through exactly one pointer-like layer
//     (`ty_ptr`, `ty_rptr`, `ty_box` or `ty_uniq`).
// Every other type yields `None`. There is no arm for string types here, so
// they fall through to `None` as well.
// NOTE(review): `Some(_)` vs `None` presumably distinguishes fixed-length from
// unsized vectors -- confirm against the `ty_vec` definition.
pub fn index(t: t) -> Option<mt> {
match get(t).sty {
// Vector matched directly: its `mt` describes the element.
ty_vec(mt, Some(_)) => Some(mt),
// Pointer-like wrapper: `t` is rebound to the pointee, which is then
// inspected one level deep only.
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |
ty_box(t) | ty_uniq(t) => match get(t).sty {
ty_vec(mt, None) => Some(mt),
// Pointer to anything other than a `ty_vec(_, None)`: not indexable.
_ => None,
},
_ => None
}
}

// Returns the type of elements contained within an 'array-like' type.
// This is exactly the same as the above, except it supports strings,
// which can't actually be indexed.
pub fn array_element_ty(t: t) -> Option<mt> {
match get(t).sty {
ty_vec(mt, Some(_)) => Some(mt),
ty_ptr(mt{ty: t, ..}) | ty_rptr(_, mt{ty: t, ..}) |
Expand Down
4 changes: 2 additions & 2 deletions src/libstd/io/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1206,8 +1206,8 @@ mod test {
let mut cur = [0u8, .. 2];
for f in files {
let stem = f.filestem_str().unwrap();
let root = stem[0] - ('0' as u8);
let name = stem[1] - ('0' as u8);
let root = stem.as_bytes()[0] - ('0' as u8);
let name = stem.as_bytes()[1] - ('0' as u8);
assert!(cur[root as uint] < name);
cur[root as uint] = name;
}
Expand Down
46 changes: 25 additions & 21 deletions src/libstd/path/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,18 @@ impl GenericPathUnsafe for Path {
fn is_vol_abs(path: &str, prefix: Option<PathPrefix>) -> bool {
// assume prefix is Some(DiskPrefix)
let rest = path.slice_from(prefix_len(prefix));
!rest.is_empty() && rest[0].is_ascii() && is_sep(rest[0] as char)
!rest.is_empty() && rest.as_bytes()[0].is_ascii() && is_sep(rest.as_bytes()[0] as char)
}
fn shares_volume(me: &Path, path: &str) -> bool {
// path is assumed to have a prefix of Some(DiskPrefix)
let repr = me.repr.as_slice();
match me.prefix {
Some(DiskPrefix) => repr[0] == path[0].to_ascii().to_upper().to_byte(),
Some(VerbatimDiskPrefix) => repr[4] == path[0].to_ascii().to_upper().to_byte(),
Some(DiskPrefix) => {
repr.as_bytes()[0] == path.as_bytes()[0].to_ascii().to_upper().to_byte()
}
Some(VerbatimDiskPrefix) => {
repr.as_bytes()[4] == path.as_bytes()[0].to_ascii().to_upper().to_byte()
}
_ => false
}
}
Expand Down Expand Up @@ -279,7 +283,7 @@ impl GenericPathUnsafe for Path {
// if me is "C:" we don't want to add a path separator
match me.prefix {
Some(DiskPrefix) if me.repr.len() == plen => (),
_ if !(me.repr.len() > plen && me.repr.as_slice()[me.repr.len()-1] == SEP_BYTE) => {
_ if !(me.repr.len() > plen && me.repr.as_bytes()[me.repr.len()-1] == SEP_BYTE) => {
s.push_char(SEP);
}
_ => ()
Expand All @@ -302,7 +306,7 @@ impl GenericPathUnsafe for Path {
// absolute path, or cwd-relative and self is not same volume
replace_path(self, path, prefix);
}
None if !path.is_empty() && is_sep_(self.prefix, path[0]) => {
None if !path.is_empty() && is_sep_(self.prefix, path.as_bytes()[0]) => {
// volume-relative path
if self.prefix.is_some() {
// truncate self down to the prefix, then append
Expand Down Expand Up @@ -478,7 +482,7 @@ impl GenericPath for Path {
match self.prefix {
Some(DiskPrefix) => {
let rest = self.repr.as_slice().slice_from(self.prefix_len());
rest.len() > 0 && rest[0] == SEP_BYTE
rest.len() > 0 && rest.as_bytes()[0] == SEP_BYTE
}
Some(_) => true,
None => false
Expand Down Expand Up @@ -638,11 +642,11 @@ impl Path {
let s = match self.prefix {
Some(_) => {
let plen = self.prefix_len();
if repr.len() > plen && repr[plen] == SEP_BYTE {
if repr.len() > plen && repr.as_bytes()[plen] == SEP_BYTE {
repr.slice_from(plen+1)
} else { repr.slice_from(plen) }
}
None if repr[0] == SEP_BYTE => repr.slice_from(1),
None if repr.as_bytes()[0] == SEP_BYTE => repr.slice_from(1),
None => repr
};
let ret = s.split_terminator(SEP).map(Some);
Expand All @@ -665,14 +669,14 @@ impl Path {
match (self.prefix, other.prefix) {
(Some(DiskPrefix), Some(VerbatimDiskPrefix)) => {
self.is_absolute() &&
s_repr[0].to_ascii().eq_ignore_case(o_repr[4].to_ascii())
s_repr.as_bytes()[0].to_ascii().eq_ignore_case(o_repr.as_bytes()[4].to_ascii())
}
(Some(VerbatimDiskPrefix), Some(DiskPrefix)) => {
other.is_absolute() &&
s_repr[4].to_ascii().eq_ignore_case(o_repr[0].to_ascii())
s_repr.as_bytes()[4].to_ascii().eq_ignore_case(o_repr.as_bytes()[0].to_ascii())
}
(Some(VerbatimDiskPrefix), Some(VerbatimDiskPrefix)) => {
s_repr[4].to_ascii().eq_ignore_case(o_repr[4].to_ascii())
s_repr.as_bytes()[4].to_ascii().eq_ignore_case(o_repr.as_bytes()[4].to_ascii())
}
(Some(UNCPrefix(_,_)), Some(VerbatimUNCPrefix(_,_))) => {
s_repr.slice(2, self.prefix_len()) == o_repr.slice(8, other.prefix_len())
Expand Down Expand Up @@ -718,12 +722,12 @@ impl Path {
let mut comps = comps;
match (comps.is_some(),prefix) {
(false, Some(DiskPrefix)) => {
if s[0] >= 'a' as u8 && s[0] <= 'z' as u8 {
if s.as_bytes()[0] >= 'a' as u8 && s.as_bytes()[0] <= 'z' as u8 {
comps = Some(vec![]);
}
}
(false, Some(VerbatimDiskPrefix)) => {
if s[4] >= 'a' as u8 && s[0] <= 'z' as u8 {
if s.as_bytes()[4] >= 'a' as u8 && s.as_bytes()[0] <= 'z' as u8 {
comps = Some(vec![]);
}
}
Expand Down Expand Up @@ -778,12 +782,12 @@ impl Path {
let mut s = String::with_capacity(n);
match prefix {
Some(DiskPrefix) => {
s.push_char(prefix_[0].to_ascii().to_upper().to_char());
s.push_char(prefix_.as_bytes()[0].to_ascii().to_upper().to_char());
s.push_char(':');
}
Some(VerbatimDiskPrefix) => {
s.push_str(prefix_.slice_to(4));
s.push_char(prefix_[4].to_ascii().to_upper().to_char());
s.push_char(prefix_.as_bytes()[4].to_ascii().to_upper().to_char());
s.push_str(prefix_.slice_from(5));
}
Some(UNCPrefix(a,b)) => {
Expand Down Expand Up @@ -845,7 +849,7 @@ impl Path {

fn has_nonsemantic_trailing_slash(&self) -> bool {
is_verbatim(self) && self.repr.len() > self.prefix_len()+1 &&
self.repr.as_slice()[self.repr.len()-1] == SEP_BYTE
self.repr.as_bytes()[self.repr.len()-1] == SEP_BYTE
}

fn update_normalized<S: Str>(&mut self, s: S) {
Expand All @@ -861,7 +865,7 @@ impl Path {
/// but absolute within that volume.
#[inline]
pub fn is_vol_relative(path: &Path) -> bool {
path.prefix.is_none() && is_sep_byte(&path.repr.as_slice()[0])
path.prefix.is_none() && is_sep_byte(&path.repr.as_bytes()[0])
}

/// Returns whether the path is considered "cwd-relative", which means a path
Expand Down Expand Up @@ -991,8 +995,8 @@ fn parse_prefix<'a>(mut path: &'a str) -> Option<PathPrefix> {
} else {
// \\?\path
let idx = path.find('\\');
if idx == Some(2) && path[1] == ':' as u8 {
let c = path[0];
if idx == Some(2) && path.as_bytes()[1] == ':' as u8 {
let c = path.as_bytes()[0];
if c.is_ascii() && ::char::is_alphabetic(c as char) {
// \\?\C:\ path
return Some(VerbatimDiskPrefix);
Expand All @@ -1014,9 +1018,9 @@ fn parse_prefix<'a>(mut path: &'a str) -> Option<PathPrefix> {
}
_ => ()
}
} else if path.len() > 1 && path[1] == ':' as u8 {
} else if path.len() > 1 && path.as_bytes()[1] == ':' as u8 {
// C:
let c = path[0];
let c = path.as_bytes()[0];
if c.is_ascii() && ::char::is_alphabetic(c as char) {
return Some(DiskPrefix);
}
Expand Down
Loading

1 comment on commit d21336e

@brson
Copy link
Owner Author

@brson brson commented on d21336e Jul 2, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

r=alexcrichton

Please sign in to comment.