Skip to content

Fix uint overflow bugs in std::{at_vec, vec, str} #9108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions src/libstd/at_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,16 @@ pub mod raw {
// Implementation detail. Shouldn't be public
#[allow(missing_doc)]
pub fn reserve_raw(ty: *TyDesc, ptr: *mut *mut Box<Vec<()>>, n: uint) {

// check for `uint` overflow
unsafe {
let size_in_bytes = n * (*ty).size;
if size_in_bytes > (**ptr).data.alloc {
let total_size = size_in_bytes + sys::size_of::<Vec<()>>();
if n > (**ptr).data.alloc / (*ty).size {
let alloc = n * (*ty).size;
let total_size = alloc + sys::size_of::<Vec<()>>();
if alloc / (*ty).size != n || total_size < alloc {
fail!("vector size is too large: %u", n);
}
(*ptr) = local_realloc(*ptr as *(), total_size) as *mut Box<Vec<()>>;
(**ptr).data.alloc = size_in_bytes;
(**ptr).data.alloc = alloc;
}
}

Expand Down
12 changes: 11 additions & 1 deletion src/libstd/num/uint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,17 @@ pub fn next_power_of_two(n: uint) -> uint {
let mut tmp: uint = n - 1u;
let mut shift: uint = 1u;
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
return tmp + 1u;
tmp + 1u
}

/// Returns the smallest power of 2 greater than or equal to `n`
#[inline]
pub fn next_power_of_two_opt(n: uint) -> Option<uint> {
let halfbits: uint = sys::size_of::<uint>() * 4u;
let mut tmp: uint = n - 1u;
let mut shift: uint = 1u;
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
tmp.checked_add(&1)
}

#[cfg(target_word_size = "32")]
Expand Down
2 changes: 1 addition & 1 deletion src/libstd/rt/io/extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ impl<T: Reader> ReaderUtil for T {
let start_len = buf.len();
let mut total_read = 0;

buf.reserve_at_least(start_len + len);
buf.reserve_additional(len);
vec::raw::set_len(buf, start_len + len);

do (|| {
Expand Down
183 changes: 78 additions & 105 deletions src/libstd/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ use char;
use char::Char;
use clone::{Clone, DeepClone};
use container::{Container, Mutable};
use num::Times;
use iter::{Iterator, FromIterator, Extendable};
use iter::{Iterator, FromIterator, Extendable, range};
use iter::{Filter, AdditiveIterator, Map};
use iter::{Invert, DoubleEndedIterator, ExactSize};
use libc;
Expand All @@ -33,7 +32,6 @@ use ptr;
use ptr::RawPtr;
use to_str::ToStr;
use uint;
use unstable::raw::{Repr, Slice};
use vec;
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
use default::Default;
Expand Down Expand Up @@ -182,23 +180,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
fn concat(&self) -> ~str {
if self.is_empty() { return ~""; }

// `len` calculation may overflow but push_str but will check boundaries
let len = self.iter().map(|s| s.as_slice().len()).sum();

let mut s = with_capacity(len);
let mut result = with_capacity(len);

unsafe {
do s.as_mut_buf |buf, _| {
let mut buf = buf;
for ss in self.iter() {
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
ptr::copy_memory(buf, ssbuf, sslen);
buf = buf.offset(sslen as int);
}
}
}
raw::set_len(&mut s, len);
for s in self.iter() {
result.push_str(s.as_slice())
}
s
result
}

/// Concatenate a vector of strings, placing a given separator between each.
Expand All @@ -209,34 +199,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
if sep.is_empty() { return self.concat(); }

// this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow but push_str but will check boundaries
let len = sep.len() * (self.len() - 1)
+ self.iter().map(|s| s.as_slice().len()).sum();
let mut s = ~"";
let mut result = with_capacity(len);
let mut first = true;

s.reserve(len);

unsafe {
do s.as_mut_buf |buf, _| {
do sep.as_imm_buf |sepbuf, seplen| {
let mut buf = buf;
for ss in self.iter() {
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
if first {
first = false;
} else {
ptr::copy_memory(buf, sepbuf, seplen);
buf = buf.offset(seplen as int);
}
ptr::copy_memory(buf, ssbuf, sslen);
buf = buf.offset(sslen as int);
}
}
}
for s in self.iter() {
if first {
first = false;
} else {
result.push_str(sep);
}
raw::set_len(&mut s, len);
result.push_str(s.as_slice());
}
s
result
}
}

Expand Down Expand Up @@ -959,7 +936,6 @@ static TAG_CONT_U8: u8 = 128u8;

/// Unsafe operations
pub mod raw {
use option::Some;
use cast;
use libc;
use ptr;
Expand Down Expand Up @@ -1062,21 +1038,22 @@ pub mod raw {
}
}

/// Appends a byte to a string. (Not UTF-8 safe).
/// Appends a byte to a string.
/// The caller must preserve the valid UTF-8 property.
#[inline]
pub unsafe fn push_byte(s: &mut ~str, b: u8) {
let v: &mut ~[u8] = cast::transmute(s);
v.push(b);
as_owned_vec(s).push(b)
}

/// Appends a vector of bytes to a string. (Not UTF-8 safe).
unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
let new_len = s.len() + bytes.len();
s.reserve_at_least(new_len);
for byte in bytes.iter() { push_byte(&mut *s, *byte); }
/// Appends a vector of bytes to a string.
/// The caller must preserve the valid UTF-8 property.
#[inline]
pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
vec::bytes::push_bytes(as_owned_vec(s), bytes);
}

/// Removes the last byte from a string and returns it. (Not UTF-8 safe).
/// Removes the last byte from a string and returns it.
/// The caller must preserve the valid UTF-8 property.
pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
let len = s.len();
assert!((len > 0u));
Expand All @@ -1085,7 +1062,8 @@ pub mod raw {
return b;
}

/// Removes the first byte from a string and returns it. (Not UTF-8 safe).
/// Removes the first byte from a string and returns it.
/// The caller must preserve the valid UTF-8 property.
pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
let len = s.len();
assert!((len > 0u));
Expand All @@ -1094,15 +1072,21 @@ pub mod raw {
return b;
}

/// Access the str in its vector representation.
/// The caller must preserve the valid UTF-8 property when modifying.
#[inline]
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
cast::transmute(s)
}

/// Sets the length of a string
///
/// This will explicitly set the size of the string, without actually
/// modifying its buffers, so it is up to the caller to ensure that
/// the string is actually the specified size.
#[inline]
pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
let v: &mut ~[u8] = cast::transmute(s);
vec::raw::set_len(v, new_len)
vec::raw::set_len(as_owned_vec(s), new_len)
}

/// Sets the length of a string
Expand Down Expand Up @@ -2053,22 +2037,11 @@ impl<'self> StrSlice<'self> for &'self str {

/// Given a string, make a new string with repeated copies of it.
fn repeat(&self, nn: uint) -> ~str {
do self.as_imm_buf |buf, len| {
let mut ret = with_capacity(nn * len);

unsafe {
do ret.as_mut_buf |rbuf, _len| {
let mut rbuf = rbuf;

do nn.times {
ptr::copy_memory(rbuf, buf, len);
rbuf = rbuf.offset(len as int);
}
}
raw::set_len(&mut ret, nn * len);
}
ret
let mut ret = with_capacity(nn * self.len());
for _ in range(0, nn) {
ret.push_str(*self);
}
ret
}

/// Retrieves the first character from a string slice and returns
Expand Down Expand Up @@ -2191,54 +2164,35 @@ impl OwnedStr for ~str {
/// Appends a string slice to the back of a string, without overallocating
#[inline]
fn push_str_no_overallocate(&mut self, rhs: &str) {
unsafe {
let llen = self.len();
let rlen = rhs.len();
self.reserve(llen + rlen);
do self.as_imm_buf |lbuf, _llen| {
do rhs.as_imm_buf |rbuf, _rlen| {
let dst = ptr::offset(lbuf, llen as int);
let dst = cast::transmute_mut_unsafe(dst);
ptr::copy_memory(dst, rbuf, rlen);
}
}
raw::set_len(self, llen + rlen);
}
let new_cap = self.len() + rhs.len();
self.reserve(new_cap);
self.push_str(rhs);
}

/// Appends a string slice to the back of a string
#[inline]
fn push_str(&mut self, rhs: &str) {
unsafe {
let llen = self.len();
let rlen = rhs.len();
self.reserve_at_least(llen + rlen);
do self.as_imm_buf |lbuf, _llen| {
do rhs.as_imm_buf |rbuf, _rlen| {
let dst = ptr::offset(lbuf, llen as int);
let dst = cast::transmute_mut_unsafe(dst);
ptr::copy_memory(dst, rbuf, rlen);
}
}
raw::set_len(self, llen + rlen);
raw::push_bytes(self, rhs.as_bytes());
}
}

/// Appends a character to the back of a string
#[inline]
fn push_char(&mut self, c: char) {
let cur_len = self.len();
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes

// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
// may use up to 4 bytes.
unsafe {
let v = self.repr();
let len = c.encode_utf8(cast::transmute(Slice {
data: ((&(*v).data) as *u8).offset(cur_len as int),
len: 4,
}));
raw::set_len(self, cur_len + len);
raw::as_owned_vec(self).reserve_additional(4);

// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
let used = do self.as_mut_buf |buf, _| {
do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
c.encode_utf8(slc)
}
};
raw::set_len(self, cur_len + used);
}
}

Expand Down Expand Up @@ -2298,8 +2252,7 @@ impl OwnedStr for ~str {
#[inline]
fn reserve(&mut self, n: uint) {
unsafe {
let v: &mut ~[u8] = cast::transmute(self);
(*v).reserve(n);
raw::as_owned_vec(self).reserve(n)
}
}

Expand All @@ -2321,7 +2274,7 @@ impl OwnedStr for ~str {
/// * n - The number of bytes to reserve space for
#[inline]
fn reserve_at_least(&mut self, n: uint) {
self.reserve(uint::next_power_of_two(n))
self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n))
}

/// Returns the number of single-byte characters the string can hold without
Expand Down Expand Up @@ -2351,8 +2304,9 @@ impl OwnedStr for ~str {

#[inline]
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
let v: &mut ~[u8] = unsafe { cast::transmute(self) };
v.as_mut_buf(f)
unsafe {
raw::as_owned_vec(self).as_mut_buf(f)
}
}
}

Expand Down Expand Up @@ -3897,4 +3851,23 @@ mod bench {
with_capacity(100);
}
}

#[bench]
fn bench_push_str(bh: &mut BenchHarness) {
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
do bh.iter {
let mut r = ~"";
r.push_str(s);
}
}

#[bench]
fn bench_connect(bh: &mut BenchHarness) {
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
let sep = "→";
let v = [s, s, s, s, s, s, s, s, s, s];
do bh.iter {
assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
}
}
}
Loading