auto merge of #5776 : dbaupp/rust/syntax-parse-large-number, r=thestinger

bors · bors · commit 913ca088fc59 · 2013-04-08T12:06:58.000-07:00
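Addresses #5544 (the lexer now reports "int literal is too large" for integer literals that do not fit in a `u64`) and #5770 (overflow in `from_str_bytes_common` went undetected when the wrapped accumulator equalled its previous value, e.g. parsing `111111111` in base 2 as a `u8`). Also resolves the "Possible improvements" note left in the documentation of `from_str_bytes_common` by adding an `ignore_underscores` option, so that underscores within a number can be ignored.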
diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs
@@ -507,7 +507,7 @@ impl num::ToStrRadix for f32 {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<f32> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -540,7 +540,7 @@ pub fn from_str(num: &str) -> Option<f32> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<f32> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -565,7 +565,7 @@ pub fn from_str_hex(num: &str) -> Option<f32> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, rdx: uint) -> Option<f32> {
     strconv::from_str_common(num, rdx, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for f32 {
diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs
@@ -529,7 +529,7 @@ impl num::ToStrRadix for f64 {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<f64> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -562,7 +562,7 @@ pub fn from_str(num: &str) -> Option<f64> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<f64> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -587,7 +587,7 @@ pub fn from_str_hex(num: &str) -> Option<f64> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, rdx: uint) -> Option<f64> {
     strconv::from_str_common(num, rdx, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for f64 {
diff --git a/src/libcore/num/float.rs b/src/libcore/num/float.rs
@@ -242,7 +242,7 @@ impl num::ToStrRadix for float {
 #[inline(always)]
 pub fn from_str(num: &str) -> Option<float> {
     strconv::from_str_common(num, 10u, true, true, true,
-                             strconv::ExpDec, false)
+                             strconv::ExpDec, false, false)
 }
 
 /**
@@ -275,7 +275,7 @@ pub fn from_str(num: &str) -> Option<float> {
 #[inline(always)]
 pub fn from_str_hex(num: &str) -> Option<float> {
     strconv::from_str_common(num, 16u, true, true, true,
-                             strconv::ExpBin, false)
+                             strconv::ExpBin, false, false)
 }
 
 /**
@@ -300,7 +300,7 @@ pub fn from_str_hex(num: &str) -> Option<float> {
 #[inline(always)]
 pub fn from_str_radix(num: &str, radix: uint) -> Option<float> {
     strconv::from_str_common(num, radix, true, true, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 impl from_str::FromStr for float {
diff --git a/src/libcore/num/int-template.rs b/src/libcore/num/int-template.rs
@@ -202,21 +202,21 @@ impl ops::Neg<T> for T {
 #[inline(always)]
 pub fn from_str(s: &str) -> Option<T> {
     strconv::from_str_common(s, 10u, true, false, false,
-                         strconv::ExpNone, false)
+                         strconv::ExpNone, false, false)
 }
 
 /// Parse a string as a number in the given base.
 #[inline(always)]
 pub fn from_str_radix(s: &str, radix: uint) -> Option<T> {
     strconv::from_str_common(s, radix, true, false, false,
-                         strconv::ExpNone, false)
+                         strconv::ExpNone, false, false)
 }
 
 /// Parse a byte slice as a number in the given base.
 #[inline(always)]
 pub fn parse_bytes(buf: &[u8], radix: uint) -> Option<T> {
     strconv::from_str_bytes_common(buf, radix, true, false, false,
-                               strconv::ExpNone, false)
+                               strconv::ExpNone, false, false)
 }
 
 impl FromStr for T {
diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs
@@ -429,6 +429,8 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
  *                  `FFp128`. The exponent string itself is always base 10.
  *                  Can conflict with `radix`, see Failure.
  * - `empty_zero` - Whether to accept a empty `buf` as a 0 or not.
+ * - `ignore_underscores` - Whether all underscores within the string should
+ *                          be ignored.
  *
  * # Return value
  * Returns `Some(n)` if `buf` parses to a number n without overflowing, and
@@ -443,16 +445,13 @@ priv static DIGIT_E_RADIX: uint = ('e' as uint) - ('a' as uint) + 11u;
  *   between digit and exponent sign `'p'`.
  * - Fails if `radix` > 18 and `special == true` due to conflict
  *   between digit and lowest first character in `inf` and `NaN`, the `'i'`.
- *
- * # Possible improvements
- * - Could accept option to allow ignoring underscores, allowing for numbers
- *   formated like `FF_AE_FF_FF`.
  */
-pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
+pub fn from_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+
                                     Mul<T,T>+Sub<T,T>+Neg<T>+Add<T,T>+
                                     NumStrConv>(
         buf: &[u8], radix: uint, negative: bool, fractional: bool,
-        special: bool, exponent: ExponentFormat, empty_zero: bool
+        special: bool, exponent: ExponentFormat, empty_zero: bool,
+        ignore_underscores: bool
         ) -> Option<T> {
     match exponent {
         ExpDec if radix >= DIGIT_E_RADIX       // decimal exponent 'e'
@@ -531,12 +530,16 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
                     accum -= cast(digit as int);
                 }
 
-                // Detect overflow by comparing to last value
-                if accum_positive && accum < last_accum { return None; }
-                if !accum_positive && accum > last_accum { return None; }
+                // Detect overflow by comparing to last value, except
+                // if we've not seen any non-zero digits.
+                if last_accum != _0 {
+                    if accum_positive && accum <= last_accum { return None; }
+                    if !accum_positive && accum >= last_accum { return None; }
+                }
                 last_accum = accum;
             }
             None => match c {
+                '_' if ignore_underscores => {}
                 'e' | 'E' | 'p' | 'P' => {
                     exp_found = true;
                     break;                       // start of exponent
@@ -580,6 +583,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
                     last_accum = accum;
                 }
                 None => match c {
+                    '_' if ignore_underscores => {}
                     'e' | 'E' | 'p' | 'P' => {
                         exp_found = true;
                         break;                   // start of exponent
@@ -607,6 +611,7 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
     if exp_found {
         let c = buf[i] as char;
         let base = match (c, exponent) {
+            // c is never _ so don't need to handle specially
             ('e', ExpDec) | ('E', ExpDec) => 10u,
             ('p', ExpBin) | ('P', ExpBin) => 2u,
             _ => return None // char doesn't fit given exponent format
@@ -615,7 +620,8 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
         // parse remaining bytes as decimal integer,
         // skipping the exponent char
         let exp: Option<int> = from_str_bytes_common(
-            buf.slice(i+1, len), 10, true, false, false, ExpNone, false);
+            buf.slice(i+1, len), 10, true, false, false, ExpNone, false,
+            ignore_underscores);
 
         match exp {
             Some(exp_pow) => {
@@ -637,11 +643,44 @@ pub fn from_str_bytes_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+
  * `from_str_bytes_common()`, for details see there.
  */
 #[inline(always)]
-pub fn from_str_common<T:NumCast+Zero+One+Ord+Copy+Div<T,T>+Mul<T,T>+
+pub fn from_str_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+Mul<T,T>+
                               Sub<T,T>+Neg<T>+Add<T,T>+NumStrConv>(
         buf: &str, radix: uint, negative: bool, fractional: bool,
-        special: bool, exponent: ExponentFormat, empty_zero: bool
+        special: bool, exponent: ExponentFormat, empty_zero: bool,
+        ignore_underscores: bool
         ) -> Option<T> {
     from_str_bytes_common(str::to_bytes(buf), radix, negative,
-                            fractional, special, exponent, empty_zero)
+                          fractional, special, exponent, empty_zero,
+                          ignore_underscores)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use option::*;
+
+    #[test]
+    fn from_str_ignore_underscores() {
+        let s : Option<u8> = from_str_common("__1__", 2, false, false, false,
+                                             ExpNone, false, true);
+        assert_eq!(s, Some(1u8));
+
+        let n : Option<u8> = from_str_common("__1__", 2, false, false, false,
+                                             ExpNone, false, false);
+        assert_eq!(n, None);
+
+        let f : Option<f32> = from_str_common("_1_._1_e_1_", 10, false, true, false,
+                                              ExpDec, false, true);
+        assert_eq!(f, Some(1.1e1f32));
+    }
+
+    #[test]
+    fn from_str_issue5770() {
+        // try to parse 0b1_1111_1111 = 511 as a u8. Caused problems
+        // since 255*2+1 == 255 (mod 256) so the overflow wasn't
+        // detected.
+        let n : Option<u8> = from_str_common("111111111", 2, false, false, false,
+                                             ExpNone, false, false);
+        assert_eq!(n, None);
+    }
 }
diff --git a/src/libcore/num/uint-template.rs b/src/libcore/num/uint-template.rs
@@ -168,21 +168,21 @@ impl ops::Neg<T> for T {
 #[inline(always)]
 pub fn from_str(s: &str) -> Option<T> {
     strconv::from_str_common(s, 10u, false, false, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 /// Parse a string as a number in the given base.
 #[inline(always)]
 pub fn from_str_radix(s: &str, radix: uint) -> Option<T> {
     strconv::from_str_common(s, radix, false, false, false,
-                             strconv::ExpNone, false)
+                             strconv::ExpNone, false, false)
 }
 
 /// Parse a byte slice as a number in the given base.
 #[inline(always)]
 pub fn parse_bytes(buf: &[u8], radix: uint) -> Option<T> {
     strconv::from_str_bytes_common(buf, radix, false, false, false,
-                                   strconv::ExpNone, false)
+                                   strconv::ExpNone, false, false)
 }
 
 impl FromStr for T {
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
@@ -442,7 +442,11 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
         if str::len(num_str) == 0u {
             rdr.fatal(~"no valid digits found for number");
         }
-        let parsed = u64::from_str_radix(num_str, base as uint).get();
+        let parsed = match u64::from_str_radix(num_str, base as uint) {
+            Some(p) => p,
+            None => rdr.fatal(~"int literal is too large")
+        };
+
         match tp {
           either::Left(t) => return token::LIT_INT(parsed as i64, t),
           either::Right(t) => return token::LIT_UINT(parsed, t)
@@ -503,7 +507,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
         if str::len(num_str) == 0u {
             rdr.fatal(~"no valid digits found for number");
         }
-        let parsed = u64::from_str_radix(num_str, base as uint).get();
+        let parsed = match u64::from_str_radix(num_str, base as uint) {
+            Some(p) => p,
+            None => rdr.fatal(~"int literal is too large")
+        };
 
         debug!("lexing %s as an unsuffixed integer literal",
                num_str);
diff --git a/src/test/compile-fail/issue-5544-a.rs b/src/test/compile-fail/issue-5544-a.rs
@@ -0,0 +1,14 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    let _i = 18446744073709551616; // 2^64
+    //~^ ERROR int literal is too large
+}
diff --git a/src/test/compile-fail/issue-5544-b.rs b/src/test/compile-fail/issue-5544-b.rs
@@ -0,0 +1,14 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    let _i = 0xff_ffff_ffff_ffff_ffff;
+    //~^ ERROR int literal is too large
+}