Skip to content

Commit

Permalink
Fix parsing floats panics and bugs (#1110)
Browse files Browse the repository at this point in the history
* Fix parsing float panic

* Fix float parsing functions

* Fix parseFloat bugs

* Fix parseInt and parseFloat bugs

* Fix clippy

* Add todo comment

* Update boa/src/builtins/string/mod.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
  • Loading branch information
2 people authored and Razican committed May 22, 2021
1 parent 548dfbb commit 2f2daec
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 54 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions boa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ bitflags = "1.2.1"
indexmap = "1.6.1"
ryu-js = "0.2.1"
chrono = "0.4.19"
fast-float = "0.2.0"

# Optional Dependencies
measureme = { version = "9.0.0", optional = true }
Expand Down
51 changes: 29 additions & 22 deletions boa/src/builtins/number/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number

use super::function::make_builtin_fn;
use super::string::is_trimmable_whitespace;
use crate::{
builtins::BuiltIn,
object::{ConstructorBuilder, ObjectData, PROTOTYPE},
Expand Down Expand Up @@ -673,7 +674,7 @@ impl Number {
let input_string = val.to_string(context)?;

// 2. Let S be ! TrimString(inputString, start).
let mut var_s = input_string.trim();
let mut var_s = input_string.trim_start_matches(is_trimmable_whitespace);

// 3. Let sign be 1.
// 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
Expand Down Expand Up @@ -789,31 +790,37 @@ impl Number {
///
/// [spec]: https://tc39.es/ecma262/#sec-parsefloat-string
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
pub(crate) fn parse_float(_: &Value, args: &[Value], _ctx: &mut Context) -> Result<Value> {
pub(crate) fn parse_float(_: &Value, args: &[Value], context: &mut Context) -> Result<Value> {
if let Some(val) = args.get(0) {
match val {
Value::String(s) => {
if let Ok(i) = s.parse::<i32>() {
// Attempt to parse an integer first so that it can be stored as an integer
// to improve performance
Ok(Value::integer(i))
} else if let Ok(f) = s.parse::<f64>() {
Ok(Value::rational(f))
} else {
// String can't be parsed.
Ok(Value::from(f64::NAN))
}
}
Value::Integer(i) => Ok(Value::integer(*i)),
Value::Rational(f) => Ok(Value::rational(*f)),
_ => {
// Wrong argument type to parseFloat.
Ok(Value::from(f64::NAN))
}
let input_string = val.to_string(context)?;
let s = input_string.trim_start_matches(is_trimmable_whitespace);
let s_prefix_lower = s.chars().take(4).collect::<String>().to_ascii_lowercase();

// TODO: write our own lexer to match syntax StrDecimalLiteral
if s.starts_with("Infinity") || s.starts_with("+Infinity") {
Ok(Value::from(f64::INFINITY))
} else if s.starts_with("-Infinity") {
Ok(Value::from(f64::NEG_INFINITY))
} else if s_prefix_lower.starts_with("inf")
|| s_prefix_lower.starts_with("+inf")
|| s_prefix_lower.starts_with("-inf")
{
// Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
Ok(Value::nan())
} else {
Ok(fast_float::parse_partial::<f64, _>(s)
.map(|(f, len)| {
if len > 0 {
Value::rational(f)
} else {
Value::nan()
}
})
.unwrap_or_else(|_| Value::nan()))
}
} else {
// Not enough arguments to parseFloat.
Ok(Value::from(f64::NAN))
Ok(Value::nan())
}
}

Expand Down
50 changes: 24 additions & 26 deletions boa/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,27 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8,
Some((cp, 2, false))
}

/// Helper function to check if a `char` is trimmable.
#[inline]
pub(crate) fn is_trimmable_whitespace(c: char) -> bool {
// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
//
// Rust uses \p{White_Space} by default, which also includes:
// `\u{0085}' (next line)
// And does not include:
// '\u{FEFF}' (zero width non-breaking space)
// Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
matches!(
c,
'\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
// Unicode Space_Separator category
'\u{1680}' | '\u{2000}'
..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
// Line terminators: https://tc39.es/ecma262/#sec-line-terminators
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
)
}

fn is_leading_surrogate(value: u16) -> bool {
(0xD800..=0xDBFF).contains(&value)
}
Expand Down Expand Up @@ -968,27 +989,6 @@ impl String {
Self::string_pad(primitive, max_length, fill_string, true)
}

/// Helper function to check if a `char` is trimmable.
#[inline]
fn is_trimmable_whitespace(c: char) -> bool {
// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
//
// Rust uses \p{White_Space} by default, which also includes:
// `\u{0085}' (next line)
// And does not include:
// '\u{FEFF}' (zero width non-breaking space)
// Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
matches!(
c,
'\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
// Unicode Space_Seperator category
'\u{1680}' | '\u{2000}'
..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
// Line terminators: https://tc39.es/ecma262/#sec-line-terminators
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
)
}

/// String.prototype.trim()
///
/// The `trim()` method removes whitespace from both ends of a string.
Expand All @@ -1004,9 +1004,7 @@ impl String {
pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_matches(Self::is_trimmable_whitespace),
))
Ok(Value::from(string.trim_matches(is_trimmable_whitespace)))
}

/// `String.prototype.trimStart()`
Expand All @@ -1024,7 +1022,7 @@ impl String {
pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result<Value> {
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_start_matches(Self::is_trimmable_whitespace),
string.trim_start_matches(is_trimmable_whitespace),
))
}

Expand All @@ -1044,7 +1042,7 @@ impl String {
let this = this.require_object_coercible(context)?;
let string = this.to_string(context)?;
Ok(Value::from(
string.trim_end_matches(Self::is_trimmable_whitespace),
string.trim_end_matches(is_trimmable_whitespace),
))
}

Expand Down
4 changes: 2 additions & 2 deletions boa/src/syntax/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use crate::{
lexer::{token::Numeric, Token},
},
};
use std::io::Read;
use std::str;
use std::{io::Read, str::FromStr};

/// Number literal lexing.
///
Expand Down Expand Up @@ -381,7 +381,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
)
}
NumericKind::Rational /* base: 10 */ => {
let val = f64::from_str(num_str).expect("Failed to parse float after checks");
let val: f64 = fast_float::parse(num_str).expect("Failed to parse float after checks");
let int_val = val as i32;

// The truncated float should be identically to the non-truncated float for the conversion to be loss-less,
Expand Down
26 changes: 22 additions & 4 deletions boa/src/value/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod tests;
use crate::{
builtins::{
number::{f64_to_int32, f64_to_uint32},
string::is_trimmable_whitespace,
BigInt, Number,
},
object::{GcObject, Object, ObjectData},
Expand Down Expand Up @@ -809,12 +810,29 @@ impl Value {
Value::Null => Ok(0.0),
Value::Undefined => Ok(f64::NAN),
Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }),
// TODO: this is probably not 100% correct, see https://tc39.es/ecma262/#sec-tonumber-applied-to-the-string-type
Value::String(ref string) => {
if string.trim().is_empty() {
return Ok(0.0);
let string = string.trim_matches(is_trimmable_whitespace);

// TODO: write our own lexer to match syntax StrDecimalLiteral
match string {
"" => Ok(0.0),
"Infinity" | "+Infinity" => Ok(f64::INFINITY),
"-Infinity" => Ok(f64::NEG_INFINITY),
_ if matches!(
string
.chars()
.take(4)
.collect::<String>()
.to_ascii_lowercase()
.as_str(),
"inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan"
) =>
{
// Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
Ok(f64::NAN)
}
_ => Ok(fast_float::parse(string).unwrap_or(f64::NAN)),
}
Ok(string.parse().unwrap_or(f64::NAN))
}
Value::Rational(number) => Ok(number),
Value::Integer(integer) => Ok(f64::from(integer)),
Expand Down

0 comments on commit 2f2daec

Please sign in to comment.