From 804476009ae762bdd97eca2641c77e845c59227c Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Fri, 5 Feb 2021 23:50:55 -0800 Subject: [PATCH 01/18] Add fromCharCode --- boa/src/builtins/number/conversions.rs | 9 +++++++ boa/src/builtins/number/mod.rs | 2 +- boa/src/builtins/string/mod.rs | 35 ++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/boa/src/builtins/number/conversions.rs b/boa/src/builtins/number/conversions.rs index e35837e1e15..c431735e314 100644 --- a/boa/src/builtins/number/conversions.rs +++ b/boa/src/builtins/number/conversions.rs @@ -84,3 +84,12 @@ pub(crate) fn f64_to_int32(number: f64) -> i32 { pub(crate) fn f64_to_uint32(number: f64) -> u32 { f64_to_int32(number) as u32 } + +/// Converts a 64-bit floating point number to an `u16` according to the [`ToUint16`][ToUint16] algorithm. +/// +/// [ToUint16]: https://tc39.es/ecma262/#sec-touint16 +#[inline] +pub(crate) fn f64_to_uint16(number: f64) -> u16 { + let n = f64_to_int32(number) as u32; + (n % (1 << 16)) as u16 +} diff --git a/boa/src/builtins/number/mod.rs b/boa/src/builtins/number/mod.rs index e1753997bcd..f6bb2e90f70 100644 --- a/boa/src/builtins/number/mod.rs +++ b/boa/src/builtins/number/mod.rs @@ -26,7 +26,7 @@ use num_traits::{float::FloatCore, Num}; mod conversions; -pub(crate) use conversions::{f64_to_int32, f64_to_uint32}; +pub(crate) use conversions::{f64_to_int32, f64_to_uint32, f64_to_uint16}; #[cfg(test)] mod tests; diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index b7ee8b07b4e..3912b41af03 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -17,7 +17,9 @@ use crate::builtins::Symbol; use crate::object::PROTOTYPE; use crate::property::DataDescriptor; use crate::{ - builtins::{string::string_iterator::StringIterator, Array, BuiltIn, RegExp}, + builtins::{ + number::f64_to_uint16, string::string_iterator::StringIterator, Array, BuiltIn, RegExp, + }, object::{ConstructorBuilder, Object, ObjectData}, property::Attribute, value::{RcString, Value}, @@ -27,7 +29,8 @@ use regress::Regex; use std::{ char::{decode_utf16, from_u32}, cmp::{max, min}, - f64::NAN, + f32::INFINITY, + f64::{NAN, NEG_INFINITY}, string::String as StdString, }; @@ -133,6 +136,7 @@ impl BuiltIn for String { .method(Self::match_all, "matchAll", 1) .method(Self::replace, "replace", 2) .method(Self::iterator, (symbol_iterator, "[Symbol.iterator]"), 0) + .static_method(Self::from_char_code, "fromCharCode", 1) .build(); (Self::NAME, string_object.into(), Self::attribute()) @@ -1324,6 +1328,33 @@ impl String { RegExp::match_all(&re, this.to_string(context)?.to_string(), context) } + /// `String.fromCharCode(num1[, ...[, numN]])` + /// + /// The static String.fromCharCode() method returns a string created from the specified sequence of UTF-16 code units. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.fromcharcode + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCharCode + pub(crate) fn from_char_code( + _: &Value, + args: &[Value], + context: &mut Context, + ) -> Result { + let mut elements: Vec = Vec::new(); + + for arg in args.iter() { + let number = f64_to_uint16(arg.to_number(context)?); + elements.push(number); + } + + let string = std::string::String::from_utf16_lossy(&elements); + + Ok(Value::from(string)) + } + pub(crate) fn iterator(this: &Value, _: &[Value], context: &mut Context) -> Result { StringIterator::create_string_iterator(context, this.clone()) } From e4ceba1a3ec618c13623eb971f26be3096b8af09 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sat, 6 Feb 2021 00:32:30 -0800 Subject: [PATCH 02/18] Add fromCodePoint --- boa/src/builtins/number/mod.rs | 2 +- boa/src/builtins/string/mod.rs | 46 +++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/boa/src/builtins/number/mod.rs b/boa/src/builtins/number/mod.rs index f6bb2e90f70..6ec225a2db2 100644 --- a/boa/src/builtins/number/mod.rs +++ b/boa/src/builtins/number/mod.rs @@ -26,7 +26,7 @@ use num_traits::{float::FloatCore, Num}; mod conversions; -pub(crate) use conversions::{f64_to_int32, f64_to_uint32, f64_to_uint16}; +pub(crate) use conversions::{f64_to_int32, f64_to_uint16, f64_to_uint32}; #[cfg(test)] mod tests; diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 3912b41af03..4dd40f8a681 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -18,7 +18,8 @@ use crate::object::PROTOTYPE; use crate::property::DataDescriptor; use crate::{ builtins::{ - number::f64_to_uint16, string::string_iterator::StringIterator, Array, BuiltIn, RegExp, + number::f64_to_uint16, string::string_iterator::StringIterator, Array, BuiltIn, Number, + RegExp, }, object::{ConstructorBuilder, Object, ObjectData}, property::Attribute, @@ -29,8 +30,8 @@ use regress::Regex; use std::{ char::{decode_utf16, from_u32}, cmp::{max, min}, - f32::INFINITY, - f64::{NAN, NEG_INFINITY}, + convert::TryFrom, + f64::NAN, string::String as StdString, }; @@ -137,6 +138,7 @@ impl BuiltIn for String { .method(Self::replace, "replace", 2) .method(Self::iterator, (symbol_iterator, "[Symbol.iterator]"), 0) .static_method(Self::from_char_code, "fromCharCode", 1) + .static_method(Self::from_code_point, "fromCodePoint", 1) .build(); (Self::NAME, string_object.into(), Self::attribute()) @@ -1257,7 +1259,7 @@ impl String { Some(separator) if separator.is_empty() => string .encode_utf16() // TODO: Support keeping invalid code point in string - .map(|cp| Value::from(std::string::String::from_utf16_lossy(&[cp]))) + .map(|cp| Value::from(StdString::from_utf16_lossy(&[cp]))) .take(limit) .collect(), Some(separator) => string @@ -1330,7 +1332,7 @@ impl String { /// `String.fromCharCode(num1[, ...[, numN]])` /// - /// The static String.fromCharCode() method returns a string created from the specified sequence of UTF-16 code units. + /// The static `String.fromCharCode()` method returns a string created from the specified sequence of UTF-16 code units. /// /// More information: /// - [ECMAScript reference][spec] @@ -1350,11 +1352,43 @@ impl String { elements.push(number); } - let string = std::string::String::from_utf16_lossy(&elements); + let string = StdString::from_utf16_lossy(&elements); Ok(Value::from(string)) } + /// `String.fromCodePoint(num1[, ...[, numN]])` + /// + /// The static `String.fromCodePoint()` method returns a string created by using the specified sequence of code points. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.fromcodepoint + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint + pub(crate) fn from_code_point( + _: &Value, + args: &[Value], + context: &mut Context, + ) -> Result { + let mut result = StdString::new(); + + for arg in args.iter() { + let number = arg.to_number(context)?; + + if !Number::is_float_integer(number) || number < 0f64 || number > (0x10FFFF as f64) { + return Err( + context.construct_range_error(format!("invalid code point: {}", number)) + ); + } + + result.push(char::try_from(number as u32).unwrap()); + } + + Ok(Value::from(result)) + } + pub(crate) fn iterator(this: &Value, _: &[Value], context: &mut Context) -> Result { StringIterator::create_string_iterator(context, this.clone()) } From c112659f5e233d5ff48cb18b50fc353bacc71dbd Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sat, 6 Feb 2021 02:39:12 -0800 Subject: [PATCH 03/18] Add String.raw --- boa/src/builtins/string/mod.rs | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 4dd40f8a681..5ed396183ca 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -139,6 +139,7 @@ impl BuiltIn for String { .method(Self::iterator, (symbol_iterator, "[Symbol.iterator]"), 0) .static_method(Self::from_char_code, "fromCharCode", 1) .static_method(Self::from_code_point, "fromCodePoint", 1) + .static_method(Self::raw, "raw", 1) .build(); (Self::NAME, string_object.into(), Self::attribute()) @@ -1389,6 +1390,48 @@ impl String { Ok(Value::from(result)) } + /// `String.raw(callSite, ...substitutions)` + /// + /// The static `String.raw()` method is a tag function of template literals. It's used to get the raw string form of template strings, + /// that is, substitutions (e.g. ${foo}) are processed, but escapes (e.g. \n) are not. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.raw + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/raw + pub(crate) fn raw(_: &Value, args: &[Value], context: &mut Context) -> Result { + let cooked: Value = args + .get(0) + .unwrap_or(&Value::undefined()) + .to_object(context)? + .into(); + let raw: Value = cooked.get_field("raw", context)?.to_object(context)?.into(); + let literal_segments = raw.get_field("length", context)?.to_length(context)?; + + if literal_segments == 0 { + return Ok(Value::from("")); + } + + let mut string_elements = StdString::new(); + for i in 0..literal_segments { + let seg = raw.get_field(i, context)?.to_string(context)?; + + string_elements.push_str(&seg); + + if i + 1 == literal_segments { + break; + } + + if let Some(next) = args.get(i + 1) { + string_elements.push_str(&next.to_string(context)?); + } + } + + Ok(Value::from(string_elements)) + } + pub(crate) fn iterator(this: &Value, _: &[Value], context: &mut Context) -> Result { StringIterator::create_string_iterator(context, this.clone()) } From 3911f17678cfc35b6f0f86fe1f830bc028dd1c33 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 7 Feb 2021 00:52:23 -0800 Subject: [PATCH 04/18] Fix broken tests --- boa/src/builtins/string/tests.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index 0e5f0791600..ad8001d91a3 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -860,14 +860,14 @@ fn last_index_of_with_non_string_search_string_argument() { fn last_index_of_with_from_index_argument() { let mut context = Context::new(); assert_eq!(forward(&mut context, "''.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "4"); - assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "-1"); - assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "3"); + assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0"); + assert_eq!(forward(&mut context, "'abcxx'.lastIndexOf('x', 2)"), "-1"); + assert_eq!(forward(&mut context, "'x'.lastIndexOf('x', 2)"), "0"); + assert_eq!(forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 2)"), "-1"); assert_eq!( forward(&mut context, "'µµµxµµµ'.lastIndexOf('x', 10000000)"), - "-1" + "3" ); } @@ -912,13 +912,13 @@ fn last_index_non_integer_position_argument() { ); assert_eq!( forward(&mut context, "'abcx'.lastIndexOf('x', new String('1'))"), - "3" + "-1" ); assert_eq!( forward(&mut context, "'abcx'.lastIndexOf('x', new String('100'))"), - "-1" + "3" ); - assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "3"); + assert_eq!(forward(&mut context, "'abcx'.lastIndexOf('x', null)"), "-1"); } #[test] From 41df390f547cbe85e558adc5c5e63c99f4803162 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 7 Feb 2021 00:52:50 -0800 Subject: [PATCH 05/18] Fix and refactor string.prototype methods --- boa/src/builtins/string/mod.rs | 497 +++++++++++++++++---------------- 1 file changed, 263 insertions(+), 234 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 5ed396183ca..ea8810c905f 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -27,13 +27,7 @@ use crate::{ BoaProfiler, Context, Result, }; use regress::Regex; -use std::{ - char::{decode_utf16, from_u32}, - cmp::{max, min}, - convert::TryFrom, - f64::NAN, - string::String as StdString, -}; +use std::{convert::TryFrom, f64::NAN, string::String as StdString}; pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8, bool)> { let size = string.encode_utf16().count() as i32; @@ -249,15 +243,16 @@ impl String { pub(crate) fn char_at(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning empty string when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0f64 || position >= string.len() as f64 { return Ok("".into()); } @@ -265,8 +260,11 @@ impl String { // unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of // bytes is an O(1) operation. - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { - Ok(Value::from(from_u32(utf16_val as u32).unwrap())) + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { + // TODO: Full UTF-16 support + Ok(Value::from( + char::try_from(utf16_val as u32).unwrap_or('\u{FFFD}' /* replacement char */), + )) } else { Ok("".into()) } @@ -333,22 +331,23 @@ impl String { ) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning NaN when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0f64 || position >= string.len() as f64 { return Ok(Value::from(NAN)); } // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. // If there is no element at that index, the result is NaN - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { Ok(Value::from(f64::from(utf16_val))) } else { Ok(Value::from(NAN)) @@ -395,23 +394,20 @@ impl String { let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - if let Some(arg) = args.get(0) { - let n = arg.to_integer(context)?; - if n < 0.0 { - return context.throw_range_error("repeat count cannot be a negative number"); - } - - if n.is_infinite() { - return context.throw_range_error("repeat count cannot be infinity"); - } + if let Some(count) = args.get(0) { + let n = count.to_integer(context)?; - if n * (string.len() as f64) > Self::MAX_STRING_LENGTH { - return context - .throw_range_error("repeat count must not overflow maximum string length"); + if n < 0.0 { + context.throw_range_error("repeat count cannot be a negative number") + } else if n.is_infinite() { + context.throw_range_error("repeat count cannot be infinity") + } else if n * (string.len() as f64) > Self::MAX_STRING_LENGTH { + context.throw_range_error("repeat count must not overflow maximum string length") + } else { + Ok(Value::from(string.repeat(n as usize))) } - Ok(string.repeat(n as usize).into()) } else { - Ok("".into()) + Ok(Value::from("")) } } @@ -428,42 +424,50 @@ impl String { pub(crate) fn slice(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. - let length = primitive_val.chars().count() as i32; + let length = string.chars().count(); - let start = args + let int_start = args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)? as i32; - let end = args - .get(1) - .cloned() - .unwrap_or_else(|| Value::integer(length)) - .to_integer(context)? as i32; + .to_integer(context)?; - let from = if start < 0 { - max(length.wrapping_add(start), 0) + let from = if int_start == f64::NEG_INFINITY { + 0.0 + } else if int_start < 0.0 { + (length as f64 + int_start).max(0.0) } else { - min(start, length) - }; - let to = if end < 0 { - max(length.wrapping_add(end), 0) + int_start.min(length as f64) + } as usize; + + let end = args.get(1).cloned().unwrap_or_else(Value::undefined); + + let int_end = if end.is_undefined() { + length as f64 } else { - min(end, length) + end.to_integer(context)? }; - let span = max(to.wrapping_sub(from), 0); + let to = if int_end == f64::NEG_INFINITY { + 0.0 + } else if int_end < 0.0 { + (length as f64 + int_end).max(0.0) + } else { + int_end.min(length as f64) + } as usize; - let new_str: StdString = primitive_val - .chars() - .skip(from as usize) - .take(span as usize) - .collect(); - Ok(Value::from(new_str)) + if from >= to { + Ok(Value::from("")) + } else { + let span = to - from; + let substring: StdString = string.chars().skip(from).take(span).collect(); + Ok(Value::from(substring)) + } } /// `String.prototype.startWith( searchString[, position] )` @@ -483,39 +487,37 @@ impl String { ) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; - let arg = args.get(0).cloned().unwrap_or_else(Value::undefined); + let search_string = args.get(0).cloned().unwrap_or_else(Value::undefined); - if Self::is_regexp_object(&arg) { + if Self::is_regexp_object(&search_string) { context.throw_type_error( "First argument to String.prototype.startsWith must not be a regular expression", )?; } - let search_string = arg.to_string(context)?; + let search_string = search_string.to_string(context)?; - let length = primitive_val.chars().count() as i32; - let search_length = search_string.chars().count() as i32; + let length = string.chars().count(); + let search_length = search_string.chars().count(); // If less than 2 args specified, position is 'undefined', defaults to 0 - let position = if args.len() < 2 { - 0 - } else { - args.get(1) - .expect("failed to get arg") - .to_integer(context)? as i32 + let pos = match args.get(1).cloned().unwrap_or_else(Value::undefined) { + position if position.is_undefined() => 0.0, + position => position.to_integer(context)?, }; - let start = min(max(position, 0), length); - let end = start.wrapping_add(search_length); + let start = pos.min(length as f64).max(0.0); + let end = start + search_length as f64; - if end > length { + if end > length as f64 { Ok(Value::from(false)) } else { // Only use the part of the string from "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); - Ok(Value::from(this_string.starts_with(search_string.as_str()))) + let substring: StdString = string.chars().skip(start as usize).collect(); + Ok(Value::from(substring.starts_with(search_string.as_str()))) } } @@ -532,40 +534,38 @@ impl String { pub(crate) fn ends_with(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; - let arg = args.get(0).cloned().unwrap_or_else(Value::undefined); + let search_string = args.get(0).cloned().unwrap_or_else(Value::undefined); - if Self::is_regexp_object(&arg) { + if Self::is_regexp_object(&search_string) { context.throw_type_error( "First argument to String.prototype.endsWith must not be a regular expression", )?; } - let search_string = arg.to_string(context)?; + let search_string = search_string.to_string(context)?; - let length = primitive_val.chars().count() as i32; - let search_length = search_string.chars().count() as i32; + let length = string.chars().count() as f64; + let search_length = search_string.chars().count() as f64; // If less than 2 args specified, end_position is 'undefined', defaults to // length of this - let end_position = if args.len() < 2 { - length - } else { - args.get(1) - .expect("Could not get argumetn") - .to_integer(context)? as i32 - }; + let end = args + .get(1) + .map(|end_position| end_position.to_integer(context)) + .transpose()? + .map_or(length, |end_position| end_position.max(0.0).min(length)); - let end = min(max(end_position, 0), length); - let start = end.wrapping_sub(search_length); + let start = end - search_length; - if start < 0 { + if start < 0f64 { Ok(Value::from(false)) } else { // Only use the part of the string up to "end" - let this_string: StdString = primitive_val.chars().take(end as usize).collect(); - Ok(Value::from(this_string.ends_with(search_string.as_str()))) + let substring: StdString = string.chars().take(end as usize).collect(); + Ok(Value::from(substring.ends_with(search_string.as_str()))) } } @@ -582,7 +582,8 @@ impl String { pub(crate) fn includes(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; let arg = args.get(0).cloned().unwrap_or_else(Value::undefined); @@ -594,23 +595,19 @@ impl String { let search_string = arg.to_string(context)?; - let length = primitive_val.chars().count() as i32; + let length = string.chars().count(); // If less than 2 args specified, position is 'undefined', defaults to 0 - let position = if args.len() < 2 { - 0 - } else { - args.get(1) - .expect("Could not get argument") - .to_integer(context)? as i32 - }; - - let start = min(max(position, 0), length); + let start = args + .get(1) + .map(|position| position.to_integer(context)) + .transpose()? + .map_or(0, |position| position.max(0.0).min(length as f64) as usize); // Take the string from "this" and use only the part of it after "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); + let substring: StdString = string.chars().skip(start).collect(); - Ok(Value::from(this_string.contains(search_string.as_str()))) + Ok(Value::from(substring.contains(search_string.as_str()))) } /// Return either the string itself or the string of the regex equivalent @@ -654,19 +651,20 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace pub(crate) fn replace(this: &Value, args: &[Value], context: &mut Context) -> Result { // TODO: Support Symbol replacer - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; if args.is_empty() { - return Ok(Value::from(primitive_val)); + return Ok(Value::from(string)); } let regex_body = Self::get_regex_string(args.get(0).expect("Value needed")); let re = Regex::new(®ex_body).expect("unable to convert regex to regex object"); - let mat = match re.find(&primitive_val) { + let mat = match re.find(&string) { Some(mat) => mat, - None => return Ok(Value::from(primitive_val)), + None => return Ok(Value::from(string)), }; let caps = re - .find(&primitive_val) + .find(&string) .expect("unable to get capture groups from text") .captures; @@ -696,17 +694,17 @@ impl String { } (Some('&'), _) => { // $& - result.push_str(&primitive_val[mat.range()]); + result.push_str(&string[mat.range()]); } (Some('`'), _) => { // $` let start_of_match = mat.start(); - result.push_str(&primitive_val[..start_of_match]); + result.push_str(&string[..start_of_match]); } (Some('\''), _) => { // $' let end_of_match = mat.end(); - result.push_str(&primitive_val[end_of_match..]); + result.push_str(&string[end_of_match..]); } (Some(second), Some(third)) if second_is_digit && third_is_digit => @@ -723,7 +721,7 @@ impl String { } } else { let group = match mat.group(nn) { - Some(range) => &primitive_val[range.clone()], + Some(range) => &string[range.clone()], _ => "", }; result.push_str(group); @@ -738,7 +736,7 @@ impl String { result.push(second); } else { let group = match mat.group(n) { - Some(range) => &primitive_val[range.clone()], + Some(range) => &string[range.clone()], _ => "", }; result.push_str(group); @@ -770,7 +768,7 @@ impl String { let mut results: Vec = mat .groups() .map(|group| match group { - Some(range) => Value::from(&primitive_val[range]), + Some(range) => Value::from(&string[range]), None => Value::undefined(), }) .collect(); @@ -779,7 +777,7 @@ impl String { let start = mat.start(); results.push(Value::from(start)); // Push the whole string being examined - results.push(Value::from(primitive_val.to_string())); + results.push(Value::from(string.to_string())); let result = context.call(&replace_object, this, &results)?; @@ -791,8 +789,8 @@ impl String { "undefined".to_string() }; - Ok(Value::from(primitive_val.replacen( - &primitive_val[mat.range()], + Ok(Value::from(string.replacen( + &string[mat.range()], &replace_value, 1, ))) @@ -812,8 +810,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.indexof /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf pub(crate) fn index_of(this: &Value, args: &[Value], context: &mut Context) -> Result { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; let search_string = args .get(0) @@ -859,8 +857,8 @@ impl String { args: &[Value], context: &mut Context, ) -> Result { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; let search_string = args .get(0) @@ -871,21 +869,29 @@ impl String { let length = string.chars().count(); let start = args .get(1) - .map(|position| position.to_integer(context)) + .map(|position| { + let num_pos = position.to_number(context)?; + if num_pos.is_nan() { + Ok(f64::INFINITY) + } else { + Value::from(num_pos).to_integer(context) + } + }) .transpose()? - .map_or(0, |position| position.max(0.0).min(length as f64) as usize); + .map_or(length, |position| { + position.max(0.0).min(length as f64) as usize + }); if search_string.is_empty() { return Ok(start.min(length).into()); } - if start < length { - if let Some(position) = string.rfind(search_string.as_str()) { - return Ok(string[..position].chars().count().into()); - } + let substring: StdString = string.chars().take(start + 1).collect(); + if let Some(position) = substring.rfind(search_string.as_str()) { + Ok(substring[..position].chars().count().into()) + } else { + Ok(Value::from(-1)) } - - Ok(Value::from(-1)) } /// `String.prototype.match( regexp )` @@ -900,12 +906,15 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match /// [regex]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions pub(crate) fn r#match(this: &Value, args: &[Value], context: &mut Context) -> Result { + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + let re = RegExp::constructor( &Value::from(Object::default()), &[args.get(0).cloned().unwrap_or_default()], context, )?; - RegExp::r#match(&re, this.to_string(context)?, context) + RegExp::r#match(&re, string, context) } /// Abstract method `StringPad`. @@ -913,32 +922,55 @@ impl String { /// Performs the actual string padding for padStart/End. /// fn string_pad( - primitive: RcString, - max_length: i32, - fill_string: Option, + object: &Value, + max_length: &Value, + fill_string: &Value, at_start: bool, + context: &mut Context, ) -> Result { - let primitive_length = primitive.len() as i32; + let string = object.to_string(context)?; + + let int_max_length = max_length.to_length(context)?; + let string_length = string.chars().count(); + + if int_max_length <= string_length { + return Ok(Value::from(string)); + } + + let filler = if fill_string.is_undefined() { + "\u{0020}".into() + } else { + fill_string.to_string(context)? + }; - if max_length <= primitive_length { - return Ok(Value::from(primitive)); + if filler.is_empty() { + return Ok(Value::from(string)); } - let filter = fill_string.as_deref().unwrap_or(" "); + let fill_len = int_max_length - string_length; + let filler_len = filler.chars().count(); - let fill_len = max_length.wrapping_sub(primitive_length); - let mut fill_str = StdString::new(); + let mut truncated_string_filler = StdString::new(); + let mut truncated_string_filler_len: usize = 0; - while fill_str.len() < fill_len as usize { - fill_str.push_str(filter); + while truncated_string_filler_len < fill_len { + if truncated_string_filler_len.wrapping_add(filler_len) <= fill_len { + truncated_string_filler.push_str(&filler); + truncated_string_filler_len += filler_len; + } else { + truncated_string_filler + .extend(filler.chars().take(fill_len - truncated_string_filler_len)); + truncated_string_filler_len = filler_len; + } } - // Cut to size max_length - let concat_fill_str: StdString = fill_str.chars().take(fill_len as usize).collect(); if at_start { - Ok(Value::from(format!("{}{}", concat_fill_str, &primitive))) + truncated_string_filler.push_str(&string); + Ok(Value::from(truncated_string_filler)) } else { - Ok(Value::from(format!("{}{}", primitive, &concat_fill_str))) + let mut string = string.to_string(); + string.push_str(&truncated_string_filler); + Ok(Value::from(string)) } } @@ -955,18 +987,12 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.padend /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/padEnd pub(crate) fn pad_end(this: &Value, args: &[Value], context: &mut Context) -> Result { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(Value::from("padEnd requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let object = this.require_object_coercible(context)?; - let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; + let max_length = args.get(0).cloned().unwrap_or_else(Value::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(Value::undefined); - Self::string_pad(primitive, max_length, fill_string, false) + Self::string_pad(object, &max_length, &fill_string, false, context) } /// `String.prototype.padStart( targetLength [, padString] )` @@ -982,18 +1008,12 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.padstart /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/padStart pub(crate) fn pad_start(this: &Value, args: &[Value], context: &mut Context) -> Result { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(Value::from("padStart requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let object = this.require_object_coercible(context)?; - let fill_string = args.get(1).map(|arg| arg.to_string(context)).transpose()?; + let max_length = args.get(0).cloned().unwrap_or_else(Value::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(Value::undefined); - Self::string_pad(primitive, max_length, fill_string, true) + Self::string_pad(object, &max_length, &fill_string, true, context) } /// String.prototype.trim() @@ -1009,8 +1029,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim pub(crate) fn trim(this: &Value, _: &[Value], context: &mut Context) -> Result { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; Ok(Value::from(string.trim_matches(is_trimmable_whitespace))) } @@ -1027,7 +1047,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trimstart /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trimStart pub(crate) fn trim_start(this: &Value, _: &[Value], context: &mut Context) -> Result { - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; Ok(Value::from( string.trim_start_matches(is_trimmable_whitespace), )) @@ -1046,8 +1067,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trimend /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trimEnd pub(crate) fn trim_end(this: &Value, _: &[Value], context: &mut Context) -> Result { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; Ok(Value::from( string.trim_end_matches(is_trimmable_whitespace), )) @@ -1067,10 +1088,11 @@ impl String { pub(crate) fn to_lowercase(this: &Value, _: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_lowercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(Value::from(this_str.to_lowercase())) + Ok(Value::from(string.to_lowercase())) } /// `String.prototype.toUpperCase()` @@ -1089,10 +1111,11 @@ impl String { pub(crate) fn to_uppercase(this: &Value, _: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_uppercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(Value::from(this_str.to_uppercase())) + Ok(Value::from(string.to_uppercase())) } /// `String.prototype.substring( indexStart[, indexEnd] )` @@ -1108,41 +1131,41 @@ impl String { pub(crate) fn substring(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - // If no args are specified, start is 'undefined', defaults to 0 - let start = if args.is_empty() { - 0 - } else { - args.get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32 - }; - let length = primitive_val.encode_utf16().count() as i32; - // If less than 2 args specified, end is the length of the this object converted to a String - let end = if args.len() < 2 { - length - } else { - args.get(1) - .expect("Could not get argument") - .to_integer(context)? as i32 + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + + let len = string.len(); + let int_start = args + .get(0) + .cloned() + .unwrap_or_else(Value::undefined) + .to_integer(context)?; + + let int_end = match args.get(1).cloned().unwrap_or_else(Value::undefined) { + end if end.is_undefined() => len as f64, + end => end.to_integer(context)?, }; + // Both start and end args replaced by 0 if they were negative // or by the length of the String if they were greater - let final_start = min(max(start, 0), length); - let final_end = min(max(end, 0), length); + let final_start = int_start.max(0.0).min(len as f64); + let final_end = int_end.max(0.0).min(len as f64); + // Start and end are swapped if start is greater than end - let from = min(final_start, final_end) as usize; - let to = max(final_start, final_end) as usize; + let from = final_start.min(final_end); + let to = final_start.max(final_end); + // Extract the part of the string contained between the start index and the end index // where start is guaranteed to be smaller or equals to end - let extracted_string: std::result::Result = decode_utf16( - primitive_val - .encode_utf16() - .skip(from) - .take(to.wrapping_sub(from)), - ) - .collect(); - Ok(Value::from(extracted_string.expect("Invalid string"))) + let substring_utf16: Vec = string + .encode_utf16() + .skip(from as usize) + .take((to - from) as usize) + .collect(); + // TODO: Full UTF-16 support + let substring = StdString::from_utf16_lossy(&substring_utf16); + + Ok(Value::from(substring)) } /// `String.prototype.substr( start[, length] )` @@ -1159,46 +1182,48 @@ impl String { pub(crate) fn substr(this: &Value, args: &[Value], context: &mut Context) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string: Vec = object.to_string(context)?.encode_utf16().collect(); + let size = string.len(); + // If no args are specified, start is 'undefined', defaults to 0 - let mut start = if args.is_empty() { - 0 + + let int_start = args + .get(0) + .cloned() + .unwrap_or_else(Value::undefined) + .to_integer(context)?; + let int_start = if int_start == f64::NEG_INFINITY { + 0.0 + } else if int_start < 0.0 { + (int_start + size as f64).max(0.0) } else { - args.get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32 + int_start }; - let length = primitive_val.chars().count() as i32; + // If less than 2 args specified, end is +infinity, the maximum number value. // Using i32::max_value() should be safe because the final length used is at most // the number of code units from start to the end of the string, // which should always be smaller or equals to both +infinity and i32::max_value - let end = if args.len() < 2 { - i32::max_value() - } else { - args.get(1) - .expect("Could not get argument") - .to_integer(context)? as i32 + let int_length = match args.get(1).cloned().unwrap_or_else(Value::undefined) { + length if length.is_undefined() => size as f64, + length => length.to_integer(context)?, }; - // If start is negative it become the number of code units from the end of the string - if start < 0 { - start = max(length.wrapping_add(start), 0); + + if int_start == f64::INFINITY || int_length <= 0.0 || int_length == f64::INFINITY { + return Ok(Value::from("")); } - // length replaced by 0 if it was negative - // or by the number of code units from start to the end of the string if it was greater - let result_length = min(max(end, 0), length.wrapping_sub(start)); - // If length is negative we return an empty string - // otherwise we extract the part of the string from start and is length code units long - if result_length <= 0 { + + let int_end = (int_start + int_length).min(size as f64) as usize; + let int_start = int_start as usize; + + if int_start >= int_end { Ok(Value::from("")) } else { - let extracted_string: StdString = primitive_val - .chars() - .skip(start as usize) - .take(result_length as usize) - .collect(); + let substring_utf16 = &string[int_start..int_end]; + let substring = StdString::from_utf16_lossy(substring_utf16); - Ok(Value::from(extracted_string)) + Ok(Value::from(substring)) } } @@ -1215,8 +1240,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.split /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split pub(crate) fn split(this: &Value, args: &[Value], context: &mut Context) -> Result { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; let separator = args.get(0).filter(|value| !value.is_null_or_undefined()); @@ -1303,6 +1328,9 @@ impl String { /// [cg]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges // TODO: update this method to return iterator pub(crate) fn match_all(this: &Value, args: &[Value], context: &mut Context) -> Result { + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + let re: Value = match args.get(0) { Some(arg) => { if arg.is_null() { @@ -1328,7 +1356,7 @@ impl String { ), }?; - RegExp::match_all(&re, this.to_string(context)?.to_string(), context) + RegExp::match_all(&re, string.to_string(), context) } /// `String.fromCharCode(num1[, ...[, numN]])` @@ -1353,6 +1381,7 @@ impl String { elements.push(number); } + // TODO: Full UTF-16 support let string = StdString::from_utf16_lossy(&elements); Ok(Value::from(string)) From 03a2d083e9686fdc04db5e991614dd03ac34ae80 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 7 Feb 2021 01:12:28 -0800 Subject: [PATCH 06/18] Fix infinite loop bug --- boa/src/builtins/string/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index ea8810c905f..214ee44957e 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -960,7 +960,7 @@ impl String { } else { truncated_string_filler .extend(filler.chars().take(fill_len - truncated_string_filler_len)); - truncated_string_filler_len = filler_len; + truncated_string_filler_len = fill_len; } } From 21128f2456a916970ed55655170eaeae2201aa6e Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 8 Feb 2021 01:20:35 -0800 Subject: [PATCH 07/18] Fix string length bugs --- boa/src/builtins/string/mod.rs | 302 +++++++++++---------- boa/src/builtins/string/string_iterator.rs | 4 +- 2 files changed, 164 insertions(+), 142 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 214ee44957e..5f84db04789 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -29,17 +29,18 @@ use crate::{ use regress::Regex; use std::{convert::TryFrom, f64::NAN, string::String as StdString}; -pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8, bool)> { - let size = string.encode_utf16().count() as i32; - if position < 0 || position >= size { +pub(crate) fn code_point_at(string: RcString, position: i64) -> Option<(u32, u8, bool)> { + let size = string.encode_utf16().count(); + if position < 0 || position >= size as i64 { return None; } + let mut encoded = string.encode_utf16(); let first = encoded.nth(position as usize)?; if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { return Some((first as u32, 1, false)); } - if is_trailing_surrogate(first) || position + 1 == size { + if is_trailing_surrogate(first) || position + 1 == size as i64 { return Some((first as u32, 1, true)); } let second = encoded.next()?; @@ -252,8 +253,8 @@ impl String { .to_integer(context)?; // Fast path returning empty string when pos is obviously out of range - if position < 0f64 || position >= string.len() as f64 { - return Ok("".into()); + if position < 0f64 { + return Ok(Value::from("")); } // Calling .len() on a string would give the wrong result, as they are bytes not the number of @@ -266,7 +267,7 @@ impl String { char::try_from(utf16_val as u32).unwrap_or('\u{FFFD}' /* replacement char */), )) } else { - Ok("".into()) + Ok(Value::from("")) } } @@ -291,19 +292,20 @@ impl String { ) -> Result { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning undefined when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 { return Ok(Value::undefined()); } - if let Some((code_point, _, _)) = code_point_at(primitive_val, pos) { + if let Some((code_point, _, _)) = code_point_at(string, position as i64) { Ok(Value::from(code_point)) } else { Ok(Value::undefined()) @@ -340,7 +342,7 @@ impl String { .to_integer(context)?; // Fast path returning NaN when pos is obviously out of range - if position < 0f64 || position >= string.len() as f64 { + if position < 0f64 { return Ok(Value::from(NAN)); } @@ -350,7 +352,7 @@ impl String { if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { Ok(Value::from(f64::from(utf16_val))) } else { - Ok(Value::from(NAN)) + Ok(Value::from(f64::NAN)) } } @@ -429,44 +431,40 @@ impl String { // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. - let length = string.chars().count(); + let len = string.encode_utf16().count(); - let int_start = args + let from = match args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)?; - - let from = if int_start == f64::NEG_INFINITY { - 0.0 - } else if int_start < 0.0 { - (length as f64 + int_start).max(0.0) - } else { - int_start.min(length as f64) + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (len as f64 + int_start).max(0.0), + int_start => int_start.min(len as f64), } as usize; - let end = args.get(1).cloned().unwrap_or_else(Value::undefined); - - let int_end = if end.is_undefined() { - length as f64 - } else { - end.to_integer(context)? - }; - - let to = if int_end == f64::NEG_INFINITY { - 0.0 - } else if int_end < 0.0 { - (length as f64 + int_end).max(0.0) - } else { - int_end.min(length as f64) + let to = match args + .get(1) + .filter(|end| !end.is_undefined()) + .map(|end| end.to_integer(context)) + .transpose()? + .unwrap_or(len as f64) + { + int_end if int_end.is_infinite() && int_end.is_sign_negative() => 0.0, + int_end if int_end < 0.0 => (len as f64 + int_end).max(0.0), + int_end => int_end.min(len as f64), } as usize; if from >= to { Ok(Value::from("")) } else { let span = to - from; - let substring: StdString = string.chars().skip(from).take(span).collect(); - Ok(Value::from(substring)) + + // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); + let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); + Ok(Value::from(substring_lossy)) } } @@ -498,10 +496,10 @@ impl String { )?; } - let search_string = search_string.to_string(context)?; + let search_str = search_string.to_string(context)?; - let length = string.chars().count(); - let search_length = search_string.chars().count(); + let len = string.encode_utf16().count(); + let search_length = search_str.encode_utf16().count(); // If less than 2 args specified, position is 'undefined', defaults to 0 let pos = match args.get(1).cloned().unwrap_or_else(Value::undefined) { @@ -509,15 +507,17 @@ impl String { position => position.to_integer(context)?, }; - let start = pos.min(length as f64).max(0.0); - let end = start + search_length as f64; + let start = pos.min(len as f64).max(0.0) as usize; + let end = start + search_length; - if end > length as f64 { + if end > len { Ok(Value::from(false)) } else { // Only use the part of the string from "start" - let substring: StdString = string.chars().skip(start as usize).collect(); - Ok(Value::from(substring.starts_with(search_string.as_str()))) + // TODO: Full UTF-16 support + let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + Ok(Value::from(substring_utf16.eq(search_str_utf16))) } } @@ -537,35 +537,40 @@ impl String { let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - let search_string = args.get(0).cloned().unwrap_or_else(Value::undefined); + let search_str = match args.get(0).cloned().unwrap_or_else(Value::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.endsWith must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; - if Self::is_regexp_object(&search_string) { - context.throw_type_error( - "First argument to String.prototype.endsWith must not be a regular expression", - )?; - } + let len = string.encode_utf16().count(); - let search_string = search_string.to_string(context)?; + let pos = match args.get(1).cloned().unwrap_or_else(Value::undefined) { + end_position if end_position.is_undefined() => len as f64, + end_position => end_position.to_integer(context)?, + }; - let length = string.chars().count() as f64; - let search_length = search_string.chars().count() as f64; + let end = pos.max(0.0).min(len as f64) as usize; - // If less than 2 args specified, end_position is 'undefined', defaults to - // length of this - let end = args - .get(1) - .map(|end_position| end_position.to_integer(context)) - .transpose()? - .map_or(length, |end_position| end_position.max(0.0).min(length)); + if search_str.is_empty() { + return Ok(Value::from(true)); + } - let start = end - search_length; + let search_length = search_str.encode_utf16().count(); - if start < 0f64 { + if end < search_length { Ok(Value::from(false)) } else { - // Only use the part of the string up to "end" - let substring: StdString = string.chars().take(end as usize).collect(); - Ok(Value::from(substring.ends_with(search_string.as_str()))) + let start = end - search_length; + + // TODO: Full UTF-16 support + let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + + Ok(Value::from(substring_utf16.eq(search_str_utf16))) } } @@ -585,29 +590,31 @@ impl String { let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - let arg = args.get(0).cloned().unwrap_or_else(Value::undefined); - - if Self::is_regexp_object(&arg) { - context.throw_type_error( - "First argument to String.prototype.includes must not be a regular expression", - )?; - } - - let search_string = arg.to_string(context)?; - - let length = string.chars().count(); + let search_str = match args.get(0).cloned().unwrap_or_else(Value::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.includes must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; - // If less than 2 args specified, position is 'undefined', defaults to 0 - let start = args + let pos = args .get(1) - .map(|position| position.to_integer(context)) - .transpose()? - .map_or(0, |position| position.max(0.0).min(length as f64) as usize); + .cloned() + .unwrap_or_else(Value::undefined) + .to_integer(context)?; - // Take the string from "this" and use only the part of it after "start" - let substring: StdString = string.chars().skip(start).collect(); + let start = pos.max(0.0) as usize; - Ok(Value::from(substring.contains(search_string.as_str()))) + // TODO: Full UTF-16 support + let substring_lossy = if start > 0 { + let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); + StdString::from_utf16_lossy(&substring_utf16) + } else { + string.to_string() + }; + Ok(Value::from(substring_lossy.contains(search_str.as_str()))) } /// Return either the string itself or the string of the regex equivalent @@ -813,26 +820,39 @@ impl String { let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - let search_string = args + let search_str = args .get(0) .cloned() .unwrap_or_else(Value::undefined) .to_string(context)?; - let length = string.chars().count(); - let start = args + let pos = args .get(1) - .map(|position| position.to_integer(context)) - .transpose()? - .map_or(0, |position| position.max(0.0).min(length as f64) as usize); + .cloned() + .unwrap_or_else(Value::undefined) + .to_integer(context)?; - if search_string.is_empty() { - return Ok(start.min(length).into()); + let len = string.encode_utf16().count(); + let start = pos.max(0.0); + + if search_str.is_empty() { + return Ok(Value::from(start.min(len as f64))); } - if start < length { - if let Some(position) = string.find(search_string.as_str()) { - return Ok(string[..position].chars().count().into()); + if start < len as f64 { + let start = start as usize; + + let substring_lossy = if start > 0 { + let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); + StdString::from_utf16_lossy(&substring_utf16) + } else { + string.to_string() + }; + + if let Some(position) = substring_lossy.find(search_str.as_str()) { + return Ok(Value::from( + substring_lossy[..position].encode_utf16().count() + start, + )); } } @@ -860,35 +880,38 @@ impl String { let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; - let search_string = args + let search_str = args .get(0) .cloned() .unwrap_or_else(Value::undefined) .to_string(context)?; - let length = string.chars().count(); - let start = args + let num_pos = args .get(1) - .map(|position| { - let num_pos = position.to_number(context)?; - if num_pos.is_nan() { - Ok(f64::INFINITY) - } else { - Value::from(num_pos).to_integer(context) - } - }) - .transpose()? - .map_or(length, |position| { - position.max(0.0).min(length as f64) as usize - }); + .cloned() + .unwrap_or_else(Value::undefined) + .to_number(context)?; + + let pos = if num_pos.is_nan() { + f64::INFINITY + } else { + Value::from(num_pos).to_integer(context)? + }; - if search_string.is_empty() { - return Ok(start.min(length).into()); + let len = string.encode_utf16().count(); + let start = pos.max(0.0).min(len as f64) as usize; + + if search_str.is_empty() { + return Ok(Value::from(start as f64)); } - let substring: StdString = string.chars().take(start + 1).collect(); - if let Some(position) = substring.rfind(search_string.as_str()) { - Ok(substring[..position].chars().count().into()) + // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().take(start + 1).collect(); + let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); + if let Some(position) = substring_lossy.rfind(search_str.as_str()) { + Ok(Value::from( + substring_lossy[..position].encode_utf16().count(), + )) } else { Ok(Value::from(-1)) } @@ -931,7 +954,7 @@ impl String { let string = object.to_string(context)?; let int_max_length = max_length.to_length(context)?; - let string_length = string.chars().count(); + let string_length = string.encode_utf16().count(); if int_max_length <= string_length { return Ok(Value::from(string)); @@ -942,13 +965,14 @@ impl String { } else { fill_string.to_string(context)? }; + let filler_utf16: Vec = filler.encode_utf16().collect(); if filler.is_empty() { return Ok(Value::from(string)); } let fill_len = int_max_length - string_length; - let filler_len = filler.chars().count(); + let filler_len = filler_utf16.len(); let mut truncated_string_filler = StdString::new(); let mut truncated_string_filler_len: usize = 0; @@ -958,8 +982,12 @@ impl String { truncated_string_filler.push_str(&filler); truncated_string_filler_len += filler_len; } else { - truncated_string_filler - .extend(filler.chars().take(fill_len - truncated_string_filler_len)); + truncated_string_filler.push_str( + StdString::from_utf16_lossy( + &filler_utf16[..fill_len - truncated_string_filler_len], + ) + .as_str(), + ); truncated_string_filler_len = fill_len; } } @@ -1152,17 +1180,13 @@ impl String { let final_end = int_end.max(0.0).min(len as f64); // Start and end are swapped if start is greater than end - let from = final_start.min(final_end); - let to = final_start.max(final_end); + let from = final_start.min(final_end) as usize; + let to = final_start.max(final_end) as usize; // Extract the part of the string contained between the start index and the end index // where start is guaranteed to be smaller or equals to end - let substring_utf16: Vec = string - .encode_utf16() - .skip(from as usize) - .take((to - from) as usize) - .collect(); // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().skip(from).take(to - from).collect(); let substring = StdString::from_utf16_lossy(&substring_utf16); Ok(Value::from(substring)) @@ -1188,17 +1212,15 @@ impl String { // If no args are specified, start is 'undefined', defaults to 0 - let int_start = args + let int_start = match args .get(0) .cloned() .unwrap_or_else(Value::undefined) - .to_integer(context)?; - let int_start = if int_start == f64::NEG_INFINITY { - 0.0 - } else if int_start < 0.0 { - (int_start + size as f64).max(0.0) - } else { - int_start + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (int_start + size as f64).max(0.0), + int_start => int_start, }; // If less than 2 args specified, end is +infinity, the maximum number value. @@ -1210,7 +1232,7 @@ impl String { length => length.to_integer(context)?, }; - if int_start == f64::INFINITY || int_length <= 0.0 || int_length == f64::INFINITY { + if int_start.is_infinite() || int_length <= 0.0 || int_length.is_infinite() { return Ok(Value::from("")); } @@ -1407,7 +1429,7 @@ impl String { for arg in args.iter() { let number = arg.to_number(context)?; - if !Number::is_float_integer(number) || number < 0f64 || number > (0x10FFFF as f64) { + if !Number::is_float_integer(number) || number < 0.0 || number > (0x10FFFF as f64) { return Err( context.construct_range_error(format!("invalid code point: {}", number)) ); diff --git a/boa/src/builtins/string/string_iterator.rs b/boa/src/builtins/string/string_iterator.rs index cd76dafb653..2752406a30f 100644 --- a/boa/src/builtins/string/string_iterator.rs +++ b/boa/src/builtins/string/string_iterator.rs @@ -46,8 +46,8 @@ impl StringIterator { string_iterator.string = Value::undefined(); return Ok(create_iter_result_object(context, Value::undefined(), true)); } - let (_, code_unit_count, _) = - code_point_at(native_string, position).expect("Invalid code point position"); + let (_, code_unit_count, _) = code_point_at(native_string, position as i64) + .expect("Invalid code point position"); string_iterator.next_index += code_unit_count as i32; let result_string = crate::builtins::string::String::substring( &string_iterator.string, From 9ab2157d87cf8496256255ecb50802991b445fe6 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 8 Feb 2021 01:36:51 -0800 Subject: [PATCH 08/18] Fix unsafe add --- boa/src/builtins/string/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 5f84db04789..5e124dfc292 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -507,15 +507,18 @@ impl String { position => position.to_integer(context)?, }; - let start = pos.min(len as f64).max(0.0) as usize; - let end = start + search_length; + let start = pos.min(len as f64).max(0.0); + let end = start + search_length as f64; - if end > len { + if end > len as f64 { Ok(Value::from(false)) } else { // Only use the part of the string from "start" // TODO: Full UTF-16 support - let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let substring_utf16 = string + .encode_utf16() + .skip(start as usize) + .take(search_length); let search_str_utf16 = search_str.encode_utf16(); Ok(Value::from(substring_utf16.eq(search_str_utf16))) } From 31458d3cd3df4d3d333e2bad8c311660cceba80d Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Wed, 17 Feb 2021 15:36:07 -0800 Subject: [PATCH 09/18] Update boa/src/builtins/string/mod.rs Co-authored-by: Halid Odat --- boa/src/builtins/string/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 5e124dfc292..73d1f70cbab 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -352,7 +352,7 @@ impl String { if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { Ok(Value::from(f64::from(utf16_val))) } else { - Ok(Value::from(f64::NAN)) + Ok(Value::nan()) } } From 3e49ebc3c459db8993f5a1b13ad1fdb79ab78b11 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Wed, 17 Feb 2021 15:50:29 -0800 Subject: [PATCH 10/18] Remove outdated comments --- boa/src/builtins/string/mod.rs | 37 ++++------------------------------ 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 321891464ed..8b3b65a4616 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -242,8 +242,6 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.charat /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charAt pub(crate) fn char_at(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; let position = args @@ -253,7 +251,7 @@ impl String { .to_integer(context)?; // Fast path returning empty string when pos is obviously out of range - if position < 0f64 { + if position < 0.0 { return Ok(Value::from("")); } @@ -290,8 +288,6 @@ impl String { args: &[Value], context: &mut Context, ) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; let position = args @@ -331,8 +327,6 @@ impl String { args: &[Value], context: &mut Context, ) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; let position = args @@ -342,7 +336,7 @@ impl String { .to_integer(context)?; // Fast path returning NaN when pos is obviously out of range - if position < 0f64 { + if position < 0.0 { return Ok(Value::from(NAN)); } @@ -424,8 +418,6 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.slice /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/slice pub(crate) fn slice(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; @@ -483,8 +475,6 @@ impl String { args: &[Value], context: &mut Context, ) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; @@ -535,8 +525,6 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.endswith /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/endsWith pub(crate) fn ends_with(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; @@ -588,8 +576,6 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.includes /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/includes pub(crate) fn includes(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; @@ -1117,8 +1103,6 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toLowerCase #[allow(clippy::wrong_self_convention)] pub(crate) fn to_lowercase(this: &Value, _: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_lowercase(). @@ -1140,8 +1124,6 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toUpperCase #[allow(clippy::wrong_self_convention)] pub(crate) fn to_uppercase(this: &Value, _: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_uppercase(). @@ -1160,8 +1142,6 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.substring /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/substring pub(crate) fn substring(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string = object.to_string(context)?; @@ -1182,12 +1162,11 @@ impl String { let final_start = int_start.max(0.0).min(len as f64); let final_end = int_end.max(0.0).min(len as f64); - // Start and end are swapped if start is greater than end let from = final_start.min(final_end) as usize; let to = final_start.max(final_end) as usize; - // Extract the part of the string contained between the start index and the end index - // where start is guaranteed to be smaller or equals to end + // Extract the part of the string contained between the from index and the to index + // where from is guaranteed to be smaller or equal to to // TODO: Full UTF-16 support let substring_utf16: Vec = string.encode_utf16().skip(from).take(to - from).collect(); let substring = StdString::from_utf16_lossy(&substring_utf16); @@ -1207,14 +1186,10 @@ impl String { /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/substr /// pub(crate) fn substr(this: &Value, args: &[Value], context: &mut Context) -> Result { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value let object = this.require_object_coercible(context)?; let string: Vec = object.to_string(context)?.encode_utf16().collect(); let size = string.len(); - // If no args are specified, start is 'undefined', defaults to 0 - let int_start = match args .get(0) .cloned() @@ -1226,10 +1201,6 @@ impl String { int_start => int_start, }; - // If less than 2 args specified, end is +infinity, the maximum number value. - // Using i32::max_value() should be safe because the final length used is at most - // the number of code units from start to the end of the string, - // which should always be smaller or equals to both +infinity and i32::max_value let int_length = match args.get(1).cloned().unwrap_or_else(Value::undefined) { length if length.is_undefined() => size as f64, length => length.to_integer(context)?, From 0a4d92142a1f95efe7fd3f24de6081474c169f23 Mon Sep 17 00:00:00 2001 From: RageKnify Date: Sun, 6 Jun 2021 19:06:13 +0100 Subject: [PATCH 11/18] Style: Respect rustfmt --- boa/src/builtins/string/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index c6338d106c8..5d11c85fac9 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -28,10 +28,7 @@ use crate::{ BoaProfiler, Context, Result, }; use regress::Regex; -use std::{ - convert::TryFrom, - string::String as StdString, -}; +use std::{convert::TryFrom, string::String as StdString}; pub(crate) fn code_point_at(string: RcString, position: i64) -> Option<(u32, u8, bool)> { let size = string.encode_utf16().count(); From ad90e0b7b34c6a7eff95be977afcf1348f3f4f0a Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 13 Feb 2022 14:19:46 -0800 Subject: [PATCH 12/18] Fix merge conflicts --- boa/src/builtins/number/conversions.rs | 9 - boa/src/builtins/number/mod.rs | 2 +- boa/src/builtins/regexp/mod.rs | 2 +- boa/src/builtins/string/mod.rs | 527 +++++++++++---------- boa/src/builtins/string/string_iterator.rs | 2 +- 5 files changed, 283 insertions(+), 259 deletions(-) diff --git a/boa/src/builtins/number/conversions.rs b/boa/src/builtins/number/conversions.rs index 7b034792fcb..e9960cbb957 100644 --- a/boa/src/builtins/number/conversions.rs +++ b/boa/src/builtins/number/conversions.rs @@ -84,12 +84,3 @@ pub(crate) fn f64_to_int32(number: f64) -> i32 { pub(crate) fn f64_to_uint32(number: f64) -> u32 { f64_to_int32(number) as u32 } - -/// Converts a 64-bit floating point number to an `u16` according to the [`ToUint16`][ToUint16] algorithm. -/// -/// [ToUint16]: https://tc39.es/ecma262/#sec-touint16 -#[inline] -pub(crate) fn f64_to_uint16(number: f64) -> u16 { - let n = f64_to_int32(number) as u32; - (n % (1 << 16)) as u16 -} diff --git a/boa/src/builtins/number/mod.rs b/boa/src/builtins/number/mod.rs index 5fe429689f4..54f9f16770f 100644 --- a/boa/src/builtins/number/mod.rs +++ b/boa/src/builtins/number/mod.rs @@ -28,7 +28,7 @@ use num_traits::{float::FloatCore, Num}; mod conversions; -pub(crate) use conversions::{f64_to_int32, f64_to_uint16, f64_to_uint32}; +pub(crate) use conversions::{f64_to_int32, f64_to_uint32}; #[cfg(test)] mod tests; diff --git a/boa/src/builtins/regexp/mod.rs b/boa/src/builtins/regexp/mod.rs index c587ca56810..e1ca4daab50 100644 --- a/boa/src/builtins/regexp/mod.rs +++ b/boa/src/builtins/regexp/mod.rs @@ -1760,7 +1760,7 @@ fn advance_string_index(s: &JsString, index: usize, unicode: bool) -> usize { // 5. Let cp be ! CodePointAt(S, index). let (_, offset, _) = - crate::builtins::string::code_point_at(s, index as i32).expect("Failed to get code point"); + crate::builtins::string::code_point_at(s, index as i64).expect("Failed to get code point"); index + offset as usize } diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index e5f32440e9f..52cc84d05e0 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -15,7 +15,7 @@ mod tests; use super::JsArgs; use crate::{ - builtins::{string::string_iterator::StringIterator, Array, BuiltIn, RegExp, Symbol}, + builtins::{string::string_iterator::StringIterator, Array, BuiltIn, Number, RegExp, Symbol}, context::StandardObjects, object::{ internal_methods::get_prototype_from_constructor, ConstructorBuilder, JsObject, ObjectData, @@ -24,16 +24,12 @@ use crate::{ symbol::WellKnownSymbols, BoaProfiler, Context, JsResult, JsString, JsValue, }; -use std::{ - char::{decode_utf16, from_u32}, - cmp::{max, min}, - string::String as StdString, -}; +use std::{char::from_u32, cmp::max, string::String as StdString}; use unicode_normalization::UnicodeNormalization; -pub(crate) fn code_point_at(string: &JsString, position: i32) -> Option<(u32, u8, bool)> { - let size = string.encode_utf16().count() as i32; - if position < 0 || position >= size { +pub(crate) fn code_point_at(string: &JsString, position: i64) -> Option<(u32, u8, bool)> { + let size = string.encode_utf16().count(); + if position < 0 || position >= size as i64 { return None; } @@ -42,7 +38,7 @@ pub(crate) fn code_point_at(string: &JsString, position: i32) -> Option<(u32, u8 if !is_leading_surrogate(first) && !is_trailing_surrogate(first) { return Some((u32::from(first), 1, false)); } - if is_trailing_surrogate(first) || position + 1 == size { + if is_trailing_surrogate(first) || position + 1 == size as i64 { return Some((u32::from(first), 1, true)); } let second = encoded.next()?; @@ -109,6 +105,7 @@ impl BuiltIn for String { .property("length", 0, attribute) .static_method(Self::raw, "raw", 1) .static_method(Self::from_char_code, "fromCharCode", 1) + .static_method(Self::from_code_point, "fromCodePoint", 1) .method(Self::char_at, "charAt", 1) .method(Self::char_code_at, "charCodeAt", 1) .method(Self::code_point_at, "codePointAt", 1) @@ -231,6 +228,38 @@ impl String { .ok_or_else(|| context.construct_type_error("'this' is not a string")) } + /// `String.fromCodePoint(num1[, ...[, numN]])` + /// + /// The static `String.fromCodePoint()` method returns a string created by using the specified sequence of code points. + /// + /// More information: + /// - [ECMAScript reference][spec] + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma262/#sec-string.fromcodepoint + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/fromCodePoint + pub(crate) fn from_code_point( + _: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + let mut result = StdString::new(); + + for arg in args.iter() { + let number = arg.to_number(context)?; + + if !Number::is_float_integer(number) || number < 0.0 || number > (0x10FFFF as f64) { + return Err( + context.construct_range_error(format!("invalid code point: {}", number)) + ); + } + + result.push(char::try_from(number as u32).unwrap()); + } + + Ok(result.into()) + } + /// `String.prototype.raw( template, ...substitutions )` /// /// More information: @@ -365,28 +394,30 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning empty string when pos is obviously out of range if position < 0.0 { - return Ok(Value::from("")); + return Ok("".into()); } // Calling .len() on a string would give the wrong result, as they are bytes not the number of // unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of // bytes is an O(1) operation. - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { - Ok(JsValue::new(from_u32(u32::from(utf16_val)).unwrap())) + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { + // TODO: Full UTF-16 support + Ok(char::try_from(utf16_val as u32) + .unwrap_or('\u{FFFD}' /* replacement char */) + .into()) } else { - Ok(Value::from("")) + Ok("".into()) } } @@ -446,19 +477,20 @@ impl String { ) -> JsResult { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning undefined when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 { return Ok(JsValue::undefined()); } - if let Some((code_point, _, _)) = code_point_at(&primitive_val, pos) { + if let Some((code_point, _, _)) = code_point_at(&string, position as i64) { Ok(JsValue::new(code_point)) } else { Ok(JsValue::undefined()) @@ -486,22 +518,23 @@ impl String { ) -> JsResult { // First we get it the actual string a private field stored on the object only the context has access to. // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - let pos = args + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + let position = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; + .to_integer(context)?; // Fast path returning NaN when pos is obviously out of range - if pos < 0 || pos >= primitive_val.len() as i32 { + if position < 0.0 || position >= string.len() as f64 { return Ok(JsValue::nan()); } // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. // If there is no element at that index, the result is NaN - if let Some(utf16_val) = primitive_val.encode_utf16().nth(pos as usize) { + if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { Ok(JsValue::new(f64::from(utf16_val))) } else { Ok(JsValue::nan()) @@ -527,8 +560,8 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let object = this.require_object_coercible(context)?; - let mut string = object.to_string(context)?.to_string(); + let this = this.require_object_coercible(context)?; + let mut string = this.to_string(context)?.to_string(); for arg in args { string.push_str(&arg.to_string(context)?); @@ -553,23 +586,26 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let object = this.require_object_coercible(context)?; - let string = object.to_string(context)?; - - if let Some(count) = args.get(0) { - let n = count.to_integer(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + if let Some(arg) = args.get(0) { + let n = arg.to_integer(context)?; if n < 0.0 { - context.throw_range_error("repeat count cannot be a negative number") - } else if n.is_infinite() { - context.throw_range_error("repeat count cannot be infinity") - } else if n * (string.len() as f64) > Self::MAX_STRING_LENGTH { - context.throw_range_error("repeat count must not overflow maximum string length") - } else { - Ok(Value::from(string.repeat(n as usize))) + return context.throw_range_error("repeat count cannot be a negative number"); + } + + if n.is_infinite() { + return context.throw_range_error("repeat count cannot be infinity"); + } + + if n * (string.len() as f64) > Self::MAX_STRING_LENGTH { + return context + .throw_range_error("repeat count must not overflow maximum string length"); } + Ok(string.repeat(n as usize).into()) } else { - Ok(Value::from("")) + Ok("".into()) } } @@ -588,38 +624,44 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation. let len = string.encode_utf16().count(); - let from = match args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) - .to_integer(context)? as i32; - let end = args + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (len as f64 + int_start).max(0.0), + int_start => int_start.min(len as f64), + } as usize; + + let to = match args .get(1) - .cloned() - .unwrap_or_else(|| JsValue::new(length)) - .to_integer(context)? as i32; + .filter(|end| !end.is_undefined()) + .map(|end| end.to_integer(context)) + .transpose()? + .unwrap_or(len as f64) + { + int_end if int_end.is_infinite() && int_end.is_sign_negative() => 0.0, + int_end if int_end < 0.0 => (len as f64 + int_end).max(0.0), + int_end => int_end.min(len as f64), + } as usize; if from >= to { - Ok(Value::from("")) + Ok("".into()) } else { let span = to - from; - - let span = max(to.wrapping_sub(from), 0); - - let new_str: StdString = primitive_val - .chars() - .skip(from as usize) - .take(span as usize) - .collect(); - Ok(JsValue::new(new_str)) + // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); + let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); + Ok(substring_lossy.into()) + } } /// `String.prototype.startWith( searchString[, position] )` @@ -637,13 +679,12 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - let arg = args.get_or_undefined(0); + let search_string = args.get(0).cloned().unwrap_or_else(JsValue::undefined); - if Self::is_regexp_object(arg) { + if Self::is_regexp_object(&search_string) { context.throw_type_error( "First argument to String.prototype.startsWith must not be a regular expression", )?; @@ -655,23 +696,24 @@ impl String { let search_length = search_str.encode_utf16().count(); // If less than 2 args specified, position is 'undefined', defaults to 0 - let position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - 0 + let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + position if position.is_undefined() => 0.0, + position => position.to_integer(context)?, }; let start = pos.min(len as f64).max(0.0); let end = start + search_length as f64; - if end > length { + if end > len as f64 { Ok(JsValue::new(false)) } else { - // Only use the part of the string from "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); - Ok(JsValue::new( - this_string.starts_with(search_string.as_str()), - )) + // TODO: Full UTF-16 support + let substring_utf16 = string + .encode_utf16() + .skip(start as usize) + .take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) } } @@ -690,19 +732,21 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - let arg = args.get_or_undefined(0); + let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.endsWith must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; - if Self::is_regexp_object(arg) { - context.throw_type_error( - "First argument to String.prototype.endsWith must not be a regular expression", - )?; - } + let len = string.encode_utf16().count(); - let pos = match args.get(1).cloned().unwrap_or_else(Value::undefined) { + let pos = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { end_position if end_position.is_undefined() => len as f64, end_position => end_position.to_integer(context)?, }; @@ -711,20 +755,21 @@ impl String { // If less than 2 args specified, end_position is 'undefined', defaults to // length of this - let end_position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - length - }; + if search_str.is_empty() { + return Ok(JsValue::new(true)); + } let search_length = search_str.encode_utf16().count(); - if start < 0 { + if end < search_length { Ok(JsValue::new(false)) } else { - // Only use the part of the string up to "end" - let this_string: StdString = primitive_val.chars().take(end as usize).collect(); - Ok(JsValue::new(this_string.ends_with(search_string.as_str()))) + let start = end - search_length; + + let substring_utf16 = string.encode_utf16().skip(start).take(search_length); + let search_str_utf16 = search_str.encode_utf16(); + + Ok(JsValue::new(substring_utf16.eq(search_str_utf16))) } } @@ -743,36 +788,32 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - - let arg = args.get_or_undefined(0); + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; - if Self::is_regexp_object(arg) { - context.throw_type_error( - "First argument to String.prototype.includes must not be a regular expression", - )?; - } + let search_str = match args.get(0).cloned().unwrap_or_else(JsValue::undefined) { + search_string if Self::is_regexp_object(&search_string) => { + return context.throw_type_error( + "First argument to String.prototype.includes must not be a regular expression", + ); + } + search_string => search_string.to_string(context)?, + }; + let pos = args + .get(1) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)?; let start = pos.max(0.0) as usize; - let length = primitive_val.chars().count() as i32; - - // If less than 2 args specified, position is 'undefined', defaults to 0 - - let position = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 + let substring_lossy = if start > 0 { + let substring_utf16: Vec = string.encode_utf16().skip(start).collect(); + StdString::from_utf16_lossy(&substring_utf16) } else { - 0 + string.to_string() }; - - let start = min(max(position, 0), length); - - // Take the string from "this" and use only the part of it after "start" - let this_string: StdString = primitive_val.chars().skip(start as usize).collect(); - - Ok(JsValue::new(this_string.contains(search_string.as_str()))) + Ok(substring_lossy.contains(search_str.as_str()).into()) } fn is_regexp_object(value: &JsValue) -> bool { @@ -1101,14 +1142,14 @@ impl String { let pos = args .get(1) .cloned() - .unwrap_or_else(Value::undefined) + .unwrap_or_else(JsValue::undefined) .to_integer(context)?; let len = string.encode_utf16().count(); let start = pos.max(0.0); if search_str.is_empty() { - return Ok(Value::from(start.min(len as f64))); + return Ok(JsValue::new(start.min(len as f64))); } if start < len as f64 { @@ -1122,12 +1163,11 @@ impl String { }; if let Some(position) = substring_lossy.find(search_str.as_str()) { - return Ok(Value::from( + return Ok(JsValue::new( substring_lossy[..position].encode_utf16().count() + start, )); } } - Ok(JsValue::new(-1)) } @@ -1161,34 +1201,32 @@ impl String { let num_pos = args .get(1) .cloned() - .unwrap_or_else(Value::undefined) + .unwrap_or_else(JsValue::undefined) .to_number(context)?; let pos = if num_pos.is_nan() { f64::INFINITY } else { - Value::from(num_pos).to_integer(context)? + JsValue::new(num_pos).to_integer(context)? }; let len = string.encode_utf16().count(); let start = pos.max(0.0).min(len as f64) as usize; if search_str.is_empty() { - return Ok(Value::from(start as f64)); + return Ok(JsValue::new(start as f64)); } // TODO: Full UTF-16 support let substring_utf16: Vec = string.encode_utf16().take(start + 1).collect(); let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); if let Some(position) = substring_lossy.rfind(search_str.as_str()) { - Ok(Value::from( + Ok(JsValue::new( substring_lossy[..position].encode_utf16().count(), )) } else { - Ok(Value::from(-1)) + Ok(JsValue::new(-1)) } - - Ok(JsValue::new(-1)) } /// `String.prototype.match( regexp )` @@ -1237,34 +1275,59 @@ impl String { /// Performs the actual string padding for padStart/End. /// fn string_pad( - primitive: JsString, - max_length: i32, - fill_string: Option<&JsString>, + object: &JsValue, + max_length: &JsValue, + fill_string: &JsValue, at_start: bool, - ) -> JsValue { - let primitive_length = primitive.len() as i32; + context: &mut Context, + ) -> JsResult { + let string = object.to_string(context)?; - if max_length <= primitive_length { - return JsValue::new(primitive); + let int_max_length = max_length.to_length(context)?; + let string_length = string.encode_utf16().count(); + + if int_max_length <= string_length { + return Ok(string.into()); } - let filler = fill_string.map_or(" ", JsString::as_str); + let filler = if fill_string.is_undefined() { + "\u{0020}".into() + } else { + fill_string.to_string(context)? + }; + let filler_utf16: Vec = filler.encode_utf16().collect(); if filler.is_empty() { - return JsValue::new(primitive); + return Ok(string.into()); } - let fill_len = max_length.wrapping_sub(primitive_length); - let mut fill_str = StdString::new(); + let fill_len = int_max_length - string_length; + let filler_len = filler_utf16.len(); - while fill_str.len() < fill_len as usize { - fill_str.push_str(filler); - } + let mut truncated_string_filler = StdString::new(); + let mut truncated_string_filler_len: usize = 0; + while truncated_string_filler_len < fill_len { + if truncated_string_filler_len.wrapping_add(filler_len) <= fill_len { + truncated_string_filler.push_str(&filler); + truncated_string_filler_len += filler_len; + } else { + truncated_string_filler.push_str( + StdString::from_utf16_lossy( + &filler_utf16[..fill_len - truncated_string_filler_len], + ) + .as_str(), + ); + truncated_string_filler_len = fill_len; + } + } if at_start { - JsValue::new(format!("{concat_fill_str}{primitive}")) + truncated_string_filler.push_str(&string); + Ok(truncated_string_filler.into()) } else { - JsValue::new(format!("{primitive}{concat_fill_str}")) + let mut string = string.to_string(); + string.push_str(&truncated_string_filler); + Ok(string.into()) } } @@ -1285,24 +1348,12 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(JsValue::new("padEnd requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let this = this.require_object_coercible(context)?; - let max_length = args.get(0).cloned().unwrap_or_else(Value::undefined); - let fill_string = args.get(1).cloned().unwrap_or_else(Value::undefined); + let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); - Ok(Self::string_pad( - primitive, - max_length, - fill_string.as_ref(), - false, - )) + Self::string_pad(this, &max_length, &fill_string, false, context) } /// `String.prototype.padStart( targetLength [, padString] )` @@ -1322,24 +1373,12 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - let primitive = this.to_string(context)?; - if args.is_empty() { - return Err(JsValue::new("padStart requires maxLength argument")); - } - let max_length = args - .get(0) - .expect("failed to get argument for String method") - .to_integer(context)? as i32; + let this = this.require_object_coercible(context)?; - let max_length = args.get(0).cloned().unwrap_or_else(Value::undefined); - let fill_string = args.get(1).cloned().unwrap_or_else(Value::undefined); + let max_length = args.get(0).cloned().unwrap_or_else(JsValue::undefined); + let fill_string = args.get(1).cloned().unwrap_or_else(JsValue::undefined); - Ok(Self::string_pad( - primitive, - max_length, - fill_string.as_ref(), - true, - )) + Self::string_pad(this, &max_length, &fill_string, true, context) } /// String.prototype.trim() @@ -1355,8 +1394,8 @@ impl String { /// [spec]: https://tc39.es/ecma262/#sec-string.prototype.trim /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim pub(crate) fn trim(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult { - let this = this.require_object_coercible(context)?; - let string = this.to_string(context)?; + let object = this.require_object_coercible(context)?; + let string = object.to_string(context)?; Ok(JsValue::new(string.trim_matches(is_trimmable_whitespace))) } @@ -1377,6 +1416,7 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { + let this = this.require_object_coercible(context)?; let string = this.to_string(context)?; Ok(JsValue::new( string.trim_start_matches(is_trimmable_whitespace), @@ -1423,12 +1463,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_lowercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(JsValue::new(this_str.to_lowercase())) + Ok(JsValue::new(string.to_lowercase())) } /// `String.prototype.toUpperCase()` @@ -1449,12 +1488,11 @@ impl String { _: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let this_str = this.to_string(context)?; + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; // The Rust String is mapped to uppercase using the builtin .to_uppercase(). // There might be corner cases where it does not behave exactly like Javascript expects - Ok(JsValue::new(this_str.to_uppercase())) + Ok(JsValue::new(string.to_uppercase())) } /// `String.prototype.substring( indexStart[, indexEnd] )` @@ -1472,39 +1510,36 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - // If no args are specified, start is 'undefined', defaults to 0 - let start = if let Some(integer) = args.get(0) { - integer.to_integer(context)? as i32 - } else { - 0 - }; - let length = primitive_val.encode_utf16().count() as i32; - // If less than 2 args specified, end is the length of the this object converted to a String - let end = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - length + let this = this.require_object_coercible(context)?; + let string = this.to_string(context)?; + + let len = string.len(); + let int_start = args + .get(0) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)?; + + let int_end = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + end if end.is_undefined() => len as f64, + end => end.to_integer(context)?, }; + // Both start and end args replaced by 0 if they were negative // or by the length of the String if they were greater - let final_start = min(max(start, 0), length); - let final_end = min(max(end, 0), length); - // Start and end are swapped if start is greater than end - let from = min(final_start, final_end) as usize; - let to = max(final_start, final_end) as usize; - // Extract the part of the string contained between the start index and the end index - // where start is guaranteed to be smaller or equals to end - let extracted_string: Result = decode_utf16( - primitive_val - .encode_utf16() - .skip(from) - .take(to.wrapping_sub(from)), - ) - .collect(); - Ok(JsValue::new(extracted_string.expect("Invalid string"))) + let final_start = int_start.max(0.0).min(len as f64); + let final_end = int_end.max(0.0).min(len as f64); + + let from = final_start.min(final_end) as usize; + let to = final_start.max(final_end) as usize; + + // Extract the part of the string contained between the from index and the to index + // where from is guaranteed to be smaller or equal to to + // TODO: Full UTF-16 support + let substring_utf16: Vec = string.encode_utf16().skip(from).take(to - from).collect(); + let substring = StdString::from_utf16_lossy(&substring_utf16); + + Ok(substring.into()) } /// `String.prototype.substr( start[, length] )` @@ -1523,41 +1558,39 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { - // First we get it the actual string a private field stored on the object only the context has access to. - // Then we convert it into a Rust String by wrapping it in from_value - let primitive_val = this.to_string(context)?; - // If no args are specified, start is 'undefined', defaults to 0 - let mut start = if let Some(integer) = args.get(0) { - integer.to_integer(context)? as i32 - } else { - 0 + let this = this.require_object_coercible(context)?; + let string: Vec = this.to_string(context)?.encode_utf16().collect(); + let size = string.len(); + + let int_start = match args + .get(0) + .cloned() + .unwrap_or_else(JsValue::undefined) + .to_integer(context)? + { + int_start if int_start.is_infinite() && int_start.is_sign_negative() => 0.0, + int_start if int_start < 0.0 => (int_start + size as f64).max(0.0), + int_start => int_start, }; - let length = primitive_val.chars().count() as i32; - // If less than 2 args specified, end is +infinity, the maximum number value. - // Using i32::max_value() should be safe because the final length used is at most - // the number of code units from start to the end of the string, - // which should always be smaller or equals to both +infinity and i32::max_value - let end = if let Some(integer) = args.get(1) { - integer.to_integer(context)? as i32 - } else { - i32::MAX + + let int_length = match args.get(1).cloned().unwrap_or_else(JsValue::undefined) { + length if length.is_undefined() => size as f64, + length => length.to_integer(context)?, }; if int_start.is_infinite() || int_length <= 0.0 || int_length.is_infinite() { - return Ok(Value::from("")); + return Ok("".into()); } - // length replaced by 0 if it was negative - // or by the number of code units from start to the end of the string if it was greater - let result_length = min(max(end, 0), length.wrapping_sub(start)); - // If length is negative we return an empty string - // otherwise we extract the part of the string from start and is length code units long - if result_length <= 0 { - Ok(JsValue::new("")) + + let int_end = (int_start + int_length).min(size as f64) as usize; + let int_start = int_start as usize; + + if int_start >= int_end { + Ok("".into()) } else { let substring_utf16 = &string[int_start..int_end]; let substring = StdString::from_utf16_lossy(substring_utf16); - - Ok(JsValue::new(extracted_string)) + Ok(substring.into()) } } diff --git a/boa/src/builtins/string/string_iterator.rs b/boa/src/builtins/string/string_iterator.rs index 18c218118a5..de2516eb456 100644 --- a/boa/src/builtins/string/string_iterator.rs +++ b/boa/src/builtins/string/string_iterator.rs @@ -57,7 +57,7 @@ impl StringIterator { )); } let (_, code_unit_count, _) = - code_point_at(&native_string, position).expect("Invalid code point position"); + code_point_at(&native_string, position as i64).expect("Invalid code point position"); string_iterator.next_index += i32::from(code_unit_count); let result_string = crate::builtins::string::String::substring( &string_iterator.string, From c9c4a94f7548f3bb01ab7ba4a642b14b92be20af Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 13 Feb 2022 14:36:59 -0800 Subject: [PATCH 13/18] Fix clippy --- boa/src/builtins/string/mod.rs | 7 ++----- boa/src/builtins/string/string_iterator.rs | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 52cc84d05e0..af536616686 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -248,7 +248,7 @@ impl String { for arg in args.iter() { let number = arg.to_number(context)?; - if !Number::is_float_integer(number) || number < 0.0 || number > (0x10FFFF as f64) { + if !Number::is_float_integer(number) || number < 0.0 || number > f64::from(0x10FFFF) { return Err( context.construct_range_error(format!("invalid code point: {}", number)) ); @@ -412,8 +412,7 @@ impl String { // Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of // bytes is an O(1) operation. if let Some(utf16_val) = string.encode_utf16().nth(position as usize) { - // TODO: Full UTF-16 support - Ok(char::try_from(utf16_val as u32) + Ok(char::try_from(u32::from(utf16_val)) .unwrap_or('\u{FFFD}' /* replacement char */) .into()) } else { @@ -657,7 +656,6 @@ impl String { Ok("".into()) } else { let span = to - from; - // TODO: Full UTF-16 support let substring_utf16: Vec = string.encode_utf16().skip(from).take(span).collect(); let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); Ok(substring_lossy.into()) @@ -707,7 +705,6 @@ impl String { if end > len as f64 { Ok(JsValue::new(false)) } else { - // TODO: Full UTF-16 support let substring_utf16 = string .encode_utf16() .skip(start as usize) diff --git a/boa/src/builtins/string/string_iterator.rs b/boa/src/builtins/string/string_iterator.rs index de2516eb456..dd083e6085e 100644 --- a/boa/src/builtins/string/string_iterator.rs +++ b/boa/src/builtins/string/string_iterator.rs @@ -57,7 +57,7 @@ impl StringIterator { )); } let (_, code_unit_count, _) = - code_point_at(&native_string, position as i64).expect("Invalid code point position"); + code_point_at(&native_string, i64::from(position)).expect("Invalid code point position"); string_iterator.next_index += i32::from(code_unit_count); let result_string = crate::builtins::string::String::substring( &string_iterator.string, From cb926e2e6442a08897077264623d4753bd063a0e Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 13 Feb 2022 15:27:18 -0800 Subject: [PATCH 14/18] Update test262 --- test262 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test262 b/test262 index e793512b55c..281c781ee4c 160000 --- a/test262 +++ b/test262 @@ -1 +1 @@ -Subproject commit e793512b55c199de6abc392d1be4de7325dae544 +Subproject commit 281c781ee4cb66a2e3ad6a26204a6d3f842947d6 From 83a9e3288292f902814527ebcb9ed0e6c990b1f9 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Sun, 13 Feb 2022 15:30:23 -0800 Subject: [PATCH 15/18] Fix rustfmt --- boa/src/builtins/string/string_iterator.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boa/src/builtins/string/string_iterator.rs b/boa/src/builtins/string/string_iterator.rs index dd083e6085e..60df8b77a20 100644 --- a/boa/src/builtins/string/string_iterator.rs +++ b/boa/src/builtins/string/string_iterator.rs @@ -56,8 +56,8 @@ impl StringIterator { context, )); } - let (_, code_unit_count, _) = - code_point_at(&native_string, i64::from(position)).expect("Invalid code point position"); + let (_, code_unit_count, _) = code_point_at(&native_string, i64::from(position)) + .expect("Invalid code point position"); string_iterator.next_index += i32::from(code_unit_count); let result_string = crate::builtins::string::String::substring( &string_iterator.string, From 4f6a323b69e85ac2eb564b6c087743989b5d1861 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 14 Feb 2022 15:45:20 -0800 Subject: [PATCH 16/18] Add spec steps comments and remove unwrap --- boa/src/builtins/string/mod.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index af536616686..0d8940e5a56 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -243,20 +243,30 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let result be the empty String. let mut result = StdString::new(); + // 2. For each element next of codePoints, do for arg in args.iter() { - let number = arg.to_number(context)?; + // a. Let nextCP be ? ToNumber(next). + let nextcp = arg.to_number(context)?; - if !Number::is_float_integer(number) || number < 0.0 || number > f64::from(0x10FFFF) { - return Err( - context.construct_range_error(format!("invalid code point: {}", number)) - ); + // b. If ! IsIntegralNumber(nextCP) is false, throw a RangeError exception. + if !Number::is_float_integer(nextcp) { + return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); + } + + // c. If ℝ(nextCP) < 0 or ℝ(nextCP) > 0x10FFFF, throw a RangeError exception. + if nextcp < 0.0 || nextcp > f64::from(0x10FFFF) { + return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); } - result.push(char::try_from(number as u32).unwrap()); + // d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)). + result.push(char::try_from(nextcp as u32).expect("nextcp must be a valid code point")); } + // 3. Assert: If codePoints is empty, then result is the empty String. + // 4. Return result. Ok(result.into()) } From f29121d823d36a595df42e8fabc7e66c786e7dae Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 14 Feb 2022 16:30:28 -0800 Subject: [PATCH 17/18] Fix panics with replacement char workaround --- boa/src/builtins/string/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 0d8940e5a56..54bc03f663f 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -261,8 +261,9 @@ impl String { return Err(context.construct_range_error(format!("invalid code point: {nextcp}"))); } + // TODO: Full UTF-16 support // d. Set result to the string-concatenation of result and ! UTF16EncodeCodePoint(ℝ(nextCP)). - result.push(char::try_from(nextcp as u32).expect("nextcp must be a valid code point")); + result.push(char::try_from(nextcp as u32).unwrap_or('\u{FFFD}' /* replacement char */)); } // 3. Assert: If codePoints is empty, then result is the empty String. From b011da8c0dc2e8003e12a658e10b66709b85e8d2 Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 14 Feb 2022 20:20:29 -0800 Subject: [PATCH 18/18] Fix a bug in lastIndexOf --- boa/src/builtins/string/mod.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 54bc03f663f..c0252ecf01b 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -1197,44 +1197,59 @@ impl String { args: &[JsValue], context: &mut Context, ) -> JsResult { + // 1. Let O be ? RequireObjectCoercible(this value). let this = this.require_object_coercible(context)?; + // 2. Let S be ? ToString(O). let string = this.to_string(context)?; + // 3. Let searchStr be ? ToString(searchString). let search_str = args .get(0) .cloned() .unwrap_or_else(JsValue::undefined) .to_string(context)?; + // 4. Let numPos be ? ToNumber(position). + // 5. Assert: If position is undefined, then numPos is NaN. let num_pos = args .get(1) .cloned() .unwrap_or_else(JsValue::undefined) .to_number(context)?; + // 6. If numPos is NaN, let pos be +∞; otherwise, let pos be ! ToIntegerOrInfinity(numPos). let pos = if num_pos.is_nan() { f64::INFINITY } else { JsValue::new(num_pos).to_integer(context)? }; + // 7. Let len be the length of S. let len = string.encode_utf16().count(); + // 8. Let start be the result of clamping pos between 0 and len. let start = pos.max(0.0).min(len as f64) as usize; + // 9. If searchStr is the empty String, return 𝔽(start). if search_str.is_empty() { return Ok(JsValue::new(start as f64)); } // TODO: Full UTF-16 support - let substring_utf16: Vec = string.encode_utf16().take(start + 1).collect(); + // 10. Let searchLen be the length of searchStr. + let search_len = search_str.encode_utf16().count(); + // 11. For each non-negative integer i starting with start such that i ≤ len - searchLen, in descending order, do + // a. Let candidate be the substring of S from i to i + searchLen. + let substring_utf16: Vec = string.encode_utf16().take(start + search_len).collect(); let substring_lossy = StdString::from_utf16_lossy(&substring_utf16); if let Some(position) = substring_lossy.rfind(search_str.as_str()) { - Ok(JsValue::new( + // b. If candidate is the same sequence of code units as searchStr, return 𝔽(i). + return Ok(JsValue::new( substring_lossy[..position].encode_utf16().count(), - )) - } else { - Ok(JsValue::new(-1)) + )); } + + // 12. Return -1𝔽. + Ok(JsValue::new(-1)) } /// `String.prototype.match( regexp )`