From 24eda416219a3b2cf19d4b78bb97a17e62525c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Borges?= Date: Thu, 13 Aug 2020 22:44:39 +0100 Subject: [PATCH 1/8] Fix: String.prototype.replace for multiple capture groups, closes #610 --- boa/src/builtins/string/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 80bc7909d75..7613103d402 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -486,7 +486,7 @@ impl String { } // Capture $1, $2, $3 etc - if re.is_match(&result) { + while re.is_match(&result) { let mat_caps = re.captures(&result).unwrap(); let group_str = mat_caps.get(1).unwrap().as_str(); let group_int = group_str.parse::().unwrap(); From eb813fd9945edd028dff0539ca6783cce1e321c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Borges?= Date: Thu, 13 Aug 2020 23:15:59 +0100 Subject: [PATCH 2/8] Test: Add test for #629, based on #610 --- boa/src/builtins/string/tests.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index 2ba8b048d65..f0c0a9476e5 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -218,6 +218,22 @@ fn replace() { assert_eq!(forward(&mut engine, "a"), "\"2bc\""); } +#[test] +fn replace_with_regex() { + let realm = Realm::create(); + let mut engine = Interpreter::new(realm); + let init = r#" + var re = /(\w+)\s(\w+)/; + var a = "John Smith"; + a = a.replace(re, '$2, $1'); + a + "#; + + forward(&mut engine, init); + + assert_eq!(forward(&mut engine, "a"), "\"Smith, John\""); +} + #[test] fn replace_with_function() { let realm = Realm::create(); From 33c07e6a4023f1d3438385a91c19d9b584e8020e Mon Sep 17 00:00:00 2001 From: RageKnify Date: Mon, 24 Aug 2020 05:20:01 +0100 Subject: [PATCH 3/8] Fix: String.prototype.replace capture groups per spec, see #629 Based on https://tc39.es/ecma262/#sec-getsubstitution --- boa/src/builtins/string/mod.rs | 118 ++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index cbe3e7f0d69..a4dcec13804 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -479,39 +479,91 @@ impl String { match replace_object { Value::String(val) => { // https://tc39.es/ecma262/#table-45 - let mut result = val.to_string(); - let re = Regex::new(r"\$(\d)").unwrap(); - - if val.find("$$").is_some() { - result = val.replace("$$", "$") - } - - if val.find("$`").is_some() { - let start_of_match = mat.start(); - let slice = &primitive_val[..start_of_match]; - result = val.replace("$`", slice); - } - - if val.find("$'").is_some() { - let end_of_match = mat.end(); - let slice = &primitive_val[end_of_match..]; - result = val.replace("$'", slice); - } - - if val.find("$&").is_some() { - // get matched value - let matched = caps.get(0).expect("cannot get matched value"); - result = val.replace("$&", matched.as_str()); - } - - // Capture $1, $2, $3 etc - while re.is_match(&result) { - let mat_caps = re.captures(&result).unwrap(); - let group_str = mat_caps.get(1).unwrap().as_str(); - let group_int = group_str.parse::().unwrap(); - result = re - .replace(result.as_str(), caps.get(group_int).unwrap().as_str()) - .to_string() + let mut result = StdString::new(); + let mut chars = val.chars(); + + let m = caps.len(); + while let Some(chr) = chars.next() { + match chr { + '$' => match chars.next() { + Some(next_chr) => match next_chr { + '$' => result.push(chr), + '&' => { + // get matched value + let matched = + caps.get(0).expect("cannot get matched value"); + result.push_str(matched.as_str()) + } + '`' => { + let start_of_match = mat.start(); + let slice = &primitive_val[..start_of_match]; + result.push_str(slice) + } + '\'' => { + let end_of_match = mat.end(); + let slice = &primitive_val[end_of_match..]; + result.push_str(slice); + } + _ if next_chr.is_digit(10) => { + match chars.next() { + Some(next_next_chr) if next_next_chr.is_digit(10) => { + // 2 digit group + let tens = next_chr.to_digit(10).unwrap() as usize; + let units = + next_next_chr.to_digit(10).unwrap() as usize; + let nn = 10 * tens + units; + if nn == 0 || nn > m { + result.push(chr); + result.push(next_chr); + result.push(next_next_chr); + } else { + let group = match caps.get(nn) { + Some(text) => text.as_str(), + None => "", + }; + result.push_str(group); + } + } + Some(next_next_chr) => { + let n = next_chr.to_digit(10).unwrap() as usize; + if n == 0 || n > m { + result.push(chr); + result.push(next_chr); + } else { + let group = match caps.get(n) { + Some(text) => text.as_str(), + None => "", + }; + result.push_str(group); + } + // 1 digit group, place next_next_chr afterwards + result.push(next_next_chr); + } + None => { + // 1 digit group + let n = next_chr.to_digit(10).unwrap() as usize; + if n == 0 || n > m { + result.push(chr); + result.push(next_chr); + } else { + let group = match caps.get(n) { + Some(text) => text.as_str(), + None => "", + }; + result.push_str(group); + } + } + } + } + '<' => unimplemented!(), + _ => result.push(chr), + }, + None => { + result.push(chr); + } + }, + _ => result.push(chr), + } } result From 81cf31d3fbec348d86cd9d89531f2e341f4b90f3 Mon Sep 17 00:00:00 2001 From: RageKnify Date: Mon, 24 Aug 2020 15:08:12 +0100 Subject: [PATCH 4/8] Test: Improve coverage for String.prototype.replace Now covers all cases in https://tc39.es/ecma262/#table-45 except named capture groups because it's not yet implemented --- boa/src/builtins/string/tests.rs | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index f0c0a9476e5..fcdc8a01908 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -219,7 +219,7 @@ fn replace() { } #[test] -fn replace_with_regex() { +fn replace_with_capture_groups() { let realm = Realm::create(); let mut engine = Interpreter::new(realm); let init = r#" @@ -234,6 +234,44 @@ fn replace_with_regex() { assert_eq!(forward(&mut engine, "a"), "\"Smith, John\""); } +#[test] +fn replace_with_tenth_capture_group() { + let realm = Realm::create(); + let mut engine = Interpreter::new(realm); + let init = r#" + var re = /(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)/; + var a = "0123456789"; + let res = a.replace(re, '$10'); + "#; + + forward(&mut engine, init); + + assert_eq!(forward(&mut engine, "res"), "\"9\""); +} + +#[test] +fn replace_substitutions() { + let realm = Realm::create(); + let mut engine = Interpreter::new(realm); + let init = r#" + var re = / two /; + var a = "one two three"; + var dollar = a.replace(re, " $$ "); + var matched = a.replace(re, "$&$&"); + var start = a.replace(re, " $` "); + var end = a.replace(re, " $' "); + var no_sub = a.replace(re, " $_ "); + "#; + + forward(&mut engine, init); + + assert_eq!(forward(&mut engine, "dollar"), "\"one $ three\""); + assert_eq!(forward(&mut engine, "matched"), "\"one two two three\""); + assert_eq!(forward(&mut engine, "start"), "\"one one three\""); + assert_eq!(forward(&mut engine, "end"), "\"one three three\""); + assert_eq!(forward(&mut engine, "no_sub"), "\"one $_ three\""); +} + #[test] fn replace_with_function() { let realm = Realm::create(); From a8222ced381fcab93a721323b94ff6b54427f061 Mon Sep 17 00:00:00 2001 From: RageKnify Date: Mon, 24 Aug 2020 15:24:33 +0100 Subject: [PATCH 5/8] Refactor: String.prototype.replace for better readability --- boa/src/builtins/string/mod.rs | 155 ++++++++++++++++----------------- 1 file changed, 77 insertions(+), 78 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index a4dcec13804..78dd16f3a91 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -480,89 +480,88 @@ impl String { Value::String(val) => { // https://tc39.es/ecma262/#table-45 let mut result = StdString::new(); - let mut chars = val.chars(); + let mut chars = val.chars().peekable(); let m = caps.len(); - while let Some(chr) = chars.next() { - match chr { - '$' => match chars.next() { - Some(next_chr) => match next_chr { - '$' => result.push(chr), - '&' => { - // get matched value - let matched = - caps.get(0).expect("cannot get matched value"); - result.push_str(matched.as_str()) - } - '`' => { - let start_of_match = mat.start(); - let slice = &primitive_val[..start_of_match]; - result.push_str(slice) - } - '\'' => { - let end_of_match = mat.end(); - let slice = &primitive_val[end_of_match..]; - result.push_str(slice); + + while let Some(first) = chars.next() { + if first == '$' { + let second = chars.next(); + let second_is_digit = second.map(|ch| ch.is_digit(10)).unwrap_or(false); + // we use peek so that it is still in the iterator if not used + let third = if second_is_digit { chars.peek() } else { None }; + let third_is_digit = third.map(|ch| ch.is_digit(10)).unwrap_or(false); + + match (second, third) { + (Some('$'), _) => { + // $$ + result.push('$'); + } + (Some('&'), _) => { + // $& + let matched = caps.get(0).expect("cannot get matched value"); + result.push_str(matched.as_str()); + } + (Some('`'), _) => { + // $` + let start_of_match = mat.start(); + let slice = &primitive_val[..start_of_match]; + result.push_str(slice); + } + (Some('\''), _) => { + // $' + let end_of_match = mat.end(); + let slice = &primitive_val[end_of_match..]; + result.push_str(slice); + } + (_, _) if second_is_digit && third_is_digit => { + // $nn + let tens = second.unwrap().to_digit(10).unwrap() as usize; + let units = third.unwrap().to_digit(10).unwrap() as usize; + let nn = 10 * tens + units; + if nn == 0 || nn > m { + [Some(first), second, chars.next()] + .iter() + .flatten() + .for_each(|ch: &char| result.push(*ch)); + } else { + let group = match caps.get(nn) { + Some(text) => text.as_str(), + None => "", + }; + result.push_str(group); + chars.next(); // consume third } - _ if next_chr.is_digit(10) => { - match chars.next() { - Some(next_next_chr) if next_next_chr.is_digit(10) => { - // 2 digit group - let tens = next_chr.to_digit(10).unwrap() as usize; - let units = - next_next_chr.to_digit(10).unwrap() as usize; - let nn = 10 * tens + units; - if nn == 0 || nn > m { - result.push(chr); - result.push(next_chr); - result.push(next_next_chr); - } else { - let group = match caps.get(nn) { - Some(text) => text.as_str(), - None => "", - }; - result.push_str(group); - } - } - Some(next_next_chr) => { - let n = next_chr.to_digit(10).unwrap() as usize; - if n == 0 || n > m { - result.push(chr); - result.push(next_chr); - } else { - let group = match caps.get(n) { - Some(text) => text.as_str(), - None => "", - }; - result.push_str(group); - } - // 1 digit group, place next_next_chr afterwards - result.push(next_next_chr); - } - None => { - // 1 digit group - let n = next_chr.to_digit(10).unwrap() as usize; - if n == 0 || n > m { - result.push(chr); - result.push(next_chr); - } else { - let group = match caps.get(n) { - Some(text) => text.as_str(), - None => "", - }; - result.push_str(group); - } - } - } + } + (_, _) if second_is_digit => { + // $n + let n = second.unwrap().to_digit(10).unwrap() as usize; + if n == 0 || n > m { + [Some(first), second] + .iter() + .flatten() + .for_each(|ch: &char| result.push(*ch)); + } else { + let group = match caps.get(n) { + Some(text) => text.as_str(), + None => "", + }; + result.push_str(group); } - '<' => unimplemented!(), - _ => result.push(chr), - }, - None => { - result.push(chr); } - }, - _ => result.push(chr), + (Some('<'), _) => { + // $< + todo!("named capture groups") + } + _ => { + // $?, ? is none of the above + // we can consume second because it isn't $ + result.push(first); + second.iter().for_each(|ch: &char| result.push(*ch)); + } + } + } else { + result.push(first); } } From 6cbc36637074133758685a60c862ffa8b9e0752f Mon Sep 17 00:00:00 2001 From: RageKnify Date: Tue, 25 Aug 2020 12:28:51 +0100 Subject: [PATCH 6/8] Fix: String.prototype.replace to not panic if no match Introduce early return if `regexp|substr` isn't found in `this` Add test case covering this situation --- boa/src/builtins/string/mod.rs | 5 ++++- boa/src/builtins/string/tests.rs | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 78dd16f3a91..0c00cbdb517 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -468,7 +468,10 @@ impl String { let regex_body = Self::get_regex_string(args.get(0).expect("Value needed")); let re = Regex::new(®ex_body).expect("unable to convert regex to regex object"); - let mat = re.find(&primitive_val).expect("unable to find value"); + let mat = match re.find(&primitive_val) { + Some(mat) => mat, + None => return Ok(Value::from(primitive_val)), + }; let caps = re .captures(&primitive_val) .expect("unable to get capture groups from text"); diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index fcdc8a01908..3666f4a1cbc 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -218,6 +218,20 @@ fn replace() { assert_eq!(forward(&mut engine, "a"), "\"2bc\""); } +#[test] +fn replace_no_match() { + let realm = Realm::create(); + let mut engine = Interpreter::new(realm); + let init = r#" + var a = "abc"; + a = a.replace(/d/, "$&$&"); + "#; + + forward(&mut engine, init); + + assert_eq!(forward(&mut engine, "a"), "\"abc\""); +} + #[test] fn replace_with_capture_groups() { let realm = Realm::create(); From 1ca08284d3bf6de71f2a644ec3681a7c7acda172 Mon Sep 17 00:00:00 2001 From: RageKnify Date: Tue, 1 Sep 2020 15:26:01 +0100 Subject: [PATCH 7/8] Refactor: Cleanup String.prototype.replace Sugestions by @HalidOdat in boa-dev/boa#629 --- boa/src/builtins/string/mod.rs | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index 0c00cbdb517..c11b6e99f84 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -490,10 +490,10 @@ impl String { while let Some(first) = chars.next() { if first == '$' { let second = chars.next(); - let second_is_digit = second.map(|ch| ch.is_digit(10)).unwrap_or(false); + let second_is_digit = second.map_or(false, |ch| ch.is_digit(10)); // we use peek so that it is still in the iterator if not used let third = if second_is_digit { chars.peek() } else { None }; - let third_is_digit = third.map(|ch| ch.is_digit(10)).unwrap_or(false); + let third_is_digit = third.map_or(false, |ch| ch.is_digit(10)); match (second, third) { (Some('$'), _) => { @@ -508,22 +508,22 @@ impl String { (Some('`'), _) => { // $` let start_of_match = mat.start(); - let slice = &primitive_val[..start_of_match]; - result.push_str(slice); + result.push_str(&primitive_val[..start_of_match]); } (Some('\''), _) => { // $' let end_of_match = mat.end(); - let slice = &primitive_val[end_of_match..]; - result.push_str(slice); + result.push_str(&primitive_val[end_of_match..]); } - (_, _) if second_is_digit && third_is_digit => { + (Some(second), Some(third)) + if second_is_digit && third_is_digit => + { // $nn - let tens = second.unwrap().to_digit(10).unwrap() as usize; - let units = third.unwrap().to_digit(10).unwrap() as usize; + let tens = second.to_digit(10).unwrap() as usize; + let units = third.to_digit(10).unwrap() as usize; let nn = 10 * tens + units; if nn == 0 || nn > m { - [Some(first), second, chars.next()] + [Some(first), Some(second), chars.next()] .iter() .flatten() .for_each(|ch: &char| result.push(*ch)); @@ -536,14 +536,12 @@ impl String { chars.next(); // consume third } } - (_, _) if second_is_digit => { + (Some(second), _) if second_is_digit => { // $n - let n = second.unwrap().to_digit(10).unwrap() as usize; + let n = second.to_digit(10).unwrap() as usize; if n == 0 || n > m { - [Some(first), second] - .iter() - .flatten() - .for_each(|ch: &char| result.push(*ch)); + result.push(first); + result.push(second); } else { let group = match caps.get(n) { Some(text) => text.as_str(), From 54df20896c18ea341a2cb7889ffc80cecfd421eb Mon Sep 17 00:00:00 2001 From: RageKnify Date: Tue, 1 Sep 2020 16:48:44 +0100 Subject: [PATCH 8/8] Refactor: Cleanup String.prototype.replace Sugestions by @HalidOdat in boa-dev/boa#629 --- boa/src/builtins/string/mod.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/boa/src/builtins/string/mod.rs b/boa/src/builtins/string/mod.rs index c11b6e99f84..c104dfed0fe 100644 --- a/boa/src/builtins/string/mod.rs +++ b/boa/src/builtins/string/mod.rs @@ -523,10 +523,11 @@ impl String { let units = third.to_digit(10).unwrap() as usize; let nn = 10 * tens + units; if nn == 0 || nn > m { - [Some(first), Some(second), chars.next()] - .iter() - .flatten() - .for_each(|ch: &char| result.push(*ch)); + result.push(first); + result.push(second); + if let Some(ch) = chars.next() { + result.push(ch); + } } else { let group = match caps.get(nn) { Some(text) => text.as_str(), @@ -558,7 +559,9 @@ impl String { // $?, ? is none of the above // we can consume second because it isn't $ result.push(first); - second.iter().for_each(|ch: &char| result.push(*ch)); + if let Some(second) = second { + result.push(second); + } } } } else {