From 038acb49897f8a0faf28a9976bec9dd058763bfe Mon Sep 17 00:00:00 2001 From: Jevan Chan Date: Mon, 1 Feb 2021 08:48:44 -0800 Subject: [PATCH] Refactor StringLiteral (#1084) Fix octal escape in string literal Add tests Fix zero escape Fix zero escape lookahead Rename variables Rename helper functions Refactor match arms Fix escape line terminator sequence Fix single character escape Fix line terminator and escape followed by unicode char Fix broken tests Add NonOctalDecimalEscapeSequence Fix comment Refactor Modify error message Add tests Rename tests Add test for error Add comments for unsafe bytes to str Update boa/src/syntax/lexer/string.rs Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com> Minor refactor Remove unsafe bytes to str Fix panic when reading invalid utf-8 chars Refactor string literal Support invalid utf-8 chars in string literal input Add cook function for template literal Fix line continuation bug Add methods for utf16 buffer trait Add trait comments Add error message for template literal Add and fix comments Hide unused exported function and modify tests Fix bug Fix merge bug --- boa/src/builtins/json/tests.rs | 53 +++---- boa/src/builtins/string/tests.rs | 36 +++-- boa/src/syntax/lexer/string.rs | 258 ++++++++++++++++--------------- boa/src/syntax/lexer/template.rs | 94 +++++++---- boa/src/syntax/lexer/tests.rs | 162 ++++++++----------- 5 files changed, 304 insertions(+), 299 deletions(-) diff --git a/boa/src/builtins/json/tests.rs b/boa/src/builtins/json/tests.rs index abf4f5a1144..aec393982b1 100644 --- a/boa/src/builtins/json/tests.rs +++ b/boa/src/builtins/json/tests.rs @@ -217,10 +217,10 @@ fn json_stringify_pretty_print() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -235,10 +235,10 @@ fn json_stringify_pretty_print_four_spaces() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -253,10 +253,10 @@ fn json_stringify_pretty_print_twenty_spaces() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -271,10 +271,10 @@ fn json_stringify_pretty_print_with_number_object() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -301,10 +301,10 @@ fn json_stringify_pretty_print_with_too_long_string() { ); let expected = forward( &mut context, - r#"'{ -abcdefghij"a": "b", -abcdefghij"b": "c" -}'"#, + r#"'{\n' + +'abcdefghij"a": "b",\n' + +'abcdefghij"b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -319,10 +319,10 @@ fn json_stringify_pretty_print_with_string_object() { ); let expected = forward( &mut context, - r#"'{ -abcd"a": "b", -abcd"b": "c" -}'"#, + r#"'{\n' + +'abcd"a": "b",\n' + +'abcd"b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -404,10 +404,7 @@ fn json_parse_object_with_reviver() { fn json_parse_sets_prototypes() { let mut context = Context::new(); let init = r#" - const jsonString = "{ - \"ob\":{\"ject\":1}, - \"arr\": [0,1] - }"; + const jsonString = "{\"ob\":{\"ject\":1},\"arr\": [0,1]}"; const jsonObj = JSON.parse(jsonString); "#; eprintln!("{}", forward(&mut context, init)); diff --git a/boa/src/builtins/string/tests.rs 
b/boa/src/builtins/string/tests.rs index e94d0636aff..0e5f0791600 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -533,34 +533,46 @@ fn test_match() { #[test] fn trim() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' \nHello'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "'Hello \n\r'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' Hello '.trim()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"' \nHello'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello \n\r'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"' Hello '.trim()"#), "\"Hello\""); } #[test] fn trim_start() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trimStart()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' \nHello'.trimStart()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trimStart()"#), "\"Hello\""); assert_eq!( - forward(&mut context, "'Hello \n'.trimStart()"), + forward(&mut context, r#"' \nHello'.trimStart()"#), + "\"Hello\"" + ); + assert_eq!( + forward(&mut context, r#"'Hello \n'.trimStart()"#), "\"Hello \n\"" ); - assert_eq!(forward(&mut context, "' Hello '.trimStart()"), "\"Hello \""); + assert_eq!( + forward(&mut context, r#"' Hello '.trimStart()"#), + "\"Hello \"" + ); } #[test] fn trim_end() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trimEnd()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trimEnd()"#), "\"Hello\""); assert_eq!( - forward(&mut context, "' \nHello'.trimEnd()"), + forward(&mut context, r#"' \nHello'.trimEnd()"#), "\" \nHello\"" ); - assert_eq!(forward(&mut context, "'Hello \n'.trimEnd()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' Hello '.trimEnd()"), "\" Hello\""); + assert_eq!( + forward(&mut context, r#"'Hello \n'.trimEnd()"#), + "\"Hello\"" + ); + assert_eq!( + forward(&mut context, r#"' Hello '.trimEnd()"#), + "\" Hello\"" + ); } #[test] diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index b4542a70d41..d6b5049edb8 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -8,7 +8,6 @@ use crate::{ lexer::{Token, TokenKind}, }, }; -use core::convert::TryFrom; use std::{ io::{self, ErrorKind, Read}, str, @@ -47,7 +46,34 @@ impl StringLiteral { pub(crate) enum StringTerminator { SingleQuote, DoubleQuote, - End, +} + +/// Extends a buffer type to store UTF-16 code units and convert to string. +pub(crate) trait UTF16CodeUnitsBuffer { + /// Encodes the code point to UTF-16 code units and push to the buffer. + fn push_code_point(&mut self, code_point: u32); + + /// Decodes the buffer into a String and replace the invalid data with the replacement character (U+FFFD). 
+ fn to_string_lossy(&self) -> String; +} + +impl UTF16CodeUnitsBuffer for Vec { + #[inline] + fn push_code_point(&mut self, code_point: u32) { + if code_point <= 65535 { + self.push(code_point as u16); + } else { + let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; + let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; + self.push(cu1); + self.push(cu2); + } + } + + #[inline] + fn to_string_lossy(&self) -> String { + String::from_utf16_lossy(self.as_slice()) + } } impl Tokenizer for StringLiteral { @@ -72,18 +98,19 @@ impl StringLiteral { /// /// [spec]: https://tc39.es/ecma262/#prod-LineTerminator #[inline] - pub(super) fn is_line_terminator(ch: char) -> bool { + pub(super) fn is_line_terminator(ch: u32) -> bool { matches!( ch, - '\u{000A}' /* */ | '\u{000D}' /* */ | '\u{2028}' /* */ | '\u{2029}' /* */ + 0x000A /* */ | 0x000D /* */ | 0x2028 /* */ | 0x2029 /* */ ) } - pub(super) fn take_string_characters( + #[inline] + fn take_string_characters( cursor: &mut Cursor, start_pos: Position, terminator: StringTerminator, - strict_mode: bool, + is_strict_mode: bool, ) -> Result<(String, Span), Error> where R: Read, @@ -91,97 +118,25 @@ impl StringLiteral { let mut buf = Vec::new(); loop { let ch_start_pos = cursor.pos(); - let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap(); + let ch = cursor.next_char()?; match ch { - Some('\'') if terminator == StringTerminator::SingleQuote => { - break; - } - Some('"') if terminator == StringTerminator::DoubleQuote => { - break; - } - None if terminator == StringTerminator::End => { - break; - } - Some('\\') => { + Some(0x0027 /* ' */) if terminator == StringTerminator::SingleQuote => break, + Some(0x0022 /* " */) if terminator == StringTerminator::DoubleQuote => break, + Some(0x005C /* \ */) => { let _timer = BoaProfiler::global() .start_event("StringLiteral - escape sequence", "Lexing"); - let escape_ch = cursor - .next_char()? - .and_then(|byte| char::try_from(byte).ok()) - .ok_or_else(|| { - Error::from(io::Error::new( - ErrorKind::UnexpectedEof, - "unterminated escape sequence in literal", - )) - })?; - - match escape_ch { - 'b' => buf.push(0x0008 /* */), - 't' => buf.push(0x0009 /* */), - 'n' => buf.push(0x000A /* */), - 'v' => buf.push(0x000B /* */), - 'f' => buf.push(0x000C /* */), - 'r' => buf.push(0x000D /* */), - '"' => buf.push(0x0022 /* " */), - '\'' => buf.push(0x0027 /* ' */), - '\\' => buf.push(0x005C /* \ */), - '0' if cursor - .peek()? - .filter(|next_byte| (b'0'..=b'9').contains(next_byte)) - .is_none() => - { - buf.push(0x0000 /* NULL */) - } - 'x' => { - Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; - } - 'u' => { - Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; - } - '8' | '9' => { - // Grammar: NonOctalDecimalEscapeSequence - if strict_mode { - return Err(Error::syntax( - "\\8 and \\9 are not allowed in strict mode", - ch_start_pos, - )); - } else { - buf.push(escape_ch as u16); - } - } - _ if escape_ch.is_digit(8) => { - Self::take_legacy_octal_escape_sequence( - cursor, - ch_start_pos, - Some(&mut buf), - strict_mode, - escape_ch as u8, - )?; - } - _ if Self::is_line_terminator(escape_ch) => { - // Grammar: LineContinuation - // Grammar: \ LineTerminatorSequence - // LineContinuation is the empty String. Do nothing and continue lexing. 
- } - _ => { - if escape_ch.len_utf16() == 1 { - buf.push(escape_ch as u16); - } else { - buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter()); - } - } - }; - } - Some(ch) => { - if ch.len_utf16() == 1 { - buf.push(ch as u16); - } else { - buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter()); + if let Some(escape_value) = Self::take_escape_sequence_or_line_continuation(cursor, ch_start_pos, is_strict_mode, false)? { + buf.push_code_point(escape_value); } } - None => { + Some(0x2028) => buf.push(0x2028 /* */), + Some(0x2029) => buf.push(0x2029 /* */), + Some(ch) if !Self::is_line_terminator(ch) => { + buf.push_code_point(ch); + } + _ => { return Err(Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated string literal", @@ -190,17 +145,99 @@ impl StringLiteral { } } - Ok(( - String::from_utf16_lossy(buf.as_slice()), - Span::new(start_pos, cursor.pos()), - )) + Ok((buf.to_string_lossy(), Span::new(start_pos, cursor.pos()))) + } + + #[inline] + pub(super) fn take_escape_sequence_or_line_continuation( + cursor: &mut Cursor, + start_pos: Position, + is_strict_mode: bool, + is_template_literal: bool, + ) -> Result, Error> + where + R: Read, + { + let escape_ch = cursor.next_char()?.ok_or_else(|| { + Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated escape sequence in literal", + )) + })?; + + let escape_value = match escape_ch { + 0x0062 /* b */ => Some(0x0008 /* */), + 0x0074 /* t */ => Some(0x0009 /* */), + 0x006E /* n */ => Some(0x000A /* */), + 0x0076 /* v */ => Some(0x000B /* */), + 0x0066 /* f */ => Some(0x000C /* */), + 0x0072 /* r */ => Some(0x000D /* */), + 0x0022 /* " */ => Some(0x0022 /* " */), + 0x0027 /* ' */ => Some(0x0027 /* ' */), + 0x005C /* \ */ => Some(0x005C /* \ */), + 0x0030 /* 0 */ if cursor + .peek()? + .filter(|next_byte| (b'0'..=b'9').contains(next_byte)) + .is_none() => + Some(0x0000 /* NULL */), + 0x0078 /* x */ => { + Some(Self::take_hex_escape_sequence(cursor, start_pos)?) + } + 0x0075 /* u */ => { + Some(Self::take_unicode_escape_sequence(cursor, start_pos)?) + } + 0x0038 /* 8 */ | 0x0039 /* 9 */ => { + // Grammar: NonOctalDecimalEscapeSequence + if is_template_literal { + return Err(Error::syntax( + "\\8 and \\9 are not allowed in template literal", + start_pos, + )); + } else if is_strict_mode { + return Err(Error::syntax( + "\\8 and \\9 are not allowed in strict mode", + start_pos, + )); + } else { + Some(escape_ch) + } + } + _ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => { + if is_template_literal { + return Err(Error::syntax( + "octal escape sequences are not allowed in template literal", + start_pos, + )); + } else if is_strict_mode { + return Err(Error::syntax( + "octal escape sequences are not allowed in strict mode", + start_pos, + )); + } else { + Some(Self::take_legacy_octal_escape_sequence( + cursor, + escape_ch as u8, + )?) + } + } + _ if Self::is_line_terminator(escape_ch) => { + // Grammar: LineContinuation + // Grammar: \ LineTerminatorSequence + // LineContinuation is the empty String. 
+ None + } + _ => { + Some(escape_ch) + } + }; + + Ok(escape_value) } #[inline] pub(super) fn take_unicode_escape_sequence( cursor: &mut Cursor, start_pos: Position, - code_units_buf: Option<&mut Vec>, ) -> Result where R: Read, @@ -227,15 +264,6 @@ impl StringLiteral { "Unicode codepoint must not be greater than 0x10FFFF in escape sequence", start_pos, )); - } else if let Some(code_units_buf) = code_units_buf { - if code_point <= 65535 { - code_units_buf.push(code_point as u16); - } else { - let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; - let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; - code_units_buf.push(cu1); - code_units_buf.push(cu2); - } } Ok(code_point) @@ -251,10 +279,6 @@ impl StringLiteral { .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok()) .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?; - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point); - } - Ok(code_point as u32) } } @@ -263,7 +287,6 @@ impl StringLiteral { fn take_hex_escape_sequence( cursor: &mut Cursor, start_pos: Position, - code_units_buf: Option<&mut Vec>, ) -> Result where R: Read, @@ -275,30 +298,17 @@ impl StringLiteral { .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok()) .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?; - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point); - } - Ok(code_point as u32) } #[inline] fn take_legacy_octal_escape_sequence( cursor: &mut Cursor, - start_pos: Position, - code_units_buf: Option<&mut Vec>, - strict_mode: bool, init_byte: u8, ) -> Result where R: Read, { - if strict_mode { - return Err(Error::syntax( - "octal escape sequences are not allowed in strict mode", - start_pos, - )); - } // Grammar: OctalDigit let mut code_point = (init_byte - b'0') as u32; @@ -321,10 +331,6 @@ impl StringLiteral { } } - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point as u16); - } - Ok(code_point) } } diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index ecec7a7387f..9636d1c849b 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -3,13 +3,12 @@ use super::{Cursor, Error, Tokenizer}; use crate::{ profiler::BoaProfiler, - syntax::lexer::string::{StringLiteral, StringTerminator}, + syntax::lexer::string::{StringLiteral, UTF16CodeUnitsBuffer}, syntax::{ ast::{Position, Span}, lexer::{Token, TokenKind}, }, }; -use std::convert::TryFrom; use std::io::{self, ErrorKind, Read}; /// Template literal lexing. @@ -34,65 +33,92 @@ impl Tokenizer for TemplateLiteral { let mut buf = Vec::new(); loop { - let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| { + let ch = cursor.next_char()?.ok_or_else(|| { Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated template literal", )) - })?) - .unwrap(); - match next_chr { - '`' => { - let raw = String::from_utf16_lossy(buf.as_slice()); - let (cooked, _) = StringLiteral::take_string_characters( - &mut Cursor::with_position(raw.as_bytes(), start_pos), - start_pos, - StringTerminator::End, - true, - )?; + })?; + + match ch { + 0x0060 /* ` */ => { + let raw = buf.to_string_lossy(); + // TODO: Cook the raw string only when needed (lazy evaluation) + let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + return Ok(Token::new( TokenKind::template_no_substitution(raw, cooked), Span::new(start_pos, cursor.pos()), )); } - '$' if cursor.peek()? 
== Some(b'{') => { - let _ = cursor.next_byte()?; - let raw = String::from_utf16_lossy(buf.as_slice()); - let (cooked, _) = StringLiteral::take_string_characters( - &mut Cursor::with_position(raw.as_bytes(), start_pos), - start_pos, - StringTerminator::End, - true, - )?; + 0x0024 /* $ */ if cursor.next_is(b'{')? => { + let raw = buf.to_string_lossy(); + // TODO: Cook the raw string only when needed (lazy evaluation) + let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + return Ok(Token::new( TokenKind::template_middle(raw, cooked), Span::new(start_pos, cursor.pos()), )); } - '\\' => { - let escape = cursor.peek()?.ok_or_else(|| { + 0x005C /* \ */ => { + let escape_ch = cursor.peek()?.ok_or_else(|| { Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated escape sequence in literal", )) })?; - buf.push('\\' as u16); - match escape { + + buf.push(b'\\' as u16); + match escape_ch { b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16), _ => continue, } } - next_ch => { - if next_ch.len_utf16() == 1 { - buf.push(next_ch as u16); - } else { - let mut code_point_bytes_buf = [0u16; 2]; - let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf); + ch => { + buf.push_code_point(ch); + } + } + } + } +} - buf.extend(code_point_bytes.iter()); +impl TemplateLiteral { + fn cook_template_string( + raw: &str, + start_pos: Position, + is_strict_mode: bool, + ) -> Result { + let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos); + let mut buf: Vec = Vec::new(); + + loop { + let ch_start_pos = cursor.pos(); + let ch = cursor.next_char()?; + + match ch { + Some(0x005C /* \ */) => { + if let Some(escape_value) = + StringLiteral::take_escape_sequence_or_line_continuation( + &mut cursor, + ch_start_pos, + is_strict_mode, + true, + )? 
+ { + buf.push_code_point(escape_value); } } + Some(ch) => { + // The caller guarantees that sequences '`' and '${' never appear + // LineTerminatorSequence is consumed by `cursor.next_char()` and returns , + // which matches the TV of + buf.push_code_point(ch); + } + None => break, } } + + Ok(buf.to_string_lossy()) } } diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 7ef4a34bc04..2cafc5c9000 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -6,7 +6,6 @@ use super::token::Numeric; use super::*; use super::{Error, Position}; use crate::syntax::ast::Keyword; -use crate::syntax::lexer::string::{StringLiteral, StringTerminator}; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -815,9 +814,9 @@ fn illegal_code_point_following_numeric_literal() { #[test] fn string_unicode() { - let str = r#"'中文';"#; + let s = r#"'中文';"#; - let mut lexer = Lexer::new(str.as_bytes()); + let mut lexer = Lexer::new(s.as_bytes()); let expected = [ TokenKind::StringLiteral("中文".into()), @@ -859,74 +858,56 @@ fn string_unicode_escape_with_braces() { } #[test] -fn take_string_characters_unicode_escape_with_braces_2() { - let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string(); - - let mut cursor = Cursor::new(s.as_bytes()); - - if let Ok((s, _)) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) { - assert_eq!(s, "\u{20ac}\u{a0}\u{a0}") - } else { - panic!(); - } +fn string_unicode_escape_with_braces_2() { + let s = r#"'\u{20ac}\u{a0}\u{a0}'"#; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected = [TokenKind::StringLiteral("\u{20ac}\u{a0}\u{a0}".into())]; + + expect_tokens(&mut lexer, &expected); } #[test] -fn take_string_characters_with_single_escape() { - let s = r#"\Б"#.to_string(); - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); - assert_eq!(s, "Б"); +fn string_with_single_escape() { + let s = r#"'\Б'"#; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected = [TokenKind::StringLiteral("Б".into())]; + + expect_tokens(&mut lexer, &expected); } #[test] -fn take_string_characters_legacy_octal_escape() { +fn string_legacy_octal_escape() { let test_cases = [ - (r#"\3"#, "\u{3}"), - (r#"\03"#, "\u{3}"), - (r#"\003"#, "\u{3}"), - (r#"\0003"#, "\u{0}3"), - (r#"\43"#, "#"), - (r#"\043"#, "#"), - (r#"\101"#, "A"), + (r#"'\3'"#, "\u{3}"), + (r#"'\03'"#, "\u{3}"), + (r#"'\003'"#, "\u{3}"), + (r#"'\0003'"#, "\u{0}3"), + (r#"'\43'"#, "#"), + (r#"'\043'"#, "#"), + (r#"'\101'"#, "A"), ]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; - assert_eq!(s, *expected); + expect_tokens(&mut lexer, &expected_tokens); } for (s, _) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - - if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - true, - ) - .expect_err("Octal-escape in strict mode not rejected as expected") + let mut lexer = Lexer::new(s.as_bytes()); + lexer.set_strict_mode(true); + + if let Error::Syntax(_, pos) = lexer + .next() + 
.expect_err("Octal-escape in strict mode not rejected as expected") { - assert_eq!(pos, Position::new(1, 1)); + assert_eq!(pos, Position::new(1, 2)); } else { panic!("invalid error type"); } @@ -934,52 +915,39 @@ fn take_string_characters_legacy_octal_escape() { } #[test] -fn take_string_characters_zero_escape() { - let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")]; +fn string_zero_escape() { + let test_cases = [(r#"'\0'"#, "\u{0}"), (r#"'\0A'"#, "\u{0}A")]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; - assert_eq!(s, *expected); + expect_tokens(&mut lexer, &expected_tokens); } } #[test] -fn take_string_characters_non_octal_decimal_escape() { - let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")]; +fn string_non_octal_decimal_escape() { + let test_cases = [(r#"'\8'"#, "8"), (r#"'\9'"#, "9")]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); - assert_eq!(s, *expected); + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; + + expect_tokens(&mut lexer, &expected_tokens); } for (s, _) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - - if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - true, - ) - .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected") + let mut lexer = Lexer::new(s.as_bytes()); + lexer.set_strict_mode(true); + + if let Error::Syntax(_, pos) = lexer + .next() + .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected") { - assert_eq!(pos, Position::new(1, 1)); + assert_eq!(pos, Position::new(1, 2)); } else { panic!("invalid error type"); } @@ -987,18 +955,14 @@ fn take_string_characters_non_octal_decimal_escape() { } #[test] -fn take_string_characters_line_continuation() { - let s = "hello \\\nworld"; - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); +fn string_line_continuation() { + let s = "'hello \\\nworld'"; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral("hello world".into())]; - assert_eq!(s, "hello world"); + expect_tokens(&mut lexer, &expected_tokens); } mod carriage_return {
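As a cross-check on the UTF-16 handling introduced above, the following standalone Rust sketch mirrors the surrogate-pair arithmetic behind `UTF16CodeUnitsBuffer::push_code_point` and compares it with the standard library's `char::encode_utf16`; the free function and the `main` harness are illustrative only and are not part of the patch.

// Illustrative sketch of the encoding performed by `push_code_point` in the patch above.
fn push_code_point(buf: &mut Vec<u16>, code_point: u32) {
    if code_point <= 0xFFFF {
        // Code points in the BMP (and lone surrogate values) fit in a single code unit.
        buf.push(code_point as u16);
    } else {
        // Supplementary code points are split into a high/low surrogate pair.
        let cu1 = ((code_point - 0x10000) / 0x400 + 0xD800) as u16;
        let cu2 = ((code_point - 0x10000) % 0x400 + 0xDC00) as u16;
        buf.push(cu1);
        buf.push(cu2);
    }
}

fn main() {
    let mut buf = Vec::new();
    push_code_point(&mut buf, 0x1F600); // U+1F600, outside the BMP
    assert_eq!(buf, vec![0xD83D_u16, 0xDE00]);

    // Agrees with the standard library's encoding for any valid scalar value.
    let mut units = [0_u16; 2];
    let expected: &[u16] = char::from_u32(0x1F600).unwrap().encode_utf16(&mut units);
    assert_eq!(buf.as_slice(), expected);

    assert_eq!(String::from_utf16_lossy(&buf), "\u{1F600}");
}

Keeping code units instead of `char`s is what allows `\u` escapes to produce lone surrogates during lexing; invalid sequences are only replaced with U+FFFD when `to_string_lossy` builds the final `String`.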
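The octal-escape expectations in the tests above can likewise be reproduced in isolation. This sketch computes the value of a LegacyOctalEscapeSequence from its first digit plus the remaining input, following the ZeroToThree / FourToSeven split from the spec grammar; it operates on a plain byte slice rather than the lexer's `Cursor` and is illustrative only.

// Returns the escape value and how many extra digits were consumed.
fn legacy_octal_escape(first_digit: u8, rest: &[u8]) -> (u32, usize) {
    // Grammar: OctalDigit
    let mut code_point = (first_digit - b'0') as u32;
    // ZeroToThree may be followed by up to two more octal digits, FourToSeven by at most one.
    let max_extra = if (b'0'..=b'3').contains(&first_digit) { 2 } else { 1 };
    let mut consumed = 0;
    while consumed < max_extra {
        match rest.get(consumed) {
            Some(&b) if (b'0'..=b'7').contains(&b) => {
                code_point = code_point * 8 + u32::from(b - b'0');
                consumed += 1;
            }
            _ => break,
        }
    }
    (code_point, consumed)
}

fn main() {
    // Mirrors the expectations in `string_legacy_octal_escape`:
    assert_eq!(legacy_octal_escape(b'1', b"01"), (0x41, 2)); // '\101' -> 'A'
    assert_eq!(legacy_octal_escape(b'4', b"3"), (0x23, 1)); // '\43' -> '#'
    assert_eq!(legacy_octal_escape(b'0', b"003"), (0, 2)); // '\0003' -> NUL, then a literal '3'
}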