diff --git a/boa/src/builtins/json/tests.rs b/boa/src/builtins/json/tests.rs index abf4f5a1144..aec393982b1 100644 --- a/boa/src/builtins/json/tests.rs +++ b/boa/src/builtins/json/tests.rs @@ -217,10 +217,10 @@ fn json_stringify_pretty_print() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -235,10 +235,10 @@ fn json_stringify_pretty_print_four_spaces() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -253,10 +253,10 @@ fn json_stringify_pretty_print_twenty_spaces() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -271,10 +271,10 @@ fn json_stringify_pretty_print_with_number_object() { ); let expected = forward( &mut context, - r#"'{ - "a": "b", - "b": "c" -}'"#, + r#"'{\n' + +' "a": "b",\n' + +' "b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -301,10 +301,10 @@ fn json_stringify_pretty_print_with_too_long_string() { ); let expected = forward( &mut context, - r#"'{ -abcdefghij"a": "b", -abcdefghij"b": "c" -}'"#, + r#"'{\n' + +'abcdefghij"a": "b",\n' + +'abcdefghij"b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -319,10 +319,10 @@ fn json_stringify_pretty_print_with_string_object() { ); let expected = forward( &mut context, - r#"'{ -abcd"a": "b", -abcd"b": "c" -}'"#, + r#"'{\n' + +'abcd"a": "b",\n' + +'abcd"b": "c"\n' + +'}'"#, ); assert_eq!(actual, expected); } @@ -404,10 +404,7 @@ fn json_parse_object_with_reviver() { fn json_parse_sets_prototypes() { let mut context = Context::new(); let init = r#" - const jsonString = "{ - \"ob\":{\"ject\":1}, - \"arr\": [0,1] - }"; + const jsonString = "{\"ob\":{\"ject\":1},\"arr\": [0,1]}"; const jsonObj = JSON.parse(jsonString); "#; eprintln!("{}", forward(&mut context, init)); diff --git a/boa/src/builtins/string/tests.rs b/boa/src/builtins/string/tests.rs index e94d0636aff..0e5f0791600 100644 --- a/boa/src/builtins/string/tests.rs +++ b/boa/src/builtins/string/tests.rs @@ -533,34 +533,46 @@ fn test_match() { #[test] fn trim() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' \nHello'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "'Hello \n\r'.trim()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' Hello '.trim()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"' \nHello'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello \n\r'.trim()"#), "\"Hello\""); + assert_eq!(forward(&mut context, r#"' Hello '.trim()"#), "\"Hello\""); } #[test] fn trim_start() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trimStart()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' \nHello'.trimStart()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trimStart()"#), "\"Hello\""); assert_eq!( - forward(&mut context, "'Hello \n'.trimStart()"), + forward(&mut context, r#"' \nHello'.trimStart()"#), + "\"Hello\"" + ); + assert_eq!( + forward(&mut context, r#"'Hello \n'.trimStart()"#), "\"Hello \n\"" ); - assert_eq!(forward(&mut context, "' Hello '.trimStart()"), "\"Hello \""); + assert_eq!( + forward(&mut context, r#"' Hello '.trimStart()"#), + "\"Hello \"" + ); } #[test] fn trim_end() { let mut context = Context::new(); - assert_eq!(forward(&mut context, "'Hello'.trimEnd()"), "\"Hello\""); + assert_eq!(forward(&mut context, r#"'Hello'.trimEnd()"#), "\"Hello\""); assert_eq!( - forward(&mut context, "' \nHello'.trimEnd()"), + forward(&mut context, r#"' \nHello'.trimEnd()"#), "\" \nHello\"" ); - assert_eq!(forward(&mut context, "'Hello \n'.trimEnd()"), "\"Hello\""); - assert_eq!(forward(&mut context, "' Hello '.trimEnd()"), "\" Hello\""); + assert_eq!( + forward(&mut context, r#"'Hello \n'.trimEnd()"#), + "\"Hello\"" + ); + assert_eq!( + forward(&mut context, r#"' Hello '.trimEnd()"#), + "\" Hello\"" + ); } #[test] diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs index b4542a70d41..d6b5049edb8 100644 --- a/boa/src/syntax/lexer/string.rs +++ b/boa/src/syntax/lexer/string.rs @@ -8,7 +8,6 @@ use crate::{ lexer::{Token, TokenKind}, }, }; -use core::convert::TryFrom; use std::{ io::{self, ErrorKind, Read}, str, @@ -47,7 +46,34 @@ impl StringLiteral { pub(crate) enum StringTerminator { SingleQuote, DoubleQuote, - End, +} + +/// Extends a buffer type to store UTF-16 code units and convert to string. +pub(crate) trait UTF16CodeUnitsBuffer { + /// Encodes the code point to UTF-16 code units and push to the buffer. + fn push_code_point(&mut self, code_point: u32); + + /// Decodes the buffer into a String and replace the invalid data with the replacement character (U+FFFD). + fn to_string_lossy(&self) -> String; +} + +impl UTF16CodeUnitsBuffer for Vec { + #[inline] + fn push_code_point(&mut self, code_point: u32) { + if code_point <= 65535 { + self.push(code_point as u16); + } else { + let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; + let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; + self.push(cu1); + self.push(cu2); + } + } + + #[inline] + fn to_string_lossy(&self) -> String { + String::from_utf16_lossy(self.as_slice()) + } } impl Tokenizer for StringLiteral { @@ -72,18 +98,19 @@ impl StringLiteral { /// /// [spec]: https://tc39.es/ecma262/#prod-LineTerminator #[inline] - pub(super) fn is_line_terminator(ch: char) -> bool { + pub(super) fn is_line_terminator(ch: u32) -> bool { matches!( ch, - '\u{000A}' /* */ | '\u{000D}' /* */ | '\u{2028}' /* */ | '\u{2029}' /* */ + 0x000A /* */ | 0x000D /* */ | 0x2028 /* */ | 0x2029 /* */ ) } - pub(super) fn take_string_characters( + #[inline] + fn take_string_characters( cursor: &mut Cursor, start_pos: Position, terminator: StringTerminator, - strict_mode: bool, + is_strict_mode: bool, ) -> Result<(String, Span), Error> where R: Read, @@ -91,97 +118,25 @@ impl StringLiteral { let mut buf = Vec::new(); loop { let ch_start_pos = cursor.pos(); - let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap(); + let ch = cursor.next_char()?; match ch { - Some('\'') if terminator == StringTerminator::SingleQuote => { - break; - } - Some('"') if terminator == StringTerminator::DoubleQuote => { - break; - } - None if terminator == StringTerminator::End => { - break; - } - Some('\\') => { + Some(0x0027 /* ' */) if terminator == StringTerminator::SingleQuote => break, + Some(0x0022 /* " */) if terminator == StringTerminator::DoubleQuote => break, + Some(0x005C /* \ */) => { let _timer = BoaProfiler::global() .start_event("StringLiteral - escape sequence", "Lexing"); - let escape_ch = cursor - .next_char()? - .and_then(|byte| char::try_from(byte).ok()) - .ok_or_else(|| { - Error::from(io::Error::new( - ErrorKind::UnexpectedEof, - "unterminated escape sequence in literal", - )) - })?; - - match escape_ch { - 'b' => buf.push(0x0008 /* */), - 't' => buf.push(0x0009 /* */), - 'n' => buf.push(0x000A /* */), - 'v' => buf.push(0x000B /* */), - 'f' => buf.push(0x000C /* */), - 'r' => buf.push(0x000D /* */), - '"' => buf.push(0x0022 /* " */), - '\'' => buf.push(0x0027 /* ' */), - '\\' => buf.push(0x005C /* \ */), - '0' if cursor - .peek()? - .filter(|next_byte| (b'0'..=b'9').contains(next_byte)) - .is_none() => - { - buf.push(0x0000 /* NULL */) - } - 'x' => { - Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; - } - 'u' => { - Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?; - } - '8' | '9' => { - // Grammar: NonOctalDecimalEscapeSequence - if strict_mode { - return Err(Error::syntax( - "\\8 and \\9 are not allowed in strict mode", - ch_start_pos, - )); - } else { - buf.push(escape_ch as u16); - } - } - _ if escape_ch.is_digit(8) => { - Self::take_legacy_octal_escape_sequence( - cursor, - ch_start_pos, - Some(&mut buf), - strict_mode, - escape_ch as u8, - )?; - } - _ if Self::is_line_terminator(escape_ch) => { - // Grammar: LineContinuation - // Grammar: \ LineTerminatorSequence - // LineContinuation is the empty String. Do nothing and continue lexing. - } - _ => { - if escape_ch.len_utf16() == 1 { - buf.push(escape_ch as u16); - } else { - buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter()); - } - } - }; - } - Some(ch) => { - if ch.len_utf16() == 1 { - buf.push(ch as u16); - } else { - buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter()); + if let Some(escape_value) = Self::take_escape_sequence_or_line_continuation(cursor, ch_start_pos, is_strict_mode, false)? { + buf.push_code_point(escape_value); } } - None => { + Some(0x2028) => buf.push(0x2028 /* */), + Some(0x2029) => buf.push(0x2029 /* */), + Some(ch) if !Self::is_line_terminator(ch) => { + buf.push_code_point(ch); + } + _ => { return Err(Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated string literal", @@ -190,17 +145,99 @@ impl StringLiteral { } } - Ok(( - String::from_utf16_lossy(buf.as_slice()), - Span::new(start_pos, cursor.pos()), - )) + Ok((buf.to_string_lossy(), Span::new(start_pos, cursor.pos()))) + } + + #[inline] + pub(super) fn take_escape_sequence_or_line_continuation( + cursor: &mut Cursor, + start_pos: Position, + is_strict_mode: bool, + is_template_literal: bool, + ) -> Result, Error> + where + R: Read, + { + let escape_ch = cursor.next_char()?.ok_or_else(|| { + Error::from(io::Error::new( + ErrorKind::UnexpectedEof, + "unterminated escape sequence in literal", + )) + })?; + + let escape_value = match escape_ch { + 0x0062 /* b */ => Some(0x0008 /* */), + 0x0074 /* t */ => Some(0x0009 /* */), + 0x006E /* n */ => Some(0x000A /* */), + 0x0076 /* v */ => Some(0x000B /* */), + 0x0066 /* f */ => Some(0x000C /* */), + 0x0072 /* r */ => Some(0x000D /* */), + 0x0022 /* " */ => Some(0x0022 /* " */), + 0x0027 /* ' */ => Some(0x0027 /* ' */), + 0x005C /* \ */ => Some(0x005C /* \ */), + 0x0030 /* 0 */ if cursor + .peek()? + .filter(|next_byte| (b'0'..=b'9').contains(next_byte)) + .is_none() => + Some(0x0000 /* NULL */), + 0x0078 /* x */ => { + Some(Self::take_hex_escape_sequence(cursor, start_pos)?) + } + 0x0075 /* u */ => { + Some(Self::take_unicode_escape_sequence(cursor, start_pos)?) + } + 0x0038 /* 8 */ | 0x0039 /* 9 */ => { + // Grammar: NonOctalDecimalEscapeSequence + if is_template_literal { + return Err(Error::syntax( + "\\8 and \\9 are not allowed in template literal", + start_pos, + )); + } else if is_strict_mode { + return Err(Error::syntax( + "\\8 and \\9 are not allowed in strict mode", + start_pos, + )); + } else { + Some(escape_ch) + } + } + _ if (0x0030..=0x0037 /* '0'..='7' */).contains(&escape_ch) => { + if is_template_literal { + return Err(Error::syntax( + "octal escape sequences are not allowed in template literal", + start_pos, + )); + } else if is_strict_mode { + return Err(Error::syntax( + "octal escape sequences are not allowed in strict mode", + start_pos, + )); + } else { + Some(Self::take_legacy_octal_escape_sequence( + cursor, + escape_ch as u8, + )?) + } + } + _ if Self::is_line_terminator(escape_ch) => { + // Grammar: LineContinuation + // Grammar: \ LineTerminatorSequence + // LineContinuation is the empty String. + None + } + _ => { + Some(escape_ch) + } + }; + + Ok(escape_value) } #[inline] pub(super) fn take_unicode_escape_sequence( cursor: &mut Cursor, start_pos: Position, - code_units_buf: Option<&mut Vec>, ) -> Result where R: Read, @@ -227,15 +264,6 @@ impl StringLiteral { "Unicode codepoint must not be greater than 0x10FFFF in escape sequence", start_pos, )); - } else if let Some(code_units_buf) = code_units_buf { - if code_point <= 65535 { - code_units_buf.push(code_point as u16); - } else { - let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16; - let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16; - code_units_buf.push(cu1); - code_units_buf.push(cu2); - } } Ok(code_point) @@ -251,10 +279,6 @@ impl StringLiteral { .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok()) .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?; - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point); - } - Ok(code_point as u32) } } @@ -263,7 +287,6 @@ impl StringLiteral { fn take_hex_escape_sequence( cursor: &mut Cursor, start_pos: Position, - code_units_buf: Option<&mut Vec>, ) -> Result where R: Read, @@ -275,30 +298,17 @@ impl StringLiteral { .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok()) .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?; - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point); - } - Ok(code_point as u32) } #[inline] fn take_legacy_octal_escape_sequence( cursor: &mut Cursor, - start_pos: Position, - code_units_buf: Option<&mut Vec>, - strict_mode: bool, init_byte: u8, ) -> Result where R: Read, { - if strict_mode { - return Err(Error::syntax( - "octal escape sequences are not allowed in strict mode", - start_pos, - )); - } // Grammar: OctalDigit let mut code_point = (init_byte - b'0') as u32; @@ -321,10 +331,6 @@ impl StringLiteral { } } - if let Some(code_units_buf) = code_units_buf { - code_units_buf.push(code_point as u16); - } - Ok(code_point) } } diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs index ecec7a7387f..9636d1c849b 100644 --- a/boa/src/syntax/lexer/template.rs +++ b/boa/src/syntax/lexer/template.rs @@ -3,13 +3,12 @@ use super::{Cursor, Error, Tokenizer}; use crate::{ profiler::BoaProfiler, - syntax::lexer::string::{StringLiteral, StringTerminator}, + syntax::lexer::string::{StringLiteral, UTF16CodeUnitsBuffer}, syntax::{ ast::{Position, Span}, lexer::{Token, TokenKind}, }, }; -use std::convert::TryFrom; use std::io::{self, ErrorKind, Read}; /// Template literal lexing. @@ -34,65 +33,92 @@ impl Tokenizer for TemplateLiteral { let mut buf = Vec::new(); loop { - let next_chr = char::try_from(cursor.next_char()?.ok_or_else(|| { + let ch = cursor.next_char()?.ok_or_else(|| { Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated template literal", )) - })?) - .unwrap(); - match next_chr { - '`' => { - let raw = String::from_utf16_lossy(buf.as_slice()); - let (cooked, _) = StringLiteral::take_string_characters( - &mut Cursor::with_position(raw.as_bytes(), start_pos), - start_pos, - StringTerminator::End, - true, - )?; + })?; + + match ch { + 0x0060 /* ` */ => { + let raw = buf.to_string_lossy(); + // TODO: Cook the raw string only when needed (lazy evaluation) + let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + return Ok(Token::new( TokenKind::template_no_substitution(raw, cooked), Span::new(start_pos, cursor.pos()), )); } - '$' if cursor.peek()? == Some(b'{') => { - let _ = cursor.next_byte()?; - let raw = String::from_utf16_lossy(buf.as_slice()); - let (cooked, _) = StringLiteral::take_string_characters( - &mut Cursor::with_position(raw.as_bytes(), start_pos), - start_pos, - StringTerminator::End, - true, - )?; + 0x0024 /* $ */ if cursor.next_is(b'{')? => { + let raw = buf.to_string_lossy(); + // TODO: Cook the raw string only when needed (lazy evaluation) + let cooked = Self::cook_template_string(&raw, start_pos, cursor.strict_mode())?; + return Ok(Token::new( TokenKind::template_middle(raw, cooked), Span::new(start_pos, cursor.pos()), )); } - '\\' => { - let escape = cursor.peek()?.ok_or_else(|| { + 0x005C /* \ */ => { + let escape_ch = cursor.peek()?.ok_or_else(|| { Error::from(io::Error::new( ErrorKind::UnexpectedEof, "unterminated escape sequence in literal", )) })?; - buf.push('\\' as u16); - match escape { + + buf.push(b'\\' as u16); + match escape_ch { b'`' | b'$' | b'\\' => buf.push(cursor.next_byte()?.unwrap() as u16), _ => continue, } } - next_ch => { - if next_ch.len_utf16() == 1 { - buf.push(next_ch as u16); - } else { - let mut code_point_bytes_buf = [0u16; 2]; - let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf); + ch => { + buf.push_code_point(ch); + } + } + } + } +} - buf.extend(code_point_bytes.iter()); +impl TemplateLiteral { + fn cook_template_string( + raw: &str, + start_pos: Position, + is_strict_mode: bool, + ) -> Result { + let mut cursor = Cursor::with_position(raw.as_bytes(), start_pos); + let mut buf: Vec = Vec::new(); + + loop { + let ch_start_pos = cursor.pos(); + let ch = cursor.next_char()?; + + match ch { + Some(0x005C /* \ */) => { + if let Some(escape_value) = + StringLiteral::take_escape_sequence_or_line_continuation( + &mut cursor, + ch_start_pos, + is_strict_mode, + true, + )? + { + buf.push_code_point(escape_value); } } + Some(ch) => { + // The caller guarantees that sequences '`' and '${' never appear + // LineTerminatorSequence is consumed by `cursor.next_char()` and returns , + // which matches the TV of + buf.push_code_point(ch); + } + None => break, } } + + Ok(buf.to_string_lossy()) } } diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs index 7ef4a34bc04..2cafc5c9000 100644 --- a/boa/src/syntax/lexer/tests.rs +++ b/boa/src/syntax/lexer/tests.rs @@ -6,7 +6,6 @@ use super::token::Numeric; use super::*; use super::{Error, Position}; use crate::syntax::ast::Keyword; -use crate::syntax::lexer::string::{StringLiteral, StringTerminator}; use std::str; fn span(start: (u32, u32), end: (u32, u32)) -> Span { @@ -815,9 +814,9 @@ fn illegal_code_point_following_numeric_literal() { #[test] fn string_unicode() { - let str = r#"'中文';"#; + let s = r#"'中文';"#; - let mut lexer = Lexer::new(str.as_bytes()); + let mut lexer = Lexer::new(s.as_bytes()); let expected = [ TokenKind::StringLiteral("中文".into()), @@ -859,74 +858,56 @@ fn string_unicode_escape_with_braces() { } #[test] -fn take_string_characters_unicode_escape_with_braces_2() { - let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string(); - - let mut cursor = Cursor::new(s.as_bytes()); - - if let Ok((s, _)) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) { - assert_eq!(s, "\u{20ac}\u{a0}\u{a0}") - } else { - panic!(); - } +fn string_unicode_escape_with_braces_2() { + let s = r#"'\u{20ac}\u{a0}\u{a0}'"#; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected = [TokenKind::StringLiteral("\u{20ac}\u{a0}\u{a0}".into())]; + + expect_tokens(&mut lexer, &expected); } #[test] -fn take_string_characters_with_single_escape() { - let s = r#"\Б"#.to_string(); - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); - assert_eq!(s, "Б"); +fn string_with_single_escape() { + let s = r#"'\Б'"#; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected = [TokenKind::StringLiteral("Б".into())]; + + expect_tokens(&mut lexer, &expected); } #[test] -fn take_string_characters_legacy_octal_escape() { +fn string_legacy_octal_escape() { let test_cases = [ - (r#"\3"#, "\u{3}"), - (r#"\03"#, "\u{3}"), - (r#"\003"#, "\u{3}"), - (r#"\0003"#, "\u{0}3"), - (r#"\43"#, "#"), - (r#"\043"#, "#"), - (r#"\101"#, "A"), + (r#"'\3'"#, "\u{3}"), + (r#"'\03'"#, "\u{3}"), + (r#"'\003'"#, "\u{3}"), + (r#"'\0003'"#, "\u{0}3"), + (r#"'\43'"#, "#"), + (r#"'\043'"#, "#"), + (r#"'\101'"#, "A"), ]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; - assert_eq!(s, *expected); + expect_tokens(&mut lexer, &expected_tokens); } for (s, _) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - - if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - true, - ) - .expect_err("Octal-escape in strict mode not rejected as expected") + let mut lexer = Lexer::new(s.as_bytes()); + lexer.set_strict_mode(true); + + if let Error::Syntax(_, pos) = lexer + .next() + .expect_err("Octal-escape in strict mode not rejected as expected") { - assert_eq!(pos, Position::new(1, 1)); + assert_eq!(pos, Position::new(1, 2)); } else { panic!("invalid error type"); } @@ -934,52 +915,39 @@ fn take_string_characters_legacy_octal_escape() { } #[test] -fn take_string_characters_zero_escape() { - let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")]; +fn string_zero_escape() { + let test_cases = [(r#"'\0'"#, "\u{0}"), (r#"'\0A'"#, "\u{0}A")]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; - assert_eq!(s, *expected); + expect_tokens(&mut lexer, &expected_tokens); } } #[test] -fn take_string_characters_non_octal_decimal_escape() { - let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")]; +fn string_non_octal_decimal_escape() { + let test_cases = [(r#"'\8'"#, "8"), (r#"'\9'"#, "9")]; for (s, expected) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); + let mut lexer = Lexer::new(s.as_bytes()); - assert_eq!(s, *expected); + let expected_tokens = [TokenKind::StringLiteral((*expected).into())]; + + expect_tokens(&mut lexer, &expected_tokens); } for (s, _) in test_cases.iter() { - let mut cursor = Cursor::new(s.as_bytes()); - - if let Error::Syntax(_, pos) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - true, - ) - .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected") + let mut lexer = Lexer::new(s.as_bytes()); + lexer.set_strict_mode(true); + + if let Error::Syntax(_, pos) = lexer + .next() + .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected") { - assert_eq!(pos, Position::new(1, 1)); + assert_eq!(pos, Position::new(1, 2)); } else { panic!("invalid error type"); } @@ -987,18 +955,14 @@ fn take_string_characters_non_octal_decimal_escape() { } #[test] -fn take_string_characters_line_continuation() { - let s = "hello \\\nworld"; - let mut cursor = Cursor::new(s.as_bytes()); - let (s, _) = StringLiteral::take_string_characters( - &mut cursor, - Position::new(1, 1), - StringTerminator::End, - false, - ) - .unwrap(); +fn string_line_continuation() { + let s = "'hello \\\nworld'"; + + let mut lexer = Lexer::new(s.as_bytes()); + + let expected_tokens = [TokenKind::StringLiteral("hello world".into())]; - assert_eq!(s, "hello world"); + expect_tokens(&mut lexer, &expected_tokens); } mod carriage_return {