From 4968fa58611b90b60deef9fa8221440769e01ae5 Mon Sep 17 00:00:00 2001 From: Carson McManus Date: Mon, 16 Sep 2024 07:12:14 -0400 Subject: [PATCH] fix(parser/html): fix whitespace being lexed as html literal (#3908) --- crates/biome_html_parser/src/lexer/mod.rs | 64 ++++++++++++------- crates/biome_html_parser/src/lexer/tests.rs | 41 ++++++++---- crates/biome_html_parser/src/syntax/mod.rs | 8 +-- crates/biome_html_parser/src/token_source.rs | 9 +-- .../error/element/br-with-end.html.snap | 26 +------- .../error/element/missing-close-tag.html.snap | 14 ++-- .../attributes/multiline-attributes.html.snap | 14 ++-- .../html_specs/ok/element_list.html.snap | 56 +++++++--------- .../tests/html_specs/ok/hello-world.html.snap | 43 ++++--------- .../html_specs/ok/no-end-tags/meta.html.snap | 11 +--- .../html_specs/ok/no-end-tags/wbr.html.snap | 51 ++++++++------- .../html_specs/ok/special-chars.html.snap | 24 +++++-- 12 files changed, 181 insertions(+), 180 deletions(-) diff --git a/crates/biome_html_parser/src/lexer/mod.rs b/crates/biome_html_parser/src/lexer/mod.rs index efef979a527f..4c799f15aa6c 100644 --- a/crates/biome_html_parser/src/lexer/mod.rs +++ b/crates/biome_html_parser/src/lexer/mod.rs @@ -61,8 +61,8 @@ impl<'src> HtmlLexer<'src> { self.consume_byte(T![>]) } b'/' => self.consume_byte(T![/]), - b'!' => self.consume_byte(T![!]), b'=' => self.consume_byte(T![=]), + b'!' 
=> self.consume_byte(T![!]), b'\'' | b'"' => self.consume_string_literal(current), // TODO: differentiate between attribute names and identifiers _ if is_identifier_byte(current) || is_attribute_name_byte(current) => { @@ -80,32 +80,14 @@ impl<'src> HtmlLexer<'src> { } } - fn consume_element_list_token(&mut self, current: u8) -> HtmlSyntaxKind { - debug_assert!(!self.is_eof()); + fn consume_token_outside_tag(&mut self, current: u8) -> HtmlSyntaxKind { match current { - b'<' => self.consume_byte(T![<]), - _ => { - while let Some(chr) = self.current_byte() { - match chr { - b'<' => break, - chr => { - if chr.is_ascii() { - self.advance(1); - } else { - self.advance_char_unchecked(); - } - } - } - } - - HTML_LITERAL - } + b'\n' | b'\r' | b'\t' | b' ' => self.consume_newline_or_whitespaces(), + b'<' => self.consume_l_angle(), + _ => self.consume_html_text(), } } - #[allow(unused)] - fn consume_element_token(&mut self, current: u8) {} - /// Bumps the current byte and creates a lexed token of the passed in kind. #[inline] fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind { @@ -337,6 +319,40 @@ impl<'src> HtmlLexer<'src> { Ok(()) } + + /// Consume HTML text literals outside of tags. + /// + /// This includes text and single spaces between words. If a newline or a second + /// consecutive space is found, this will stop consuming to allow the lexer to + /// switch to `consume_whitespace`.
+ /// + /// See: https://html.spec.whatwg.org/#space-separated-tokens + /// See: https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace + fn consume_html_text(&mut self) -> HtmlSyntaxKind { + let mut saw_space = false; + while let Some(current) = self.current_byte() { + match current { + b'<' => break, + b'\n' | b'\r' => { + self.after_newline = true; + break; + } + b' ' => { + if saw_space { + break; + } + self.advance(1); + saw_space = true; + } + _ => { + self.advance(1); + saw_space = false; + } + } + } + + HTML_LITERAL + } } impl<'src> Lexer<'src> for HtmlLexer<'src> { @@ -368,7 +384,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> { match self.current_byte() { Some(current) => match context { HtmlLexContext::Regular => self.consume_token(current), - HtmlLexContext::ElementList => self.consume_element_list_token(current), + HtmlLexContext::OutsideTag => self.consume_token_outside_tag(current), }, None => EOF, } diff --git a/crates/biome_html_parser/src/lexer/tests.rs b/crates/biome_html_parser/src/lexer/tests.rs index 2a615888cf69..7940a7e575b5 100644 --- a/crates/biome_html_parser/src/lexer/tests.rs +++ b/crates/biome_html_parser/src/lexer/tests.rs @@ -48,7 +48,7 @@ fn losslessness(string: String) -> bool { // Assert the result of lexing a piece of source code, // and make sure the tokens yielded are fully lossless and the source can be reconstructed from only the tokens macro_rules! assert_lex { - ($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{ + ($context:expr, $src:expr, $($kind:ident:$len:expr $(,)?)*) => {{ let mut lexer = HtmlLexer::from_str($src); let mut idx = 0; let mut tok_idx = TextSize::default(); @@ -56,7 +56,7 @@ macro_rules! assert_lex { let mut new_str = String::with_capacity($src.len()); let mut tokens = vec![]; - while lexer.next_token(HtmlLexContext::default()) != EOF { + while lexer.next_token($context) != EOF { tokens.push((lexer.current(), lexer.current_range())); } @@ -97,6 +97,9 @@ macro_rules! 
assert_lex { assert_eq!($src, new_str, "Failed to reconstruct input"); }}; + ($src:expr, $($kind:ident:$len:expr $(,)?)*) => { + assert_lex!(HtmlLexContext::default(), $src, $($kind:$len,)*); + }; } #[test] @@ -150,17 +153,11 @@ fn element() { } #[test] -fn element_with_text() { +fn html_text() { assert_lex! { - "
abcdefghijklmnopqrstuvwxyz!@_-:;
", - L_ANGLE: 1, - HTML_LITERAL: 3, - R_ANGLE: 1, + HtmlLexContext::OutsideTag, + "abcdefghijklmnopqrstuvwxyz!@_-:;", HTML_LITERAL: 32, - L_ANGLE: 1, - SLASH: 1, - HTML_LITERAL: 3, - R_ANGLE: 1, } } @@ -231,3 +228,25 @@ fn html_element() { R_ANGLE: 1, } } + +#[test] +fn html_text_spaces() { + assert_lex! { + HtmlLexContext::OutsideTag, + "Lorem ipsum dolor sit amet, consectetur.", + HTML_LITERAL: 40, + } +} + +#[test] +fn html_text_spaces_with_lines() { + assert_lex! { + HtmlLexContext::OutsideTag, + "Lorem ipsum dolor sit + amet, consectetur.", + HTML_LITERAL: 21, + NEWLINE: 1, + WHITESPACE: 8, + HTML_LITERAL: 18, + } +} diff --git a/crates/biome_html_parser/src/syntax/mod.rs b/crates/biome_html_parser/src/syntax/mod.rs index d5c5b209ed61..dc776b8c1a62 100644 --- a/crates/biome_html_parser/src/syntax/mod.rs +++ b/crates/biome_html_parser/src/syntax/mod.rs @@ -68,17 +68,17 @@ fn parse_element(p: &mut HtmlParser) -> ParsedSyntax { if p.at(T![/]) { p.bump(T![/]); - p.expect(T![>]); + p.expect_with_context(T![>], HtmlLexContext::OutsideTag); Present(m.complete(p, HTML_SELF_CLOSING_ELEMENT)) } else { if should_be_self_closing { if p.at(T![/]) { p.bump(T![/]); } - p.expect(T![>]); + p.expect_with_context(T![>], HtmlLexContext::OutsideTag); return Present(m.complete(p, HTML_SELF_CLOSING_ELEMENT)); } - p.expect_with_context(T![>], HtmlLexContext::ElementList); + p.expect_with_context(T![>], HtmlLexContext::OutsideTag); let opening = m.complete(p, HTML_OPENING_ELEMENT); loop { ElementList.parse_list(p); @@ -128,7 +128,7 @@ impl ParseNodeList for ElementList { T![<] => parse_element(p), HTML_LITERAL => { let m = p.start(); - p.bump(HTML_LITERAL); + p.bump_with_context(HTML_LITERAL, HtmlLexContext::OutsideTag); Present(m.complete(p, HTML_CONTENT)) } _ => Absent, diff --git a/crates/biome_html_parser/src/token_source.rs b/crates/biome_html_parser/src/token_source.rs index 4c407913cfb0..9c8d6b809ad9 100644 --- a/crates/biome_html_parser/src/token_source.rs +++ 
b/crates/biome_html_parser/src/token_source.rs @@ -16,12 +16,13 @@ pub(crate) struct HtmlTokenSource<'source> { #[derive(Copy, Clone, Debug, Default)] pub(crate) enum HtmlLexContext { - /// The default state + /// The default state. This state is used for a majority of the lexing, which is inside html tags. #[default] Regular, - #[allow(unused)] - /// When the lexer is inside a element list, newlines, spaces and quotes are part of the text - ElementList, + /// When the lexer is outside of a tag, special characters are lexed as text. + /// + /// The exceptions are `<`, which indicates the start of a tag, and `>`, which is invalid syntax if not preceded by a `<`. + OutsideTag, } impl LexContext for HtmlLexContext { diff --git a/crates/biome_html_parser/tests/html_specs/error/element/br-with-end.html.snap b/crates/biome_html_parser/tests/html_specs/error/element/br-with-end.html.snap index 175b4811f4aa..4a730a38fd69 100644 --- a/crates/biome_html_parser/tests/html_specs/error/element/br-with-end.html.snap +++ b/crates/biome_html_parser/tests/html_specs/error/element/br-with-end.html.snap @@ -40,19 +40,7 @@ HtmlRoot { r_angle_token: R_ANGLE@12..13 ">" [] [], }, HtmlContent { - value_token: HTML_LITERAL@13..18 "This" [] [Whitespace(" ")], - }, - HtmlContent { - value_token: HTML_LITERAL@18..23 "text" [] [Whitespace(" ")], - }, - HtmlContent { - value_token: HTML_LITERAL@23..26 "is" [] [Whitespace(" ")], - }, - HtmlContent { - value_token: HTML_LITERAL@26..33 "inside" [] [Whitespace(" ")], - }, - HtmlContent { - value_token: HTML_LITERAL@33..36 "br." [] [], + value_token: HTML_LITERAL@13..36 "This text is inside br."
[] [], }, ], HtmlBogusElement { @@ -107,16 +95,8 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@12..12 3: (empty) 4: R_ANGLE@12..13 ">" [] [] - 2: HTML_CONTENT@13..18 - 0: HTML_LITERAL@13..18 "This" [] [Whitespace(" ")] - 3: HTML_CONTENT@18..23 - 0: HTML_LITERAL@18..23 "text" [] [Whitespace(" ")] - 4: HTML_CONTENT@23..26 - 0: HTML_LITERAL@23..26 "is" [] [Whitespace(" ")] - 5: HTML_CONTENT@26..33 - 0: HTML_LITERAL@26..33 "inside" [] [Whitespace(" ")] - 6: HTML_CONTENT@33..36 - 0: HTML_LITERAL@33..36 "br." [] [] + 2: HTML_CONTENT@13..36 + 0: HTML_LITERAL@13..36 "This text is inside br." [] [] 2: HTML_BOGUS_ELEMENT@36..41 0: L_ANGLE@36..37 "<" [] [] 1: SLASH@37..38 "/" [] [] diff --git a/crates/biome_html_parser/tests/html_specs/error/element/missing-close-tag.html.snap b/crates/biome_html_parser/tests/html_specs/error/element/missing-close-tag.html.snap index faf12687a53c..492a8352dabf 100644 --- a/crates/biome_html_parser/tests/html_specs/error/element/missing-close-tag.html.snap +++ b/crates/biome_html_parser/tests/html_specs/error/element/missing-close-tag.html.snap @@ -27,12 +27,12 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@5..9 "foo\n" [] [], + value_token: HTML_LITERAL@5..8 "foo" [] [], }, ], closing_element: missing (required), }, - eof_token: EOF@9..9 "" [] [], + eof_token: EOF@8..9 "" [Newline("\n")] [], } ``` @@ -42,18 +42,18 @@ HtmlRoot { 0: HTML_ROOT@0..9 0: (empty) 1: (empty) - 2: HTML_ELEMENT@0..9 + 2: HTML_ELEMENT@0..8 0: HTML_OPENING_ELEMENT@0..5 0: L_ANGLE@0..1 "<" [] [] 1: HTML_NAME@1..4 0: HTML_LITERAL@1..4 "div" [] [] 2: HTML_ATTRIBUTE_LIST@4..4 3: R_ANGLE@4..5 ">" [] [] - 1: HTML_ELEMENT_LIST@5..9 - 0: HTML_CONTENT@5..9 - 0: HTML_LITERAL@5..9 "foo\n" [] [] + 1: HTML_ELEMENT_LIST@5..8 + 0: HTML_CONTENT@5..8 + 0: HTML_LITERAL@5..8 "foo" [] [] 2: (empty) - 3: EOF@9..9 "" [] [] + 3: EOF@8..9 "" [Newline("\n")] [] ``` diff --git a/crates/biome_html_parser/tests/html_specs/ok/attributes/multiline-attributes.html.snap 
b/crates/biome_html_parser/tests/html_specs/ok/attributes/multiline-attributes.html.snap index 35f794115850..c5ab105a4f9c 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/attributes/multiline-attributes.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/attributes/multiline-attributes.html.snap @@ -55,11 +55,11 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@34..40 "\n\tfoo\n" [] [], + value_token: HTML_LITERAL@34..39 "foo" [Newline("\n"), Whitespace("\t")] [], }, ], closing_element: HtmlClosingElement { - l_angle_token: L_ANGLE@40..41 "<" [] [], + l_angle_token: L_ANGLE@39..41 "<" [Newline("\n")] [], slash_token: SLASH@41..42 "/" [] [], name: HtmlName { value_token: HTML_LITERAL@42..45 "div" [] [], @@ -98,11 +98,11 @@ HtmlRoot { 1: HTML_STRING@24..32 0: HTML_STRING_LITERAL@24..32 "\"button\"" [] [] 3: R_ANGLE@32..34 ">" [Newline("\n")] [] - 1: HTML_ELEMENT_LIST@34..40 - 0: HTML_CONTENT@34..40 - 0: HTML_LITERAL@34..40 "\n\tfoo\n" [] [] - 2: HTML_CLOSING_ELEMENT@40..46 - 0: L_ANGLE@40..41 "<" [] [] + 1: HTML_ELEMENT_LIST@34..39 + 0: HTML_CONTENT@34..39 + 0: HTML_LITERAL@34..39 "foo" [Newline("\n"), Whitespace("\t")] [] + 2: HTML_CLOSING_ELEMENT@39..46 + 0: L_ANGLE@39..41 "<" [Newline("\n")] [] 1: SLASH@41..42 "/" [] [] 2: HTML_NAME@42..45 0: HTML_LITERAL@42..45 "div" [] [] diff --git a/crates/biome_html_parser/tests/html_specs/ok/element_list.html.snap b/crates/biome_html_parser/tests/html_specs/ok/element_list.html.snap index 076e874c9b38..1d2404a7e00e 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/element_list.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/element_list.html.snap @@ -48,12 +48,9 @@ HtmlRoot { r_angle_token: R_ANGLE@4..5 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@5..7 "\n\t" [] [], - }, HtmlElement { opening_element: HtmlOpeningElement { - l_angle_token: L_ANGLE@7..8 "<" [] [], + l_angle_token: L_ANGLE@5..8 "<" 
[Newline("\n"), Whitespace("\t")] [], name: HtmlName { value_token: HTML_LITERAL@8..11 "div" [] [], }, @@ -62,11 +59,11 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@12..27 "\n\t\tsome text\n\t\t" [] [], + value_token: HTML_LITERAL@12..24 "some text" [Newline("\n"), Whitespace("\t\t")] [], }, HtmlElement { opening_element: HtmlOpeningElement { - l_angle_token: L_ANGLE@27..28 "<" [] [], + l_angle_token: L_ANGLE@24..28 "<" [Newline("\n"), Whitespace("\t\t")] [], name: HtmlName { value_token: HTML_LITERAL@28..31 "div" [] [], }, @@ -157,10 +154,10 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@100..117 "\n\t\t\tsome text\n\t\t\t" [] [], + value_token: HTML_LITERAL@100..113 "some text" [Newline("\n"), Whitespace("\t\t\t")] [], }, HtmlSelfClosingElement { - l_angle_token: L_ANGLE@117..118 "<" [] [], + l_angle_token: L_ANGLE@113..118 "<" [Newline("\n"), Whitespace("\t\t\t")] [], name: HtmlName { value_token: HTML_LITERAL@118..122 "img" [] [Whitespace(" ")], }, @@ -253,11 +250,8 @@ HtmlRoot { r_angle_token: R_ANGLE@253..254 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@254..259 "\n\t\t\t\t" [] [], - }, HtmlSelfClosingElement { - l_angle_token: L_ANGLE@259..260 "<" [] [], + l_angle_token: L_ANGLE@254..260 "<" [Newline("\n"), Whitespace("\t\t\t\t")] [], name: HtmlName { value_token: HTML_LITERAL@260..264 "img" [] [Whitespace(" ")], }, @@ -398,21 +392,19 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@4..4 3: R_ANGLE@4..5 ">" [] [] 1: HTML_ELEMENT_LIST@5..417 - 0: HTML_CONTENT@5..7 - 0: HTML_LITERAL@5..7 "\n\t" [] [] - 1: HTML_ELEMENT@7..417 - 0: HTML_OPENING_ELEMENT@7..12 - 0: L_ANGLE@7..8 "<" [] [] + 0: HTML_ELEMENT@5..417 + 0: HTML_OPENING_ELEMENT@5..12 + 0: L_ANGLE@5..8 "<" [Newline("\n"), Whitespace("\t")] [] 1: HTML_NAME@8..11 0: HTML_LITERAL@8..11 "div" [] [] 2: HTML_ATTRIBUTE_LIST@11..11 3: R_ANGLE@11..12 ">" [] [] 1: HTML_ELEMENT_LIST@12..409 - 0: 
HTML_CONTENT@12..27 - 0: HTML_LITERAL@12..27 "\n\t\tsome text\n\t\t" [] [] - 1: HTML_ELEMENT@27..38 - 0: HTML_OPENING_ELEMENT@27..32 - 0: L_ANGLE@27..28 "<" [] [] + 0: HTML_CONTENT@12..24 + 0: HTML_LITERAL@12..24 "some text" [Newline("\n"), Whitespace("\t\t")] [] + 1: HTML_ELEMENT@24..38 + 0: HTML_OPENING_ELEMENT@24..32 + 0: L_ANGLE@24..28 "<" [Newline("\n"), Whitespace("\t\t")] [] 1: HTML_NAME@28..31 0: HTML_LITERAL@28..31 "div" [] [] 2: HTML_ATTRIBUTE_LIST@31..31 @@ -478,10 +470,10 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@99..99 3: R_ANGLE@99..100 ">" [] [] 1: HTML_ELEMENT_LIST@100..400 - 0: HTML_CONTENT@100..117 - 0: HTML_LITERAL@100..117 "\n\t\t\tsome text\n\t\t\t" [] [] - 1: HTML_SELF_CLOSING_ELEMENT@117..146 - 0: L_ANGLE@117..118 "<" [] [] + 0: HTML_CONTENT@100..113 + 0: HTML_LITERAL@100..113 "some text" [Newline("\n"), Whitespace("\t\t\t")] [] + 1: HTML_SELF_CLOSING_ELEMENT@113..146 + 0: L_ANGLE@113..118 "<" [Newline("\n"), Whitespace("\t\t\t")] [] 1: HTML_NAME@118..122 0: HTML_LITERAL@118..122 "img" [] [Whitespace(" ")] 2: HTML_ATTRIBUTE_LIST@122..144 @@ -544,10 +536,8 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@253..253 3: R_ANGLE@253..254 ">" [] [] 1: HTML_ELEMENT_LIST@254..390 - 0: HTML_CONTENT@254..259 - 0: HTML_LITERAL@254..259 "\n\t\t\t\t" [] [] - 1: HTML_SELF_CLOSING_ELEMENT@259..288 - 0: L_ANGLE@259..260 "<" [] [] + 0: HTML_SELF_CLOSING_ELEMENT@254..288 + 0: L_ANGLE@254..260 "<" [Newline("\n"), Whitespace("\t\t\t\t")] [] 1: HTML_NAME@260..264 0: HTML_LITERAL@260..264 "img" [] [Whitespace(" ")] 2: HTML_ATTRIBUTE_LIST@264..286 @@ -560,7 +550,7 @@ HtmlRoot { 0: HTML_STRING_LITERAL@268..286 "\"attributes.html \"" [] [] 3: SLASH@286..287 "/" [] [] 4: R_ANGLE@287..288 ">" [] [] - 2: HTML_SELF_CLOSING_ELEMENT@288..322 + 1: HTML_SELF_CLOSING_ELEMENT@288..322 0: L_ANGLE@288..294 "<" [Newline("\n"), Whitespace("\t\t\t\t")] [] 1: HTML_NAME@294..298 0: HTML_LITERAL@294..298 "img" [] [Whitespace(" ")] @@ -574,7 +564,7 @@ HtmlRoot { 0: HTML_STRING_LITERAL@302..320 
"\"attributes.html \"" [] [] 3: SLASH@320..321 "/" [] [] 4: R_ANGLE@321..322 ">" [] [] - 3: HTML_SELF_CLOSING_ELEMENT@322..356 + 2: HTML_SELF_CLOSING_ELEMENT@322..356 0: L_ANGLE@322..328 "<" [Newline("\n"), Whitespace("\t\t\t\t")] [] 1: HTML_NAME@328..332 0: HTML_LITERAL@328..332 "img" [] [Whitespace(" ")] @@ -588,7 +578,7 @@ HtmlRoot { 0: HTML_STRING_LITERAL@336..354 "\"attributes.html \"" [] [] 3: SLASH@354..355 "/" [] [] 4: R_ANGLE@355..356 ">" [] [] - 4: HTML_SELF_CLOSING_ELEMENT@356..390 + 3: HTML_SELF_CLOSING_ELEMENT@356..390 0: L_ANGLE@356..362 "<" [Newline("\n"), Whitespace("\t\t\t\t")] [] 1: HTML_NAME@362..366 0: HTML_LITERAL@362..366 "img" [] [Whitespace(" ")] diff --git a/crates/biome_html_parser/tests/html_specs/ok/hello-world.html.snap b/crates/biome_html_parser/tests/html_specs/ok/hello-world.html.snap index e138a8d95b45..0f0c778f898a 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/hello-world.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/hello-world.html.snap @@ -44,12 +44,9 @@ HtmlRoot { r_angle_token: R_ANGLE@21..22 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@22..24 "\n\t" [] [], - }, HtmlElement { opening_element: HtmlOpeningElement { - l_angle_token: L_ANGLE@24..25 "<" [] [], + l_angle_token: L_ANGLE@22..25 "<" [Newline("\n"), Whitespace("\t")] [], name: HtmlName { value_token: HTML_LITERAL@25..29 "head" [] [], }, @@ -57,12 +54,9 @@ HtmlRoot { r_angle_token: R_ANGLE@29..30 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@30..33 "\n\t\t" [] [], - }, HtmlElement { opening_element: HtmlOpeningElement { - l_angle_token: L_ANGLE@33..34 "<" [] [], + l_angle_token: L_ANGLE@30..34 "<" [Newline("\n"), Whitespace("\t\t")] [], name: HtmlName { value_token: HTML_LITERAL@34..39 "title" [] [], }, @@ -103,12 +97,9 @@ HtmlRoot { r_angle_token: R_ANGLE@77..78 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@78..81 "\n\t\t" 
[] [], - }, HtmlElement { opening_element: HtmlOpeningElement { - l_angle_token: L_ANGLE@81..82 "<" [] [], + l_angle_token: L_ANGLE@78..82 "<" [Newline("\n"), Whitespace("\t\t")] [], name: HtmlName { value_token: HTML_LITERAL@82..84 "h1" [] [], }, @@ -198,21 +189,17 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@21..21 3: R_ANGLE@21..22 ">" [] [] 1: HTML_ELEMENT_LIST@22..147 - 0: HTML_CONTENT@22..24 - 0: HTML_LITERAL@22..24 "\n\t" [] [] - 1: HTML_ELEMENT@24..70 - 0: HTML_OPENING_ELEMENT@24..30 - 0: L_ANGLE@24..25 "<" [] [] + 0: HTML_ELEMENT@22..70 + 0: HTML_OPENING_ELEMENT@22..30 + 0: L_ANGLE@22..25 "<" [Newline("\n"), Whitespace("\t")] [] 1: HTML_NAME@25..29 0: HTML_LITERAL@25..29 "head" [] [] 2: HTML_ATTRIBUTE_LIST@29..29 3: R_ANGLE@29..30 ">" [] [] 1: HTML_ELEMENT_LIST@30..61 - 0: HTML_CONTENT@30..33 - 0: HTML_LITERAL@30..33 "\n\t\t" [] [] - 1: HTML_ELEMENT@33..61 - 0: HTML_OPENING_ELEMENT@33..40 - 0: L_ANGLE@33..34 "<" [] [] + 0: HTML_ELEMENT@30..61 + 0: HTML_OPENING_ELEMENT@30..40 + 0: L_ANGLE@30..34 "<" [Newline("\n"), Whitespace("\t\t")] [] 1: HTML_NAME@34..39 0: HTML_LITERAL@34..39 "title" [] [] 2: HTML_ATTRIBUTE_LIST@39..39 @@ -232,7 +219,7 @@ HtmlRoot { 2: HTML_NAME@65..69 0: HTML_LITERAL@65..69 "head" [] [] 3: R_ANGLE@69..70 ">" [] [] - 2: HTML_ELEMENT@70..147 + 1: HTML_ELEMENT@70..147 0: HTML_OPENING_ELEMENT@70..78 0: L_ANGLE@70..73 "<" [Newline("\n"), Whitespace("\t")] [] 1: HTML_NAME@73..77 @@ -240,11 +227,9 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@77..77 3: R_ANGLE@77..78 ">" [] [] 1: HTML_ELEMENT_LIST@78..138 - 0: HTML_CONTENT@78..81 - 0: HTML_LITERAL@78..81 "\n\t\t" [] [] - 1: HTML_ELEMENT@81..103 - 0: HTML_OPENING_ELEMENT@81..85 - 0: L_ANGLE@81..82 "<" [] [] + 0: HTML_ELEMENT@78..103 + 0: HTML_OPENING_ELEMENT@78..85 + 0: L_ANGLE@78..82 "<" [Newline("\n"), Whitespace("\t\t")] [] 1: HTML_NAME@82..84 0: HTML_LITERAL@82..84 "h1" [] [] 2: HTML_ATTRIBUTE_LIST@84..84 @@ -258,7 +243,7 @@ HtmlRoot { 2: HTML_NAME@100..102 0: HTML_LITERAL@100..102 "h1" [] [] 3: 
R_ANGLE@102..103 ">" [] [] - 2: HTML_ELEMENT@103..138 + 1: HTML_ELEMENT@103..138 0: HTML_OPENING_ELEMENT@103..109 0: L_ANGLE@103..107 "<" [Newline("\n"), Whitespace("\t\t")] [] 1: HTML_NAME@107..108 diff --git a/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/meta.html.snap b/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/meta.html.snap index 653aa286de9a..362d507f948c 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/meta.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/meta.html.snap @@ -28,11 +28,8 @@ HtmlRoot { r_angle_token: R_ANGLE@5..6 ">" [] [], }, children: HtmlElementList [ - HtmlContent { - value_token: HTML_LITERAL@6..8 "\n\t" [] [], - }, HtmlSelfClosingElement { - l_angle_token: L_ANGLE@8..9 "<" [] [], + l_angle_token: L_ANGLE@6..9 "<" [Newline("\n"), Whitespace("\t")] [], name: HtmlName { value_token: HTML_LITERAL@9..14 "meta" [] [Whitespace(" ")], }, @@ -80,10 +77,8 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@5..5 3: R_ANGLE@5..6 ">" [] [] 1: HTML_ELEMENT_LIST@6..30 - 0: HTML_CONTENT@6..8 - 0: HTML_LITERAL@6..8 "\n\t" [] [] - 1: HTML_SELF_CLOSING_ELEMENT@8..30 - 0: L_ANGLE@8..9 "<" [] [] + 0: HTML_SELF_CLOSING_ELEMENT@6..30 + 0: L_ANGLE@6..9 "<" [Newline("\n"), Whitespace("\t")] [] 1: HTML_NAME@9..14 0: HTML_LITERAL@9..14 "meta" [] [Whitespace(" ")] 2: HTML_ATTRIBUTE_LIST@14..29 diff --git a/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/wbr.html.snap b/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/wbr.html.snap index 006b2dbacef4..d42f98b3b47a 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/wbr.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/no-end-tags/wbr.html.snap @@ -28,7 +28,10 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@3..55 "So then she pointed at the tiger and screamed\n\tthere" [] [], + value_token: HTML_LITERAL@3..48 "So then she pointed at the tiger and screamed" [] [], + }, + 
HtmlContent { + value_token: HTML_LITERAL@48..55 "there" [Newline("\n"), Whitespace("\t")] [], }, HtmlSelfClosingElement { l_angle_token: L_ANGLE@55..56 "<" [] [], @@ -178,97 +181,99 @@ HtmlRoot { 2: HTML_ATTRIBUTE_LIST@2..2 3: R_ANGLE@2..3 ">" [] [] 1: HTML_ELEMENT_LIST@3..137 - 0: HTML_CONTENT@3..55 - 0: HTML_LITERAL@3..55 "So then she pointed at the tiger and screamed\n\tthere" [] [] - 1: HTML_SELF_CLOSING_ELEMENT@55..60 + 0: HTML_CONTENT@3..48 + 0: HTML_LITERAL@3..48 "So then she pointed at the tiger and screamed" [] [] + 1: HTML_CONTENT@48..55 + 0: HTML_LITERAL@48..55 "there" [Newline("\n"), Whitespace("\t")] [] + 2: HTML_SELF_CLOSING_ELEMENT@55..60 0: L_ANGLE@55..56 "<" [] [] 1: HTML_NAME@56..59 0: HTML_LITERAL@56..59 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@59..59 3: (empty) 4: R_ANGLE@59..60 ">" [] [] - 2: HTML_CONTENT@60..62 + 3: HTML_CONTENT@60..62 0: HTML_LITERAL@60..62 "is" [] [] - 3: HTML_SELF_CLOSING_ELEMENT@62..67 + 4: HTML_SELF_CLOSING_ELEMENT@62..67 0: L_ANGLE@62..63 "<" [] [] 1: HTML_NAME@63..66 0: HTML_LITERAL@63..66 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@66..66 3: (empty) 4: R_ANGLE@66..67 ">" [] [] - 4: HTML_CONTENT@67..69 + 5: HTML_CONTENT@67..69 0: HTML_LITERAL@67..69 "no" [] [] - 5: HTML_SELF_CLOSING_ELEMENT@69..74 + 6: HTML_SELF_CLOSING_ELEMENT@69..74 0: L_ANGLE@69..70 "<" [] [] 1: HTML_NAME@70..73 0: HTML_LITERAL@70..73 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@73..73 3: (empty) 4: R_ANGLE@73..74 ">" [] [] - 6: HTML_CONTENT@74..77 + 7: HTML_CONTENT@74..77 0: HTML_LITERAL@74..77 "way" [] [] - 7: HTML_SELF_CLOSING_ELEMENT@77..82 + 8: HTML_SELF_CLOSING_ELEMENT@77..82 0: L_ANGLE@77..78 "<" [] [] 1: HTML_NAME@78..81 0: HTML_LITERAL@78..81 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@81..81 3: (empty) 4: R_ANGLE@81..82 ">" [] [] - 8: HTML_CONTENT@82..85 + 9: HTML_CONTENT@82..85 0: HTML_LITERAL@82..85 "you" [] [] - 9: HTML_SELF_CLOSING_ELEMENT@85..90 + 10: HTML_SELF_CLOSING_ELEMENT@85..90 0: L_ANGLE@85..86 "<" [] [] 1: HTML_NAME@86..89 0: HTML_LITERAL@86..89 "wbr" [] [] 2: 
HTML_ATTRIBUTE_LIST@89..89 3: (empty) 4: R_ANGLE@89..90 ">" [] [] - 10: HTML_CONTENT@90..93 + 11: HTML_CONTENT@90..93 0: HTML_LITERAL@90..93 "are" [] [] - 11: HTML_SELF_CLOSING_ELEMENT@93..98 + 12: HTML_SELF_CLOSING_ELEMENT@93..98 0: L_ANGLE@93..94 "<" [] [] 1: HTML_NAME@94..97 0: HTML_LITERAL@94..97 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@97..97 3: (empty) 4: R_ANGLE@97..98 ">" [] [] - 12: HTML_CONTENT@98..102 + 13: HTML_CONTENT@98..102 0: HTML_LITERAL@98..102 "ever" [] [] - 13: HTML_SELF_CLOSING_ELEMENT@102..107 + 14: HTML_SELF_CLOSING_ELEMENT@102..107 0: L_ANGLE@102..103 "<" [] [] 1: HTML_NAME@103..106 0: HTML_LITERAL@103..106 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@106..106 3: (empty) 4: R_ANGLE@106..107 ">" [] [] - 14: HTML_CONTENT@107..112 + 15: HTML_CONTENT@107..112 0: HTML_LITERAL@107..112 "going" [] [] - 15: HTML_SELF_CLOSING_ELEMENT@112..117 + 16: HTML_SELF_CLOSING_ELEMENT@112..117 0: L_ANGLE@112..113 "<" [] [] 1: HTML_NAME@113..116 0: HTML_LITERAL@113..116 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@116..116 3: (empty) 4: R_ANGLE@116..117 ">" [] [] - 16: HTML_CONTENT@117..119 + 17: HTML_CONTENT@117..119 0: HTML_LITERAL@117..119 "to" [] [] - 17: HTML_SELF_CLOSING_ELEMENT@119..124 + 18: HTML_SELF_CLOSING_ELEMENT@119..124 0: L_ANGLE@119..120 "<" [] [] 1: HTML_NAME@120..123 0: HTML_LITERAL@120..123 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@123..123 3: (empty) 4: R_ANGLE@123..124 ">" [] [] - 18: HTML_CONTENT@124..129 + 19: HTML_CONTENT@124..129 0: HTML_LITERAL@124..129 "catch" [] [] - 19: HTML_SELF_CLOSING_ELEMENT@129..134 + 20: HTML_SELF_CLOSING_ELEMENT@129..134 0: L_ANGLE@129..130 "<" [] [] 1: HTML_NAME@130..133 0: HTML_LITERAL@130..133 "wbr" [] [] 2: HTML_ATTRIBUTE_LIST@133..133 3: (empty) 4: R_ANGLE@133..134 ">" [] [] - 20: HTML_CONTENT@134..137 + 21: HTML_CONTENT@134..137 0: HTML_LITERAL@134..137 "me!" 
[] [] 2: HTML_CLOSING_ELEMENT@137..141 0: L_ANGLE@137..138 "<" [] [] diff --git a/crates/biome_html_parser/tests/html_specs/ok/special-chars.html.snap b/crates/biome_html_parser/tests/html_specs/ok/special-chars.html.snap index ccc7fca5f02b..65d8156dfa7d 100644 --- a/crates/biome_html_parser/tests/html_specs/ok/special-chars.html.snap +++ b/crates/biome_html_parser/tests/html_specs/ok/special-chars.html.snap @@ -31,11 +31,17 @@ HtmlRoot { }, children: HtmlElementList [ HtmlContent { - value_token: HTML_LITERAL@5..39 "\n\t4 / 2 == 2\n\t\"foo\"\n\thtml is cool\n" [] [], + value_token: HTML_LITERAL@5..17 "4 / 2 == 2" [Newline("\n"), Whitespace("\t")] [], + }, + HtmlContent { + value_token: HTML_LITERAL@17..24 "\"foo\"" [Newline("\n"), Whitespace("\t")] [], + }, + HtmlContent { + value_token: HTML_LITERAL@24..38 "html is cool" [Newline("\n"), Whitespace("\t")] [], }, ], closing_element: HtmlClosingElement { - l_angle_token: L_ANGLE@39..40 "<" [] [], + l_angle_token: L_ANGLE@38..40 "<" [Newline("\n")] [], slash_token: SLASH@40..41 "/" [] [], name: HtmlName { value_token: HTML_LITERAL@41..44 "div" [] [], @@ -60,11 +66,15 @@ HtmlRoot { 0: HTML_LITERAL@1..4 "div" [] [] 2: HTML_ATTRIBUTE_LIST@4..4 3: R_ANGLE@4..5 ">" [] [] - 1: HTML_ELEMENT_LIST@5..39 - 0: HTML_CONTENT@5..39 - 0: HTML_LITERAL@5..39 "\n\t4 / 2 == 2\n\t\"foo\"\n\thtml is cool\n" [] [] - 2: HTML_CLOSING_ELEMENT@39..45 - 0: L_ANGLE@39..40 "<" [] [] + 1: HTML_ELEMENT_LIST@5..38 + 0: HTML_CONTENT@5..17 + 0: HTML_LITERAL@5..17 "4 / 2 == 2" [Newline("\n"), Whitespace("\t")] [] + 1: HTML_CONTENT@17..24 + 0: HTML_LITERAL@17..24 "\"foo\"" [Newline("\n"), Whitespace("\t")] [] + 2: HTML_CONTENT@24..38 + 0: HTML_LITERAL@24..38 "html is cool" [Newline("\n"), Whitespace("\t")] [] + 2: HTML_CLOSING_ELEMENT@38..45 + 0: L_ANGLE@38..40 "<" [Newline("\n")] [] 1: SLASH@40..41 "/" [] [] 2: HTML_NAME@41..44 0: HTML_LITERAL@41..44 "div" [] []