From 5adee8acda208e68aba8f6c47d4bab7d928b9211 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 13 Dec 2023 19:32:53 +0100 Subject: [PATCH] HTML API: Add support for list elements. From https://github.com/html5lib/html5lib-tests/blob/a9f44960a9fedf265093d22b2aa3c7ca123727b9/tree-construction/webkit01.dat#L468-L482 Co-authored-by: Jon Surrell Add docblocks to tests, expand comments in class docblock. --- phpcs.xml.dist | 9 + .../html-api/class-wp-html-open-elements.php | 32 +- .../html-api/class-wp-html-processor.php | 115 ++++- .../tests/html-api/wpHtmlProcessor.php | 5 - .../html-api/wpHtmlProcessorBreadcrumbs.php | 40 +- .../html-api/wpHtmlProcessorSemanticRules.php | 103 +++++ ...HtmlProcessorSemanticRulesListElements.php | 431 ++++++++++++++++++ .../wpHtmlSupportRequiredHtmlProcessor.php | 6 - .../wpHtmlSupportRequiredOpenElements.php | 7 - 9 files changed, 704 insertions(+), 44 deletions(-) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesListElements.php diff --git a/phpcs.xml.dist b/phpcs.xml.dist index 3defbc290a6b2..ccb04303218ae 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -250,6 +250,15 @@ /wp-tests-config-sample\.php + + + /wp-includes/html-api/class-wp-html-processor\.php + + diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 55c4d3a663c5b..1234abcb9dfe4 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) { } if ( in_array( $node->node_name, $termination_list, true ) ) { - return true; + return false; } } @@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) { * Returns whether a particular element is in list item scope. * * @since 6.4.0 + * @since 6.5.0 Implemented: no longer throws on every invocation. 
* * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope * - * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. - * * @param string $tag_name Name of tag to check. * @return bool Whether given element is in scope. */ public function has_element_in_list_item_scope( $tag_name ) { - throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' ); - - return false; // The linter requires this unreachable code until the function is implemented and can return. + return $this->has_element_in_specific_scope( + $tag_name, + array( + // There are more elements that belong here which aren't currently supported. + 'OL', + 'UL', + ) + ); } /** @@ -375,10 +379,22 @@ public function walk_down() { * see WP_HTML_Open_Elements::walk_down(). * * @since 6.4.0 + * @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists. + * + * @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists. */ - public function walk_up() { + public function walk_up( $above_this_node = null ) { + $has_found_node = null === $above_this_node; + for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { - yield $this->stack[ $i ]; + $node = $this->stack[ $i ]; + + if ( ! $has_found_node ) { + $has_found_node = $node === $above_this_node; + continue; + } + + yield $node; } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 41823af00ff93..cce26a60c5350 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -105,7 +105,7 @@ * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. * - Links: A. - * - Lists: DL. + * - Lists: DD, DL, DT, LI, OL, UL. * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO. 
* - Paragraph: P. * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. @@ -648,10 +648,12 @@ private function step_in_body() { case '+MAIN': case '+MENU': case '+NAV': + case '+OL': case '+P': case '+SEARCH': case '+SECTION': case '+SUMMARY': + case '+UL': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } @@ -685,9 +687,11 @@ private function step_in_body() { case '-MAIN': case '-MENU': case '-NAV': + case '-OL': case '-SEARCH': case '-SECTION': case '-SUMMARY': + case '-UL': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) { // @todo Report parse error. // Ignore the token. @@ -755,6 +759,109 @@ private function step_in_body() { $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); return true; + /* + * > A start tag whose tag name is "li" + * > A start tag whose tag name is one of: "dd", "dt" + */ + case '+DD': + case '+DT': + case '+LI': + $this->state->frameset_ok = false; + $node = $this->state->stack_of_open_elements->current_node(); + $is_li = 'LI' === $tag_name; + + in_body_list_loop: + /* + * The logic for LI and DT/DD is the same except for one point: LI elements _only_ + * close other LI elements, but a DT or DD element closes _any_ open DT or DD element. + */ + if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) { + $node_name = $is_li ? 'LI' : $node->node_name; + $this->generate_implied_end_tags( $node_name ); + if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) { + // @todo Indicate a parse error once it's possible. This error does not impact the logic here. 
+ } + + $this->state->stack_of_open_elements->pop_until( $node_name ); + goto in_body_list_done; + } + + if ( + 'ADDRESS' !== $node->node_name && + 'DIV' !== $node->node_name && + 'P' !== $node->node_name && + $this->is_special( $node->node_name ) + ) { + /* + * > If node is in the special category, but is not an address, div, + * > or p element, then jump to the step labeled done below. + */ + goto in_body_list_done; + } else { + /* + * > Otherwise, set node to the previous entry in the stack of open elements + * > and return to the step labeled loop. + */ + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { + $node = $item; + break; + } + goto in_body_list_loop; + } + + in_body_list_done: + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > An end tag whose tag name is "li" + * > An end tag whose tag name is one of: "dd", "dt" + */ + case '-DD': + case '-DT': + case '-LI': + if ( + /* + * An end tag whose tag name is "li": + * If the stack of open elements does not have an li element in list item scope, + * then this is a parse error; ignore the token. + */ + ( + 'LI' === $tag_name && + ! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' ) + ) || + /* + * An end tag whose tag name is one of: "dd", "dt": + * If the stack of open elements does not have an element in scope that is an + * HTML element with the same tag name as that of the token, then this is a + * parse error; ignore the token. + */ + ( + 'LI' !== $tag_name && + ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) + ) + ) { + /* + * This is a parse error, ignore the token. + * + * @todo Indicate a parse error once it's possible. 
+ */ + return $this->step(); + } + + $this->generate_implied_end_tags( $tag_name ); + + if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) { + // @todo Indicate a parse error once it's possible. This error does not impact the logic here. + } + + $this->state->stack_of_open_elements->pop_until( $tag_name ); + return true; + /* * > An end tag whose tag name is "p" */ @@ -1223,6 +1330,9 @@ private function close_a_p_element() { */ private function generate_implied_end_tags( $except_for_this_element = null ) { $elements_with_implied_end_tags = array( + 'DD', + 'DT', + 'LI', 'P', ); @@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) { */ private function generate_implied_end_tags_thoroughly() { $elements_with_implied_end_tags = array( + 'DD', + 'DT', + 'LI', 'P', ); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 2e5565c9734fa..d9f1357b5c66f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() { 'CAPTION' => array( 'CAPTION' ), 'COL' => array( 'COL' ), 'COLGROUP' => array( 'COLGROUP' ), - 'DD' => array( 'DD' ), - 'DT' => array( 'DT' ), 'EMBED' => array( 'EMBED' ), 'FORM' => array( 'FORM' ), 'FRAME' => array( 'FRAME' ), @@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() { 'IFRAME' => array( 'IFRAME' ), 'INPUT' => array( 'INPUT' ), 'KEYGEN' => array( 'KEYGEN' ), - 'LI' => array( 'LI' ), 'LINK' => array( 'LINK' ), 'LISTING' => array( 'LISTING' ), 'MARQUEE' => array( 'MARQUEE' ), @@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() { 'NOFRAMES' => array( 'NOFRAMES' ), 'NOSCRIPT' => array( 'NOSCRIPT' ), 'OBJECT' => array( 'OBJECT' ), - 'OL' => array( 'OL' ), 'OPTGROUP' => array( 'OPTGROUP' ), 'OPTION' => array( 'OPTION' ), 'PARAM' => 
array( 'PARAM' ), @@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() { 'TITLE' => array( 'TITLE' ), 'TR' => array( 'TR' ), 'TRACK' => array( 'TRACK' ), - 'UL' => array( 'UL' ), 'WBR' => array( 'WBR' ), 'XMP' => array( 'XMP' ), ); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 3b339e4f82ee9..15d38d6f70c6c 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -38,7 +38,7 @@ public function data_single_tag_of_supported_elements() { $supported_elements = array( 'A', 'ABBR', - 'ACRONYM', // Neutralized + 'ACRONYM', // Neutralized. 'ADDRESS', 'ARTICLE', 'ASIDE', @@ -47,13 +47,14 @@ public function data_single_tag_of_supported_elements() { 'BDI', 'BDO', 'BIG', - 'BLINK', // Deprecated + 'BLINK', // Deprecated. 'BUTTON', 'CANVAS', - 'CENTER', // Neutralized + 'CENTER', // Neutralized. 'CITE', 'CODE', 'DATA', + 'DD', 'DATALIST', 'DFN', 'DEL', @@ -62,6 +63,7 @@ public function data_single_tag_of_supported_elements() { 'DIR', 'DIV', 'DL', + 'DT', 'EM', 'FIELDSET', 'FIGCAPTION', @@ -79,6 +81,7 @@ public function data_single_tag_of_supported_elements() { 'I', 'IMG', 'INS', + 'LI', 'ISINDEX', // Deprecated 'KBD', 'LABEL', @@ -91,6 +94,7 @@ public function data_single_tag_of_supported_elements() { 'MULTICOL', // Deprecated 'NAV', 'NEXTID', // Deprecated + 'OL', 'OUTPUT', 'P', 'PICTURE', @@ -112,6 +116,7 @@ public function data_single_tag_of_supported_elements() { 'TIME', 'TT', 'U', + 'UL', 'VAR', 'VIDEO', ); @@ -156,7 +161,7 @@ public function test_fails_when_encountering_unsupported_tag( $html ) { */ public function data_unsupported_elements() { $unsupported_elements = array( - 'APPLET', // Deprecated + 'APPLET', // Deprecated. 'AREA', 'BASE', 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal. 
@@ -165,8 +170,6 @@ public function data_unsupported_elements() { 'CAPTION', 'COL', 'COLGROUP', - 'DD', - 'DT', 'EMBED', 'FORM', 'FRAME', @@ -176,27 +179,25 @@ public function data_unsupported_elements() { 'HTML', 'IFRAME', 'INPUT', - 'KEYGEN', // Deprecated; void - 'LI', + 'KEYGEN', // Deprecated; void. 'LINK', 'LISTING', // Deprecated, use PRE instead. - 'MARQUEE', // Deprecated + 'MARQUEE', // Deprecated. 'MATH', 'META', - 'NOBR', // Neutralized - 'NOEMBED', // Neutralized - 'NOFRAMES', // Neutralized + 'NOBR', // Neutralized. + 'NOEMBED', // Neutralized. + 'NOFRAMES', // Neutralized. 'NOSCRIPT', 'OBJECT', - 'OL', 'OPTGROUP', 'OPTION', - 'PLAINTEXT', // Neutralized + 'PLAINTEXT', // Neutralized. 'PRE', - 'RB', // Neutralized + 'RB', // Neutralized. 'RP', 'RT', - 'RTC', // Neutralized + 'RTC', // Neutralized. 'SCRIPT', 'SELECT', 'SOURCE', @@ -213,7 +214,6 @@ public function data_unsupported_elements() { 'TITLE', 'TR', 'TRACK', - 'UL', 'WBR', 'XMP', // Deprecated, use PRE instead. ); @@ -348,6 +348,12 @@ public function data_html_target_with_breadcrumbs() { ), 'MAIN inside MAIN inside SPAN' => array( '
', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ), 'MAIN next to unclosed P' => array( '

', array( 'HTML', 'BODY', 'MAIN' ), 1 ), + 'LI after unclosed LI' => array( '
  • one
  • two
  • three', array( 'HTML', 'BODY', 'LI' ), 3 ), + 'LI in UL in LI' => array( '
    • one
      • two', array( 'HTML', 'BODY', 'UL', 'LI', 'UL', 'LI' ), 1 ), + 'DD and DT mutually close, LI self-closes (dt 2)' => array( '
      • ', array( 'HTML', 'BODY', 'DT' ), 2 ), + 'DD and DT mutually close, LI self-closes (dd 3)' => array( '
      • ', array( 'HTML', 'BODY', 'DD' ), 3 ), + 'DD and DT mutually close, LI self-closes (li 1)' => array( '
      • ', array( 'HTML', 'BODY', 'DD', 'LI' ), 1 ), + 'DD and DT mutually close, LI self-closes (li 2)' => array( '
      • ', array( 'HTML', 'BODY', 'DD', 'LI' ), 2 ), // H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising. 'EM inside H3 after unclosed P' => array( '

        Important Message

        ', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ), diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index bd3996d51d7b7..c1adf9a71a3f8 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -224,6 +224,109 @@ public function test_in_body_button_with_button_in_scope_as_ancestor() { $this->assertSame( array( 'HTML', 'BODY', 'BUTTON' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting for third button.' ); } + /** + * Verifies that H1 through H6 elements close an open P element. + * + * @ticket 60215 + * + * @dataProvider data_heading_elements + * + * @param string $tag_name Name of H1 - H6 element under test. + */ + public function test_in_body_heading_element_closes_open_p_tag( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( + "

        Open<{$tag_name}>Closed P

        " + ); + + $processor->next_tag( $tag_name ); + $this->assertSame( + array( 'HTML', 'BODY', $tag_name ), + $processor->get_breadcrumbs(), + "Expected {$tag_name} to be a direct child of the BODY, having closed the open P element." + ); + + $processor->next_tag( 'IMG' ); + $this->assertSame( + array( 'HTML', 'BODY', 'IMG' ), + $processor->get_breadcrumbs(), + 'Expected IMG to be a direct child of BODY, having closed the open P element.' + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_heading_elements() { + return array( + 'H1' => array( 'H1' ), + 'H2' => array( 'H2' ), + 'H3' => array( 'H3' ), + 'H4' => array( 'H4' ), + 'H5' => array( 'H5' ), + 'H6' => array( 'H6' ), + ); + } + + /** + * Verifies that H1 through H6 elements close an open H1 through H6 element. + * + * @ticket 60215 + * + * @dataProvider data_heading_combinations + * + * @param string $first_heading H1 - H6 element appearing (unclosed) before the second. + * @param string $second_heading H1 - H6 element appearing after the first. + */ + public function test_in_body_heading_element_closes_other_heading_elements( $first_heading, $second_heading ) { + $processor = WP_HTML_Processor::create_fragment( + "
        <{$first_heading} first> then <{$second_heading} second> and end
        " + ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'second' ) ) { + continue; + } + + $this->assertTrue( + $processor->get_attribute( 'second' ), + "Failed to find expected {$second_heading} tag." + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', $second_heading ), + $processor->get_breadcrumbs(), + "Expected {$second_heading} to be a direct child of the DIV, having closed the open {$first_heading} element." + ); + + $processor->next_tag( 'IMG' ); + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', 'IMG' ), + $processor->get_breadcrumbs(), + "Expected IMG to be a direct child of DIV, having closed the open {$first_heading} element." + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_heading_combinations() { + $headings = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ); + + $combinations = array(); + + // Create all unique pairs of H1 - H6 elements. + foreach ( $headings as $first_tag ) { + foreach ( $headings as $second_tag ) { + $combinations[ "{$first_tag} then {$second_tag}" ] = array( $first_tag, $second_tag ); + } + } + + return $combinations; + } + /** * Verifies that when "in body" and encountering "any other end tag" * that the HTML processor ignores the end tag if there's a special diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesListElements.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesListElements.php new file mode 100644 index 0000000000000..0c7e3422f09fc --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRulesListElements.php @@ -0,0 +1,431 @@ +
      • ' ); + + while ( + null === $processor->get_attribute( 'target' ) && + $processor->next_tag() + ) { + continue; + } + + $this->assertTrue( + $processor->get_attribute( 'target' ), + 'Failed to find target node.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'LI' ), + $processor->get_breadcrumbs(), + "LI should have closed open LI, but didn't." + ); + } + + /** + * Ensures that an opening LI element implicitly closes other open elements with optional closing tags. + * + * @ticket 60215 + */ + public function test_in_body_li_generates_implied_end_tags_inside_open_li() { + $processor = WP_HTML_Processor::create_fragment( '
      • ' ); + + while ( + null === $processor->get_attribute( 'target' ) && + $processor->next_tag() + ) { + continue; + } + + $this->assertTrue( + $processor->get_attribute( 'target' ), + 'Failed to find target node.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'LI' ), + $processor->get_breadcrumbs(), + "LI should have closed open LI, but didn't." + ); + } + + /** + * Ensures that when closing tags with optional tag closers, an opening LI tag doesn't close beyond a special boundary. + * + * @ticket 60215 + */ + public function test_in_body_li_generates_implied_end_tags_inside_open_li_but_stopping_at_special_tags() { + $processor = WP_HTML_Processor::create_fragment( '
      • ' ); + + while ( + null === $processor->get_attribute( 'target' ) && + $processor->next_tag() + ) { + continue; + } + + $this->assertTrue( + $processor->get_attribute( 'target' ), + 'Failed to find target node.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'LI', 'BLOCKQUOTE', 'LI' ), + $processor->get_breadcrumbs(), + 'LI should have left the BLOCKQUOTE open, but closed it.' + ); + } + + /** + * Ensures that an opening LI closes an open P in button scope. + * + * @ticket 60215 + */ + public function test_in_body_li_in_li_closes_p_in_button_scope() { + $processor = WP_HTML_Processor::create_fragment( '