From 9ec4b31d1f49c821d23974589b2816b8939618a8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 2 Feb 2024 17:00:08 +0100 Subject: [PATCH 01/55] Work on table support --- ...ass-wp-html-active-formatting-elements.php | 4 + .../html-api/class-wp-html-processor.php | 245 +++++++++++++++++- 2 files changed, 237 insertions(+), 12 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 9f7fee9076243..0f71d9d70fc0e 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -184,4 +184,8 @@ public function walk_up() { yield $this->stack[ $i ]; } } + + public function set_marker() { + $this->push( new WP_HTML_Token( null, 'marker', false ) ); + } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 40538491152ad..8f4b9e58ce2db 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -763,6 +763,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: return $this->step_in_select(); + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: + return $this->step_in_table(); + default: $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); @@ -1282,6 +1285,17 @@ private function step_in_body() { $this->run_adoption_agency_algorithm(); return true; + /* + * > A start tag whose tag name is "table" + */ + case '+TABLE': + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + $this->insert_html_element( $this->state->current_token ); + $this->state->frameset_ok = false; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + /* * > An end tag whose tag name is "br" * > Parse error. Drop the attributes from the token, and act as described in the next @@ -1384,6 +1398,22 @@ private function step_in_body() { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; + + /* + * > A start tag whose tag name is one of: "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" + */ + case 'CAPTION': + case 'COL': + case 'COLGROUP': + case 'FRAME': + case 'HEAD': + case 'TBODY': + case 'TD': + case 'TFOOT': + case 'TH': + case 'THEAD': + case 'TR': + return $this->step(); } /* @@ -1408,13 +1438,8 @@ private function step_in_body() { case 'BASEFONT': case 'BGSOUND': case 'BODY': - case 'CAPTION': - case 'COL': - case 'COLGROUP': case 'FORM': - case 'FRAME': case 'FRAMESET': - case 'HEAD': case 'HTML': case 'IFRAME': case 'LINK': @@ -1435,16 +1460,9 @@ private function step_in_body() { case 'SCRIPT': case 'STYLE': case 'SVG': - case 'TABLE': - case 'TBODY': - case 'TD': case 'TEMPLATE': case 'TEXTAREA': - case 'TFOOT': - case 'TH': - case 'THEAD': case 'TITLE': - case 'TR': case 'XMP': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." ); @@ -1694,6 +1712,186 @@ private function step_in_select() { return $this->step(); } + /** + * Parses next element in the 'in table' insertion mode. + * + * This internal function performs the 'in table' insertion mode + * logic for the generalized WP_HTML_Processor::step() function. + * + * @since 6.5.0 + * + * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable + * @see WP_HTML_Processor::step + * + * @return bool Whether an element was found. + */ + private function step_in_table() { + $tag_name = $this->get_tag(); + $op_sigil = $this->is_tag_closer() ? '-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element + */ + /* + * > A comment token + */ + /* + * > A DOCTYPE token + */ + /* + * > A start tag whose tag name is "caption" + */ + case "+CAPTION": + $this->clear_stack_to_table_context(); + $this->state->active_formatting_elements->set_marker(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; + return true; + + /* + * > A start tag whose tag name is "colgroup" + */ + case "+COLGROUP": + $this->clear_stack_to_table_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return true; + + /* + * > A start tag whose tag name is "col" + */ + case "+COL": + $this->clear_stack_to_table_context(); + $this->insert_html_element( + new WP_HTML_Token( null, 'COLGROUP', false ) + ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is one of: "tbody", "tfoot", "thead" + */ + case "+TBODY": + case "+TFOOT": + case "+THEAD": + $this->clear_stack_to_table_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return true; + + /* + * > A start tag whose tag name is one of: "td", "th", "tr" + */ + case "+TD": + case "+TH": + case "+TR": + $this->clear_stack_to_table_context(); + $this->insert_html_element( + new WP_HTML_Token( null, 'TBODY', false ) + ); + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is "table" + */ + case "+TABLE": + // pase error + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { + return $this->step(); + } + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); + $this->reset_insertion_mode(); + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is "table" + */ + case "-TABLE": + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { + // parse error + return $this->step(); + } + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); + $this->reset_insertion_mode(); + return true; + + /* + * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" + */ + case "-BODY": + case "-CAPTION": + case "-COL": + case "-COLGROUP": + case "-HTML": + case "-TBODY": + case "-TD": + case "-TFOOT": + case "-TH": + case "-THEAD": + case "-TR": + // parse error + return $this->step(); + + /* + * > A start tag whose tag name is one of: "style", "script", "template" + * > An end tag whose tag name is "template" + */ + case "+STYLE": + case "+SCRIPT": + case "+TEMPLATE": + case "-TEMPLATE": + // > Process the token using the rules for the "in head" insertion mode. + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + + /* + * > A start tag whose tag name is "input" + * + * > If the token does not have an attribute with the name "type", or if it does, but + * > that attribute's value is not an ASCII case-insensitive match for the string + * > "hidden", then: act as described in the "anything else" entry below. + */ + case "+INPUT": + $type_attribute = $this->get_attribute( 'type' ); + if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { + goto in_table_anything_else; + } + // parse error + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "form" + */ + case "+FORM": + if ( + $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || + + ) { + } + + /* + * > An end-of-file token + */ + /* + * > Anything else + * > Parse error. Enable foster parenting, process the token using the rules for the + * > "in body" insertion mode, and then disable foster parenting. + */ + default: + in_table_anything_else: + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + } + + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + } + /* * Internal helpers */ @@ -1717,6 +1915,29 @@ private function bookmark_token() { return "{$this->bookmark_counter}"; } + /** + * Clear the stack back to a table context. + * + * > When the steps above require the UA to clear the stack back to a table context, it means + * > that the UA must, while the current node is not a table, template, or html element, pop + * > elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context + */ + private function clear_stack_to_table_context() { + // @todo we could add saftey here checking insertion modes… + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( + $item->node_name === 'TABLE' || + $item->node_name === 'TEMPLATE' || + $item->node_name === 'HTML' + ) { + break; + } + $this->state->stack_of_open_elements->remove_node( $item ); + } + } + /* * HTML semantic overrides for Tag Processor */ From ab408c170607ab0949be5246b189e647c5bd91bd Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Sun, 4 Feb 2024 18:49:12 +0100 Subject: [PATCH 02/55] table processing --- .../html-api/class-wp-html-processor.php | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8f4b9e58ce2db..267e7ed060bc9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1295,6 +1295,7 @@ private function step_in_body() { $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return true; /* * > An end tag whose tag name is "br" @@ -1793,6 +1794,7 @@ private function step_in_table() { $this->insert_html_element( new WP_HTML_Token( null, 'TBODY', false ) ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* @@ -1870,13 +1872,18 @@ private function step_in_table() { case "+FORM": if ( $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || - + $this->has_element_pointer( 'FORM' ) ) { + return $this->step(); } + $this->insert_html_element( $this->state->current_token ); + $this->set_element_pointer( 'FORM' ); + return true; /* * > An end-of-file token */ + /* * > Anything else * > Parse error. Enable foster parenting, process the token using the rules for the @@ -2481,6 +2488,14 @@ public function has_bookmark( $bookmark_name ) { return parent::has_bookmark( "_{$bookmark_name}" ); } + private function set_element_pointer( string $tag_name ) { + return parent::set_bookmark( "element_pointer_{$tag_name}" ); + } + + private function has_element_pointer( string $tag_name ) { + return parent::has_bookmark( "element_pointer_{$tag_name}" ); + } + /* * HTML Parsing Algorithms */ From 5ec8ffe68e506fbb75be2e4f2b7e976fcd1b972e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Sun, 4 Feb 2024 19:30:25 +0100 Subject: [PATCH 03/55] Disable FRAME / HEAD --- src/wp-includes/html-api/class-wp-html-processor.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 267e7ed060bc9..7076887d434f3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1440,7 +1440,9 @@ private function step_in_body() { case 'BGSOUND': case 'BODY': case 'FORM': + case 'FRAME': case 'FRAMESET': + case 'HEAD': case 'HTML': case 'IFRAME': case 'LINK': From 2be4e6365709105387d7cf1be1dd275681a629b6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 5 Feb 2024 12:26:09 +0100 Subject: [PATCH 04/55] In table body rules --- .../html-api/class-wp-html-processor.php | 140 +++++++++++++++++- 1 file changed, 137 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 7076887d434f3..d3eacd810e657 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -766,7 +766,12 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: return $this->step_in_table(); + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: + return $this->step_in_table_body(); + default: + echo "\n MODE: " . $this->state->insertion_mode . "\n"; + $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } @@ -1896,9 +1901,111 @@ private function step_in_table() { $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } + } - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + /** + * Parses next element in the 'in table body' insertion mode. + * + * This internal function performs the 'in table body' insertion mode + * logic for the generalized WP_HTML_Processor::step() function. + * + * @since 6.5.0 + * + * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intbody + * @see WP_HTML_Processor::step + * + * @return bool Whether an element was found. + */ + private function step_in_table_body() { + $tag_name = $this->get_tag(); + $op_sigil = $this->is_tag_closer() ? '-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A start tag whose tag name is "tr" + */ + case '+TR': + $this->clear_stack_to_table_body_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return true; + + /* + * > A start tag whose tag name is one of: "th", "td" + */ + case '+TH': + case '+TD': + // parse error + $this->clear_stack_to_table_body_context(); + $this->insert_html_element( + new WP_HTML_Token( null, 'TR', false ) + ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" + */ + case '-TBODY': + case '-TFOOT': + case '-THEAD': + if ( + ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) + ) { + // parse error + return $this->step(); + } + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return true; + + /* + * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead" + * > An end tag whose tag name is "table" + */ + case '+CAPTION': + case '+COL': + case '+COLGROUP': + case '+TBODY': + case '+TFOOT': + case '+THEAD': + case '-TABLE': + if ( + ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TBODY' ) && + ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'THEAD' ) && + ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TFOOT' ) + ) { + // parse error + return $this->step(); + } + $this->clear_stack_to_table_body_context(); + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th", "tr" + */ + case '-BODY': + case '-CAPTION': + case '-COL': + case '-COLGROUP': + case '-HTML': + case '-TD': + case '-TH': + case '-TR': + // parse error + return $this->step(); + } + + /* + * > Anything else + * > Process the token using the rules for the "in table" insertion mode. + */ + return $this->step_in_table(); } /* @@ -1931,10 +2038,11 @@ private function bookmark_token() { * > that the UA must, while the current node is not a table, template, or html element, pop * > elements from the stack of open elements. * + * @todo move this to open elements class. + * * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context */ private function clear_stack_to_table_context() { - // @todo we could add saftey here checking insertion modes… foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { if ( $item->node_name === 'TABLE' || @@ -1947,6 +2055,32 @@ private function clear_stack_to_table_context() { } } + /** + * Clear the stack back to a table body context. + * + * > When the steps above require the UA to clear the stack back to a table body context, it + * > means that the UA must, while the current node is not a tbody, tfoot, thead, template, or + * > html element, pop elements from the stack of open elements. + * + * @todo move this to open elements class. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context + */ + private function clear_stack_to_table_body_context() { + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( + $item->node_name === 'TBODY' || + $item->node_name === 'TFOOT' || + $item->node_name === 'THEAD' || + $item->node_name === 'TEMPLATE' || + $item->node_name === 'HTML' + ) { + break; + } + $this->state->stack_of_open_elements->remove_node( $item ); + } + } + /* * HTML semantic overrides for Tag Processor */ From 0c0484c03d8a5df09806063a699650f3ecd3dc77 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 6 Feb 2024 07:50:39 +0100 Subject: [PATCH 05/55] prep step_in_row --- .../html-api/class-wp-html-processor.php | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index d3eacd810e657..44f8863551e14 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -769,6 +769,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: return $this->step_in_table_body(); + case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: + return $this->step_in_row(); + default: echo "\n MODE: " . $this->state->insertion_mode . "\n"; @@ -2008,6 +2011,78 @@ private function step_in_table_body() { return $this->step_in_table(); } + /** + * Parses next element in the 'in row' insertion mode. + * + * This internal function performs the 'in row' insertion mode + * logic for the generalized WP_HTML_Processor::step() function. + * + * @since 6.5.0 + * + * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intr + * @see WP_HTML_Processor::step + * + * @return bool Whether an element was found. + */ + private function step_in_row() { + $tag_name = $this->get_tag(); + $op_sigil = $this->is_tag_closer() ? '-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A start tag whose tag name is one of: "th", "td" + */ + case '+TH': + case '+TD': + + /* + * > An end tag whose tag name is "tr" + */ + case '-TR': + + /* + * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" + */ + case '+CAPTION': + case '+COL': + case '+COLGROUP': + case '+TBODY': + case '+TFOOT': + case '+THEAD': + case '+TR': + + /* + * > An end tag whose tag name is "table" + */ + case '-TABLE': + + /* + * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" + */ + case '-TBODY': + case '-TFOOT': + case '-THEAD': + + /* + * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th" + */ + case '-BODY': + case '-CAPTION': + case '-COL': + case '-COLGROUP': + case '-HTML': + case '-TD': + case '-TH': + } + + /* + * > Anything else + */ + } + /* * Internal helpers */ From b91df28f0f08ed84f822745a06f06a3505d18f8c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 3 Jul 2024 21:04:54 +0200 Subject: [PATCH 06/55] Remove unsupported table element tests --- .../tests/html-api/wpHtmlProcessor.php | 10 ----- .../html-api/wpHtmlProcessorBreadcrumbs.php | 10 ----- .../wpHtmlSupportRequiredHtmlProcessor.php | 7 ---- .../wpHtmlSupportRequiredOpenElements.php | 40 ------------------- 4 files changed, 67 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index b842703a7a135..3faf1b201cad2 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -388,9 +388,6 @@ public static function data_unsupported_special_in_body_tags() { 'BASEFONT' => array( 'BASEFONT' ), 'BGSOUND' => array( 'BGSOUND' ), 'BODY' => array( 'BODY' ), - 'CAPTION' => array( 'CAPTION' ), - 'COL' => array( 'COL' ), - 'COLGROUP' => array( 'COLGROUP' ), 'FORM' => array( 'FORM' ), 'FRAME' => array( 'FRAME' ), 'FRAMESET' => array( 'FRAMESET' ), @@ -415,16 +412,9 @@ public static function data_unsupported_special_in_body_tags() { 'SCRIPT' => array( 'SCRIPT' ), 'STYLE' => array( 'STYLE' ), 'SVG' => array( 'SVG' ), - 'TABLE' => array( 'TABLE' ), - 'TBODY' => array( 'TBODY' ), - 'TD' => array( 'TD' ), 'TEMPLATE' => array( 'TEMPLATE' ), 'TEXTAREA' => array( 'TEXTAREA' ), - 'TFOOT' => array( 'TFOOT' ), - 'TH' => array( 'TH' ), - 'THEAD' => array( 'THEAD' ), 'TITLE' => array( 'TITLE' ), - 'TR' => array( 'TR' ), 'XMP' => array( 'XMP' ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 403f40a1da032..cc094e30372bd 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -171,9 +171,6 @@ public static function data_unsupported_elements() { 'BASE', 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal. 'BODY', - 'CAPTION', - 'COL', - 'COLGROUP', 'FORM', 'FRAME', 'FRAMESET', @@ -197,16 +194,9 @@ public static function data_unsupported_elements() { 'SCRIPT', 'STYLE', 'SVG', - 'TABLE', - 'TBODY', - 'TD', 'TEMPLATE', 'TEXTAREA', - 'TFOOT', - 'TH', - 'THEAD', 'TITLE', - 'TR', 'XMP', // Deprecated, use PRE instead. ); diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php index 07943cd62a2f4..fc2d3f9245408 100644 --- a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredHtmlProcessor.php @@ -75,17 +75,10 @@ public function test_generate_implied_end_tags_needs_support() { * @covers WP_HTML_Processor::generate_implied_end_tags_thoroughly */ public function test_generate_implied_end_tags_thoroughly_needs_support() { - $this->ensure_support_is_added_everywhere( 'CAPTION' ); - $this->ensure_support_is_added_everywhere( 'COLGROUP' ); $this->ensure_support_is_added_everywhere( 'RB' ); $this->ensure_support_is_added_everywhere( 'RP' ); $this->ensure_support_is_added_everywhere( 'RT' ); $this->ensure_support_is_added_everywhere( 'RTC' ); - $this->ensure_support_is_added_everywhere( 'TBODY' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TFOOT' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'HEAD' ); - $this->ensure_support_is_added_everywhere( 'TR' ); } } diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php index 48255190ad50c..7e6b75466dbe2 100644 --- a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php @@ -63,11 +63,7 @@ private function ensure_support_is_added_everywhere( $tag_name ) { public function test_has_element_in_scope_needs_support() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -101,11 +97,7 @@ public function test_has_element_in_scope_needs_support() { public function test_has_element_in_list_item_scope_needs_support() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -135,11 +127,7 @@ public function test_has_element_in_list_item_scope_needs_support() { public function test_has_element_in_button_scope_needs_support() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -170,11 +158,7 @@ public function test_has_element_in_button_scope_needs_support() { public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -205,11 +189,7 @@ public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { public function test_after_element_push_must_maintain_p_in_button_scope_flag() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -239,11 +219,7 @@ public function test_after_element_push_must_maintain_p_in_button_scope_flag() { public function test_has_element_in_table_scope_needs_support() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); @@ -261,19 +237,7 @@ public function test_has_element_in_table_scope_needs_support() { // These elements are specific to TABLE scope. $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - - // These elements depend on table scope. - $this->ensure_support_is_added_everywhere( 'CAPTION' ); - $this->ensure_support_is_added_everywhere( 'COL' ); - $this->ensure_support_is_added_everywhere( 'COLGROUP' ); - $this->ensure_support_is_added_everywhere( 'TBODY' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TFOOT' ); - $this->ensure_support_is_added_everywhere( 'TH' ); - $this->ensure_support_is_added_everywhere( 'THEAD' ); - $this->ensure_support_is_added_everywhere( 'TR' ); } /** @@ -289,11 +253,7 @@ public function test_has_element_in_table_scope_needs_support() { public function test_has_element_in_select_scope_needs_support() { // These elements impact all scopes. $this->ensure_support_is_added_everywhere( 'APPLET' ); - $this->ensure_support_is_added_everywhere( 'CAPTION' ); $this->ensure_support_is_added_everywhere( 'HTML' ); - $this->ensure_support_is_added_everywhere( 'TABLE' ); - $this->ensure_support_is_added_everywhere( 'TD' ); - $this->ensure_support_is_added_everywhere( 'TH' ); $this->ensure_support_is_added_everywhere( 'MARQUEE' ); $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); From 84b3c74f41493edcc90f0c629660e8c06029be5a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 10:02:37 +0200 Subject: [PATCH 07/55] phpcbf and implement in row --- .../html-api/class-wp-html-processor.php | 139 ++++++++++++------ 1 file changed, 98 insertions(+), 41 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 44f8863551e14..f4407cc1df504 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1301,7 +1301,7 @@ private function step_in_body() { $this->close_a_p_element(); } $this->insert_html_element( $this->state->current_token ); - $this->state->frameset_ok = false; + $this->state->frameset_ok = false; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return true; @@ -1756,7 +1756,7 @@ private function step_in_table() { /* * > A start tag whose tag name is "caption" */ - case "+CAPTION": + case '+CAPTION': $this->clear_stack_to_table_context(); $this->state->active_formatting_elements->set_marker(); $this->insert_html_element( $this->state->current_token ); @@ -1766,7 +1766,7 @@ private function step_in_table() { /* * > A start tag whose tag name is "colgroup" */ - case "+COLGROUP": + case '+COLGROUP': $this->clear_stack_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; @@ -1775,7 +1775,7 @@ private function step_in_table() { /* * > A start tag whose tag name is "col" */ - case "+COL": + case '+COL': $this->clear_stack_to_table_context(); $this->insert_html_element( new WP_HTML_Token( null, 'COLGROUP', false ) @@ -1786,9 +1786,9 @@ private function step_in_table() { /* * > A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ - case "+TBODY": - case "+TFOOT": - case "+THEAD": + case '+TBODY': + case '+TFOOT': + case '+THEAD': $this->clear_stack_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; @@ -1797,9 +1797,9 @@ private function step_in_table() { /* * > A start tag whose tag name is one of: "td", "th", "tr" */ - case "+TD": - case "+TH": - case "+TR": + case '+TD': + case '+TH': + case '+TR': $this->clear_stack_to_table_context(); $this->insert_html_element( new WP_HTML_Token( null, 'TBODY', false ) @@ -1810,7 +1810,7 @@ private function step_in_table() { /* * > A start tag whose tag name is "table" */ - case "+TABLE": + case '+TABLE': // pase error if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { return $this->step(); @@ -1822,7 +1822,7 @@ private function step_in_table() { /* * > An end tag whose tag name is "table" */ - case "-TABLE": + case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { // parse error return $this->step(); @@ -1834,17 +1834,17 @@ private function step_in_table() { /* * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ - case "-BODY": - case "-CAPTION": - case "-COL": - case "-COLGROUP": - case "-HTML": - case "-TBODY": - case "-TD": - case "-TFOOT": - case "-TH": - case "-THEAD": - case "-TR": + case '-BODY': + case '-CAPTION': + case '-COL': + case '-COLGROUP': + case '-HTML': + case '-TBODY': + case '-TD': + case '-TFOOT': + case '-TH': + case '-THEAD': + case '-TR': // parse error return $this->step(); @@ -1852,10 +1852,10 @@ private function step_in_table() { * > A start tag whose tag name is one of: "style", "script", "template" * > An end tag whose tag name is "template" */ - case "+STYLE": - case "+SCRIPT": - case "+TEMPLATE": - case "-TEMPLATE": + case '+STYLE': + case '+SCRIPT': + case '+TEMPLATE': + case '-TEMPLATE': // > Process the token using the rules for the "in head" insertion mode. $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); @@ -1867,7 +1867,7 @@ private function step_in_table() { * > that attribute's value is not an ASCII case-insensitive match for the string * > "hidden", then: act as described in the "anything else" entry below. */ - case "+INPUT": + case '+INPUT': $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { goto in_table_anything_else; @@ -1879,7 +1879,7 @@ private function step_in_table() { /* * > A start tag whose tag name is "form" */ - case "+FORM": + case '+FORM': if ( $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || $this->has_element_pointer( 'FORM' ) @@ -2037,14 +2037,27 @@ private function step_in_row() { */ case '+TH': case '+TD': + $this->clear_stack_to_table_row_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->active_formatting_elements->set_marker(); + return true; /* * > An end tag whose tag name is "tr" */ case '-TR': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { + // this is a parse error; ignore the token. + return $this->step(); + } + $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" + * > An end tag whose tag name is "table" */ case '+CAPTION': case '+COL': @@ -2053,11 +2066,15 @@ private function step_in_row() { case '+TFOOT': case '+THEAD': case '+TR': - - /* - * > An end tag whose tag name is "table" - */ case '-TABLE': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { + // this is a parse error; ignore the token. + return $this->step(); + } + $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" @@ -2065,6 +2082,18 @@ private function step_in_row() { case '-TBODY': case '-TFOOT': case '-THEAD': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { + // this is a parse error; ignore the token. + return $this->step(); + } + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { + // ignore the token. + return $this->step(); + } + $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th" @@ -2076,11 +2105,15 @@ private function step_in_row() { case '-HTML': case '-TD': case '-TH': + // this is a parse error; ignore the token. + return $this->step(); } /* * > Anything else + * > Process the token using the rules for the "in table" insertion mode. */ + return $this->step_in_table(); } /* @@ -2120,9 +2153,9 @@ private function bookmark_token() { private function clear_stack_to_table_context() { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { if ( - $item->node_name === 'TABLE' || - $item->node_name === 'TEMPLATE' || - $item->node_name === 'HTML' + 'TABLE' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name ) { break; } @@ -2144,11 +2177,35 @@ private function clear_stack_to_table_context() { private function clear_stack_to_table_body_context() { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { if ( - $item->node_name === 'TBODY' || - $item->node_name === 'TFOOT' || - $item->node_name === 'THEAD' || - $item->node_name === 'TEMPLATE' || - $item->node_name === 'HTML' + 'TBODY' === $item->node_name || + 'TFOOT' === $item->node_name || + 'THEAD' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->state->stack_of_open_elements->remove_node( $item ); + } + } + + /** + * Clear the stack back to a table row context. + * + * > When the steps above require the UA to clear the stack back to a table row context, it + * > means that the UA must, while the current node is not a tr, template, or html element, pop + * > elements from the stack of open elements. + * + * @todo move this to open elements class. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context + */ + private function clear_stack_to_table_row_context() { + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( + 'TR' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name ) { break; } From 1a90c263e3baa1c9a444a46699e356dfe7a176a7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:23:29 +0200 Subject: [PATCH 08/55] Add clear_up_to_last_marker to active formatting This is an algorithm defined in the standard: https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker --- ...ass-wp-html-active-formatting-elements.php | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 0f71d9d70fc0e..3048e6d2ca3fd 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -188,4 +188,31 @@ public function walk_up() { public function set_marker() { $this->push( new WP_HTML_Token( null, 'marker', false ) ); } + + /** + * Clears the list of active formatting elements up to the last marker. + * + * > When the steps below require the UA to clear the list of active formatting elements up to + * > the last marker, the UA must perform the following steps: + * > + * > 1. Let entry be the last (most recently added) entry in the list of active + * > formatting elements. + * > 2. Remove entry from the list of active formatting elements. + * > 3. If entry was a marker, then stop the algorithm at this point. + * > The list has been cleared up to the last marker. + * > 4. Go to step 1. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker + * + * @since 6.7.0 + */ + public function clear_up_to_last_marker(): void { + foreach ( $this->walk_up() as $item ) { + $is_marker = 'marker' === $item->node_name; + array_pop( $this->stack ); + if ( $is_marker ) { + break; + } + } + } } From dafe4eacf2a4c51caa81a2dc928fe27393ae69bb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:24:01 +0200 Subject: [PATCH 09/55] Add step_in_cell method --- .../html-api/class-wp-html-processor.php | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f4407cc1df504..8861a0a66011a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -772,6 +772,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: return $this->step_in_row(); + case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: + return $this->step_in_cell(); + default: echo "\n MODE: " . $this->state->insertion_mode . "\n"; @@ -2116,6 +2119,105 @@ private function step_in_row() { return $this->step_in_table(); } + /** + * Parses next element in the 'in cell' insertion mode. + * + * This internal function performs the 'in cell' insertion mode + * logic for the generalized WP_HTML_Processor::step() function. + * + * @since 6.7.0 + * + * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intd + * @see WP_HTML_Processor::step + * + * @return bool Whether an element was found. + */ + private function step_in_cell() { + $tag_name = $this->get_tag(); + $op_sigil = $this->is_tag_closer() ? '-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > An end tag whose tag name is one of: "td", "th" + */ + case '-TD': + case '-TH': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { + // this is a parse error; ignore the token. + return $this->step(); + } + $this->generate_implied_end_tags(); + /* + * @todo report a parse error when supported. + * + * if ( ! $this->state->stack_of_open_elements->current_node()->node_name ) {} + */ + $this->state->stack_of_open_elements->pop_until( $tag_name ); + $this->state->active_formatting_elements->clear_up_to_last_marker(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return true; + + /* + * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td", + * > "tfoot", "th", "thead", "tr" + */ + case '+CAPTION': + case '+COL': + case '+COLGROUP': + case '+TBODY': + case '+TD': + case '+TFOOT': + case '+TH': + case '+THEAD': + case '+TR': + // Assert: The stack of open elements has a td or th element in table scope. + if ( + ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TD' ) && + ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TH' ) + ) { + throw new Exception( 'Assertion failed @todo better message' ); + } + + $this->close_cell(); + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html" + */ + case '-BODY': + case '-CAPTION': + case '-COL': + case '-COLGROUP': + case '-HTML': + // Parse error. Ignore the token. + return $this->step(); + + /* + * > An end tag whose tag name is one of: "table", "tbody", "tfoot", "thead", "tr" + */ + case '-TABLE': + case '-TBODY': + case '-TFOOT': + case '-THEAD': + case '-TR': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { + // Parse error. Ignore the token. + return $this->step(); + } + $this->close_cell(); + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + + /* + * > Anything else + * > Process the token using the rules for the "in body" insertion mode. + */ + return $this->step_in_body(); + } + /* * Internal helpers */ From cd8d7e76c7b7f0b0dee4331705051507729136c6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:24:50 +0200 Subject: [PATCH 10/55] Use class name for processor state constants access --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8861a0a66011a..e1c52beb14fad 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2055,7 +2055,7 @@ private function step_in_row() { } $this->clear_stack_to_table_row_context(); $this->state->stack_of_open_elements->pop(); - $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return true; /* @@ -2076,7 +2076,7 @@ private function step_in_row() { } $this->clear_stack_to_table_row_context(); $this->state->stack_of_open_elements->pop(); - $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* @@ -2095,7 +2095,7 @@ private function step_in_row() { } $this->clear_stack_to_table_row_context(); $this->state->stack_of_open_elements->pop(); - $this->state->insertion_mode = $this->state::INSERTION_MODE_IN_TABLE_BODY; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* From a7b565bee8864e21e40e588fb41d71a85b92bda9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:44:49 +0200 Subject: [PATCH 11/55] Update since tags --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e1c52beb14fad..e2b7f4726400b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1732,7 +1732,7 @@ private function step_in_select() { * This internal function performs the 'in table' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.5.0 + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -1915,7 +1915,7 @@ private function step_in_table() { * This internal function performs the 'in table body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.5.0 + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -2020,7 +2020,7 @@ private function step_in_table_body() { * This internal function performs the 'in row' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.5.0 + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * From 3971e2bb2ec852640f41a35863bded34aadeb903 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:55:48 +0200 Subject: [PATCH 12/55] Add close_cell method --- .../html-api/class-wp-html-processor.php | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e2b7f4726400b..9a5c1043b1ae8 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -3298,6 +3298,34 @@ private function run_adoption_agency_algorithm() { throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' ); } + /** + * Runs the close cell algorithm. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell + * + * Where the steps above say to close the cell, they mean to run the following algorithm: + + * > 1. Generate implied end tags. + * > 2. If the current node is not now a td element or a th element, then this is a parse error. + * > 3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack. + * > 4. Clear the list of active formatting elements up to the last marker. + * > 5. Switch the insertion mode to "in row". + * + * @since 6.7.0 + */ + private function close_cell(): void { + $this->generate_implied_end_tags(); + // @todo Parse error if the current node is a "td" or "th" element. + foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) { + $this->state->stack_of_open_elements->pop(); + if ( 'TD' === $element->node_name || 'TH' === $element->node_name ) { + break; + } + } + $this->state->active_formatting_elements->clear_up_to_last_marker(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + } + /** * Inserts an HTML element on the stack of open elements. * From 6316e4b67006a2172f368e7cba16ec34691b5f9a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 13:56:10 +0200 Subject: [PATCH 13/55] Pop from open elements instead of removing items --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9a5c1043b1ae8..a76d733d85807 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2261,7 +2261,7 @@ private function clear_stack_to_table_context() { ) { break; } - $this->state->stack_of_open_elements->remove_node( $item ); + $this->state->stack_of_open_elements->pop(); } } @@ -2287,7 +2287,7 @@ private function clear_stack_to_table_body_context() { ) { break; } - $this->state->stack_of_open_elements->remove_node( $item ); + $this->state->stack_of_open_elements->pop(); } } @@ -2311,7 +2311,7 @@ private function clear_stack_to_table_row_context() { ) { break; } - $this->state->stack_of_open_elements->remove_node( $item ); + $this->state->stack_of_open_elements->pop(); } } From cff5faa2b1575c728a8c3d3f30c4b3f9e8ba95ea Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 15:35:03 +0200 Subject: [PATCH 14/55] Complete cases in step_in_table --- .../html-api/class-wp-html-processor.php | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a76d733d85807..ec24c64fa00c4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1742,20 +1742,36 @@ private function step_in_select() { * @return bool Whether an element was found. */ private function step_in_table() { - $tag_name = $this->get_tag(); - $op_sigil = $this->is_tag_closer() ? '-' : '+'; - $op = "{$op_sigil}{$tag_name}"; + $token_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$token_name}"; + switch ( $op ) { /* * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element */ + case '#text': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Text in tables is not supported." ); + /* * > A comment token */ + case '#comment': + case '#funky-comment': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* * > A DOCTYPE token */ + case 'html': + // Parse error. Ignore the token. + return $this->step(); + /* * > A start tag whose tag name is "caption" */ @@ -1873,7 +1889,7 @@ private function step_in_table() { case '+INPUT': $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { - goto in_table_anything_else; + break; } // parse error $this->insert_html_element( $this->state->current_token ); @@ -1897,16 +1913,15 @@ private function step_in_table() { * > An end-of-file token */ - /* - * > Anything else - * > Parse error. Enable foster parenting, process the token using the rules for the - * > "in body" insertion mode, and then disable foster parenting. - */ - default: - in_table_anything_else: - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } + + /* + * > Anything else + * > Parse error. Enable foster parenting, process the token using the rules for the + * > "in body" insertion mode, and then disable foster parenting. + */ + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } /** From 6ef9df9b7c61e1e4ddc52d0dfe8fb40dd1c90f7f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 16:04:22 +0200 Subject: [PATCH 15/55] Implement in_table_scope --- .../html-api/class-wp-html-open-elements.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index d1585cdea5bf5..c9053f7e885bb 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -288,19 +288,19 @@ public function has_element_in_button_scope( $tag_name ) { /** * Returns whether a particular element is in table scope. * - * @since 6.4.0 + * @since 6.4.0 Stub implementation (throws). + * @since 6.7.0 Full implementation. * * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope * - * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. - * * @param string $tag_name Name of tag to check. * @return bool Whether given element is in scope. */ public function has_element_in_table_scope( $tag_name ) { - throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' ); - - return false; // The linter requires this unreachable code until the function is implemented and can return. + return $this->has_element_in_specific_scope( + $tag_name, + array( 'HTML', 'TABLE', 'TEMPLATE' ) + ); } /** From 9c59014835b6bdfd559a2164de08f72b40690790 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 4 Jul 2024 16:12:09 +0200 Subject: [PATCH 16/55] Add HTML elements to has_element_in_scope handling --- .../html-api/class-wp-html-open-elements.php | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index c9053f7e885bb..8528895d04db8 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -227,6 +227,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) { * Returns whether a particular element is in scope. * * @since 6.4.0 + * @since 6.7.0 Add handling for all HTML elements. * * @see https://html.spec.whatwg.org/#has-an-element-in-scope * @@ -237,13 +238,27 @@ public function has_element_in_scope( $tag_name ) { return $this->has_element_in_specific_scope( $tag_name, array( - + 'APPLET', + 'CAPTION', + 'HTML', + 'TABLE', + 'TD', + 'TH', + 'MARQUEE', + 'OBJECT', + 'TEMPLATE', /* - * Because it's not currently possible to encounter - * one of the termination elements, they don't need - * to be listed here. If they were, they would be - * unreachable and only waste CPU cycles while - * scanning through HTML. + * Foreign content not yet supported + * + * - MathML mi + * - MathML mo + * - MathML mn + * - MathML ms + * - MathML mtext + * - MathML annotation-xml + * - SVG foreignObject + * - SVG desc + * - SVG title */ ) ); From fce641dc5f82d81c2554f9afaf79b7bf3b2c20d8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 13:57:42 +0200 Subject: [PATCH 17/55] Use insert_marker over set_marker Use the method implemented in https://github.com/WordPress/wordpress-develop/pull/6982 to avoid duplicating the same functionality. --- ...ass-wp-html-active-formatting-elements.php | 23 ++++++++++++++----- .../html-api/class-wp-html-processor.php | 4 ++-- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 3048e6d2ca3fd..02c0cc0f0a27b 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -86,6 +86,22 @@ public function current_node() { return $current_node ? $current_node : null; } + /** + * Inserts a "marker" at the end of the list of active formatting elements. + * + * > The markers are inserted when entering applet, object, marquee, + * > template, td, th, and caption elements, and are used to prevent + * > formatting from "leaking" into applet, object, marquee, template, + * > td, th, and caption elements. + * + * @see https://html.spec.whatwg.org/#concept-parser-marker + * + * @since 6.7.0 + */ + public function insert_marker() { + $this->push( new WP_HTML_Token( null, 'marker', false ) ); + } + /** * Pushes a node onto the stack of active formatting elements. * @@ -185,10 +201,6 @@ public function walk_up() { } } - public function set_marker() { - $this->push( new WP_HTML_Token( null, 'marker', false ) ); - } - /** * Clears the list of active formatting elements up to the last marker. * @@ -208,9 +220,8 @@ public function set_marker() { */ public function clear_up_to_last_marker(): void { foreach ( $this->walk_up() as $item ) { - $is_marker = 'marker' === $item->node_name; array_pop( $this->stack ); - if ( $is_marker ) { + if ( 'marker' === $item->node_name ) { break; } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 01073717329c4..91eea4a5e8d02 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1776,7 +1776,7 @@ private function step_in_table() { */ case '+CAPTION': $this->clear_stack_to_table_context(); - $this->state->active_formatting_elements->set_marker(); + $this->state->active_formatting_elements->insert_marker(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; return true; @@ -2113,7 +2113,7 @@ private function step_in_row() { case '+TD': $this->clear_stack_to_table_row_context(); $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->set_marker(); + $this->state->active_formatting_elements->insert_marker(); return true; /* From 693a791d4b09c58786d3e7c684ae789ae18be895 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:19:12 +0200 Subject: [PATCH 18/55] Use newly implemented step_in_X methods Instead of bailing directly, call the appropriate step_in_ method. --- src/wp-includes/html-api/class-wp-html-processor.php | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 91eea4a5e8d02..61a6a1da72fa5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1746,14 +1746,12 @@ private function step_in_table() { $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; - switch ( $op ) { /* * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element */ case '#text': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Text in tables is not supported." ); + return $this->step_in_table_text(); /* * > A comment token @@ -1875,8 +1873,7 @@ private function step_in_table() { case '+TEMPLATE': case '-TEMPLATE': // > Process the token using the rules for the "in head" insertion mode. - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + return $this->step_in_head(); /* * > A start tag whose tag name is "input" From d688c100f3d01d7a45c3fb06803b4436939963f9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:19:25 +0200 Subject: [PATCH 19/55] Clean whitespace --- src/wp-includes/html-api/class-wp-html-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 61a6a1da72fa5..cbdc52a87ff4a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1761,7 +1761,6 @@ private function step_in_table() { $this->insert_html_element( $this->state->current_token ); return true; - /* * > A DOCTYPE token */ From 39eba92dca9a5d481c65b8ae46155394334b5345 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:19:42 +0200 Subject: [PATCH 20/55] Use bail method in case of foster parenting --- src/wp-includes/html-api/class-wp-html-processor.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cbdc52a87ff4a..22885ab83b436 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1915,8 +1915,7 @@ private function step_in_table() { * > Parse error. Enable foster parenting, process the token using the rules for the * > "in body" insertion mode, and then disable foster parenting. */ - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + $this->bail("Foster parenting is unsupported."); } /** From 2a3d7d43f6804fc47523916c78b6c53bd903b52e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:20:18 +0200 Subject: [PATCH 21/55] Add mising cell insertion mode on enter td,th --- src/wp-includes/html-api/class-wp-html-processor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 22885ab83b436..904f7752bf4b9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2108,6 +2108,7 @@ private function step_in_row() { case '+TD': $this->clear_stack_to_table_row_context(); $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; $this->state->active_formatting_elements->insert_marker(); return true; From ff7541c3c5b0c8c62ec935605453e0b4006fbb99 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:26:23 +0200 Subject: [PATCH 22/55] PHPCBF --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 904f7752bf4b9..cc72f4f8239d1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1915,7 +1915,7 @@ private function step_in_table() { * > Parse error. Enable foster parenting, process the token using the rules for the * > "in body" insertion mode, and then disable foster parenting. */ - $this->bail("Foster parenting is unsupported."); + $this->bail( 'Foster parenting is not supported.' ); } /** From 2cf10dff8b068c153458210627ec3989ca2602ba Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:36:29 +0200 Subject: [PATCH 23/55] Use todo comments for parse errors --- .../html-api/class-wp-html-processor.php | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cc72f4f8239d1..1cb9bc35096b3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2010,7 +2010,7 @@ private function step_in_table_body() { */ case '+TH': case '+TD': - // parse error + // @todo Indicate a parse error once it's possible. $this->clear_stack_to_table_body_context(); $this->insert_html_element( new WP_HTML_Token( null, 'TR', false ) @@ -2027,7 +2027,7 @@ private function step_in_table_body() { if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // parse error + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->state->stack_of_open_elements->pop(); @@ -2050,7 +2050,7 @@ private function step_in_table_body() { ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'THEAD' ) && ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TFOOT' ) ) { - // parse error + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->clear_stack_to_table_body_context(); @@ -2069,7 +2069,7 @@ private function step_in_table_body() { case '-TD': case '-TH': case '-TR': - // parse error + // @todo Indicate a parse error once it's possible. return $this->step(); } @@ -2117,7 +2117,7 @@ private function step_in_row() { */ case '-TR': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { - // this is a parse error; ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->clear_stack_to_table_row_context(); @@ -2138,7 +2138,7 @@ private function step_in_row() { case '+TR': case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { - // this is a parse error; ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->clear_stack_to_table_row_context(); @@ -2153,7 +2153,7 @@ private function step_in_row() { case '-TFOOT': case '-THEAD': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // this is a parse error; ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { @@ -2175,7 +2175,7 @@ private function step_in_row() { case '-HTML': case '-TD': case '-TH': - // this is a parse error; ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } @@ -2213,15 +2213,13 @@ private function step_in_cell() { case '-TD': case '-TH': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // this is a parse error; ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->generate_implied_end_tags(); - /* - * @todo report a parse error when supported. - * - * if ( ! $this->state->stack_of_open_elements->current_node()->node_name ) {} - */ + if ( ! $this->state->stack_of_open_elements->current_node()->node_name ) { + // @todo Indicate a parse error once it's possible. + } $this->state->stack_of_open_elements->pop_until( $tag_name ); $this->state->active_formatting_elements->clear_up_to_last_marker(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; @@ -2259,7 +2257,7 @@ private function step_in_cell() { case '-COL': case '-COLGROUP': case '-HTML': - // Parse error. Ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); /* @@ -2271,7 +2269,7 @@ private function step_in_cell() { case '-THEAD': case '-TR': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // Parse error. Ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->close_cell(); From a216d557d290c3d766e70582bf2429b1a146ed5b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:37:10 +0200 Subject: [PATCH 24/55] Remove EOF comment --- src/wp-includes/html-api/class-wp-html-processor.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1cb9bc35096b3..1a34a8a9ab808 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1903,11 +1903,6 @@ private function step_in_table() { $this->insert_html_element( $this->state->current_token ); $this->set_element_pointer( 'FORM' ); return true; - - /* - * > An end-of-file token - */ - } /* From 7d7f6884020e0fd53a585496808d6a939c21610b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:44:55 +0200 Subject: [PATCH 25/55] Move stack methods to stack class --- .../html-api/class-wp-html-open-elements.php | 74 ++++++++++++++ .../html-api/class-wp-html-processor.php | 98 +++---------------- 2 files changed, 86 insertions(+), 86 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 8528895d04db8..a186379207140 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -601,6 +601,80 @@ public function after_element_pop( $item ) { } } + /** + * Clear the stack back to a table context. + * + * > When the steps above require the UA to clear the stack back to a table context, it means + * > that the UA must, while the current node is not a table, template, or html element, pop + * > elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context + * + * @since 6.7.0 + */ + public function clear_to_table_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TABLE' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + + /** + * Clear the stack back to a table body context. + * + * > When the steps above require the UA to clear the stack back to a table body context, it + * > means that the UA must, while the current node is not a tbody, tfoot, thead, template, or + * > html element, pop elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context + * + * @since 6.7.0 + */ + public function clear_to_table_body_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TBODY' === $item->node_name || + 'TFOOT' === $item->node_name || + 'THEAD' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + + /** + * Clear the stack back to a table row context. + * + * > When the steps above require the UA to clear the stack back to a table row context, it + * > means that the UA must, while the current node is not a tr, template, or html element, pop + * > elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context + * + * @since 6.7.0 + */ + public function clear_to_table_row_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TR' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + /** * Wakeup magic method. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1a34a8a9ab808..a292d22e7cb9d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1772,7 +1772,7 @@ private function step_in_table() { * > A start tag whose tag name is "caption" */ case '+CAPTION': - $this->clear_stack_to_table_context(); + $this->state->stack_of_open_elements->clear_to_table_context(); $this->state->active_formatting_elements->insert_marker(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; @@ -1782,7 +1782,7 @@ private function step_in_table() { * > A start tag whose tag name is "colgroup" */ case '+COLGROUP': - $this->clear_stack_to_table_context(); + $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return true; @@ -1791,7 +1791,7 @@ private function step_in_table() { * > A start tag whose tag name is "col" */ case '+COL': - $this->clear_stack_to_table_context(); + $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( new WP_HTML_Token( null, 'COLGROUP', false ) ); @@ -1804,7 +1804,7 @@ private function step_in_table() { case '+TBODY': case '+TFOOT': case '+THEAD': - $this->clear_stack_to_table_context(); + $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return true; @@ -1815,7 +1815,7 @@ private function step_in_table() { case '+TD': case '+TH': case '+TR': - $this->clear_stack_to_table_context(); + $this->state->stack_of_open_elements->clear_to_table_context(); $this->insert_html_element( new WP_HTML_Token( null, 'TBODY', false ) ); @@ -1995,7 +1995,7 @@ private function step_in_table_body() { * > A start tag whose tag name is "tr" */ case '+TR': - $this->clear_stack_to_table_body_context(); + $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return true; @@ -2006,7 +2006,7 @@ private function step_in_table_body() { case '+TH': case '+TD': // @todo Indicate a parse error once it's possible. - $this->clear_stack_to_table_body_context(); + $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->insert_html_element( new WP_HTML_Token( null, 'TR', false ) ); @@ -2048,7 +2048,7 @@ private function step_in_table_body() { // @todo Indicate a parse error once it's possible. return $this->step(); } - $this->clear_stack_to_table_body_context(); + $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2101,7 +2101,7 @@ private function step_in_row() { */ case '+TH': case '+TD': - $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->insert_html_element( $this->state->current_token ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; $this->state->active_formatting_elements->insert_marker(); @@ -2115,7 +2115,7 @@ private function step_in_row() { // @todo Indicate a parse error once it's possible. return $this->step(); } - $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return true; @@ -2136,7 +2136,7 @@ private function step_in_row() { // @todo Indicate a parse error once it's possible. return $this->step(); } - $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2155,7 +2155,7 @@ private function step_in_row() { // ignore the token. return $this->step(); } - $this->clear_stack_to_table_row_context(); + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2634,80 +2634,6 @@ private function bookmark_token() { return "{$this->bookmark_counter}"; } - /** - * Clear the stack back to a table context. - * - * > When the steps above require the UA to clear the stack back to a table context, it means - * > that the UA must, while the current node is not a table, template, or html element, pop - * > elements from the stack of open elements. - * - * @todo move this to open elements class. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context - */ - private function clear_stack_to_table_context() { - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( - 'TABLE' === $item->node_name || - 'TEMPLATE' === $item->node_name || - 'HTML' === $item->node_name - ) { - break; - } - $this->state->stack_of_open_elements->pop(); - } - } - - /** - * Clear the stack back to a table body context. - * - * > When the steps above require the UA to clear the stack back to a table body context, it - * > means that the UA must, while the current node is not a tbody, tfoot, thead, template, or - * > html element, pop elements from the stack of open elements. - * - * @todo move this to open elements class. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context - */ - private function clear_stack_to_table_body_context() { - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( - 'TBODY' === $item->node_name || - 'TFOOT' === $item->node_name || - 'THEAD' === $item->node_name || - 'TEMPLATE' === $item->node_name || - 'HTML' === $item->node_name - ) { - break; - } - $this->state->stack_of_open_elements->pop(); - } - } - - /** - * Clear the stack back to a table row context. - * - * > When the steps above require the UA to clear the stack back to a table row context, it - * > means that the UA must, while the current node is not a tr, template, or html element, pop - * > elements from the stack of open elements. - * - * @todo move this to open elements class. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context - */ - private function clear_stack_to_table_row_context() { - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( - 'TR' === $item->node_name || - 'TEMPLATE' === $item->node_name || - 'HTML' === $item->node_name - ) { - break; - } - $this->state->stack_of_open_elements->pop(); - } - } - /* * HTML semantic overrides for Tag Processor */ From e171589b9ca896728c26e556d1b5b9c84dc6b98b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 14:50:46 +0200 Subject: [PATCH 26/55] Add and use form_element pointer --- .../html-api/class-wp-html-processor-state.php | 11 +++++++++++ src/wp-includes/html-api/class-wp-html-processor.php | 12 ++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index ab75041bee3d0..a003828492a46 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -390,6 +390,17 @@ class WP_HTML_Processor_State { */ public $head_element = null; + /** + * FORM element pointer. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#form-element-pointer + * + * @var WP_HTML_Token|null + */ + public $form_element = null; + /** * The frameset-ok flag indicates if a `FRAMESET` element is allowed in the current state. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a292d22e7cb9d..21b9a99ac0f78 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1896,12 +1896,12 @@ private function step_in_table() { case '+FORM': if ( $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || - $this->has_element_pointer( 'FORM' ) + isset( $this->state->form_element ) ) { return $this->step(); } $this->insert_html_element( $this->state->current_token ); - $this->set_element_pointer( 'FORM' ); + $this->state->form_element = $this->state->current_token; return true; } @@ -3182,14 +3182,6 @@ public function has_bookmark( $bookmark_name ) { return parent::has_bookmark( "_{$bookmark_name}" ); } - private function set_element_pointer( string $tag_name ) { - return parent::set_bookmark( "element_pointer_{$tag_name}" ); - } - - private function has_element_pointer( string $tag_name ) { - return parent::has_bookmark( "element_pointer_{$tag_name}" ); - } - /* * HTML Parsing Algorithms */ From fbd635d979607f8ea321dfc104c6601ac9ffe2a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 16:12:23 +0200 Subject: [PATCH 27/55] Handle presumptuous tags as if they were comments --- src/wp-includes/html-api/class-wp-html-processor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 21b9a99ac0f78..1931b3847f613 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1758,6 +1758,7 @@ private function step_in_table() { */ case '#comment': case '#funky-comment': + case '#presumptuous-tag': $this->insert_html_element( $this->state->current_token ); return true; From af1142a4838c1cde5962df6ca420a8c4f16b744c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 16:18:51 +0200 Subject: [PATCH 28/55] Add test for table > form > #comment --- .../html-api/wpHtmlProcessorSemanticRules.php | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index adce614506429..e396101df33a0 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -429,4 +429,28 @@ public function test_br_end_tag_unsupported() { $this->assertFalse( $processor->next_tag(), 'Found a BR tag that should not be handled.' ); $this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $processor->get_last_error() ); } + + /******************************************************************* + * RULES FOR "IN TABLE" MODE + *******************************************************************/ + + /** + * Ensure that form elements in tables (but not cells) are immediately popped off the stack. + * + * @ticket 61576 + */ + public function test_table_form_element_immediately_popped() { + $processor = WP_HTML_Processor::create_fragment( '' ); + + // There should be a FORM opener and a (virtual) FORM closer. + $this->assertTrue( $processor->next_tag( 'FORM' ) ); + $this->assertTrue( $processor->next_token() ); + $this->assertSame( 'FORM', $processor->get_token_name() ); + $this->assertTrue( $processor->is_tag_closer() ); + + // Followed by the comment token. + $this->assertTrue( $processor->next_token() ); + $this->assertSame( '#comment', $processor->get_token_name() ); + $this->assertsame( array( 'HTML', 'BODY', 'TABLE', '#comment' ), $processor->get_breadcrumbs() ); + } } From a7f5a221e2f3e03a41acd6dba9462107a6acdde2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 16:27:50 +0200 Subject: [PATCH 29/55] Pop FORM elements off the stack in tables --- src/wp-includes/html-api/class-wp-html-processor.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1931b3847f613..a1c1a2884ba20 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1903,6 +1903,8 @@ private function step_in_table() { } $this->insert_html_element( $this->state->current_token ); $this->state->form_element = $this->state->current_token; + // > Pop that form element off the stack of open elements. + $this->state->stack_of_open_elements->pop(); return true; } From a6a7c7d68c7966aa21ef31d0c32bf38880fa33d7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 16:30:59 +0200 Subject: [PATCH 30/55] Be more consistent in parse error comments --- src/wp-includes/html-api/class-wp-html-processor.php | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a1c1a2884ba20..252af50adc79b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1626,6 +1626,7 @@ private function step_in_body() { case 'TH': case 'THEAD': case 'TR': + // @todo Indicate a parse error once it's possible. return $this->step(); } @@ -1766,7 +1767,7 @@ private function step_in_table() { * > A DOCTYPE token */ case 'html': - // Parse error. Ignore the token. + // @todo Indicate a parse error once it's possible. return $this->step(); /* @@ -1840,7 +1841,7 @@ private function step_in_table() { */ case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { - // parse error + // @todo Indicate a parse error once it's possible. return $this->step(); } $this->state->stack_of_open_elements->pop_until( 'TABLE' ); @@ -1861,7 +1862,7 @@ private function step_in_table() { case '-TH': case '-THEAD': case '-TR': - // parse error + // @todo Indicate a parse error once it's possible. return $this->step(); /* @@ -1887,7 +1888,7 @@ private function step_in_table() { if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { break; } - // parse error + // @todo Indicate a parse error once it's possible. $this->insert_html_element( $this->state->current_token ); return true; @@ -1912,6 +1913,8 @@ private function step_in_table() { * > Anything else * > Parse error. Enable foster parenting, process the token using the rules for the * > "in body" insertion mode, and then disable foster parenting. + * + * @todo Indicate a parse error once it's possible. */ $this->bail( 'Foster parenting is not supported.' ); } From 345b776fd5a95f64a8eb70c58f6f9a45993c88a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 17:30:21 +0200 Subject: [PATCH 31/55] Remove outdated "stub implementation" notes --- src/wp-includes/html-api/class-wp-html-processor.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 252af50adc79b..4b9dbd3195879 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1732,7 +1732,7 @@ private function step_in_body() { * This internal function performs the 'in table' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -1982,7 +1982,7 @@ private function step_in_column_group() { * This internal function performs the 'in table body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -2087,7 +2087,7 @@ private function step_in_table_body() { * This internal function performs the 'in row' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -2193,7 +2193,7 @@ private function step_in_row() { * This internal function performs the 'in cell' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * From 4966e7a1acdee977ba1a8f5eea62eca90c7c458a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 21:24:58 +0200 Subject: [PATCH 32/55] Add return types --- src/wp-includes/html-api/class-wp-html-processor.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4b9dbd3195879..372aa96bbbcb9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1741,7 +1741,7 @@ private function step_in_body() { * * @return bool Whether an element was found. */ - private function step_in_table() { + private function step_in_table(): bool { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; @@ -1991,7 +1991,7 @@ private function step_in_column_group() { * * @return bool Whether an element was found. */ - private function step_in_table_body() { + private function step_in_table_body(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; @@ -2096,7 +2096,7 @@ private function step_in_table_body() { * * @return bool Whether an element was found. */ - private function step_in_row() { + private function step_in_row(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; @@ -2202,7 +2202,7 @@ private function step_in_row() { * * @return bool Whether an element was found. */ - private function step_in_cell() { + private function step_in_cell(): bool { $tag_name = $this->get_tag(); $op_sigil = $this->is_tag_closer() ? '-' : '+'; $op = "{$op_sigil}{$tag_name}"; From c111a74da29e83826c10028e518d982ebdf91147 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 22:44:11 +0200 Subject: [PATCH 33/55] Handle whitespace in TABLE text --- .../html-api/class-wp-html-processor.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 372aa96bbbcb9..6db22e2a7a0c9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1752,7 +1752,19 @@ private function step_in_table(): bool { * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element */ case '#text': - return $this->step_in_table_text(); + $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; + if ( + strspn( $this->html, "\u{0000}\u{0009}\u{000A}\u{000C}\u{000D}\u{0020}", $current_token->start, $current_token->length ) === $current_token->length + ) { + if ( strspn( $this->html, "\u{0000}", $current_token->start, $current_token->length ) === $current_token->length ) { + // @todo Indicate a parse error once it's possible. + return $this->step(); + } + $this->insert_html_element( $this->state->current_token ); + return true; + } + $this->bail( 'Foster parenting is not supported.' ); + break; /* * > A comment token From c7f6da6696a0ee5e366ed2391ef68b1ea08a912e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 16 Jul 2024 22:53:01 +0200 Subject: [PATCH 34/55] Fix table start tag handling --- .../html-api/class-wp-html-processor.php | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 6db22e2a7a0c9..280328263c60d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1615,17 +1615,17 @@ private function step_in_body() { /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" */ - case 'CAPTION': - case 'COL': - case 'COLGROUP': - case 'FRAME': - case 'HEAD': - case 'TBODY': - case 'TD': - case 'TFOOT': - case 'TH': - case 'THEAD': - case 'TR': + case '+CAPTION': + case '+COL': + case '+COLGROUP': + case '+FRAME': + case '+HEAD': + case '+TBODY': + case '+TD': + case '+TFOOT': + case '+TH': + case '+THEAD': + case '+TR': // @todo Indicate a parse error once it's possible. return $this->step(); } From d14eaf318a3e51428b41cb8c8dee179ce6082d15 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 17 Jul 2024 09:18:07 +0200 Subject: [PATCH 35/55] Remove "COL" from void tags test The COL element can only appear in TABLE and is not suitable for the void tag nesting test. --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 3faf1b201cad2..b9a4d8c375aba 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -342,7 +342,6 @@ public static function data_void_tags() { 'AREA' => array( 'AREA' ), 'BASE' => array( 'BASE' ), 'BR' => array( 'BR' ), - 'COL' => array( 'COL' ), 'EMBED' => array( 'EMBED' ), 'HR' => array( 'HR' ), 'IMG' => array( 'IMG' ), From eaa83598f866c48003d7d50c9a5a28896e9facf7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 17 Jul 2024 17:40:07 +0200 Subject: [PATCH 36/55] Fix handling of table text according to specification --- .../html-api/class-wp-html-processor.php | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a1fce26d41c04..64d2ad7043fc5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1753,18 +1753,31 @@ private function step_in_table(): bool { * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element */ case '#text': - $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; if ( - strspn( $this->html, "\u{0000}\u{0009}\u{000A}\u{000C}\u{000D}\u{0020}", $current_token->start, $current_token->length ) === $current_token->length + $this->state->stack_of_open_elements->current_node_is( 'TABLE' ) || + $this->state->stack_of_open_elements->current_node_is( 'TBODY' ) || + $this->state->stack_of_open_elements->current_node_is( 'TEMPLATE' ) || + $this->state->stack_of_open_elements->current_node_is( 'TFOOT' ) || + $this->state->stack_of_open_elements->current_node_is( 'THEAD' ) || + $this->state->stack_of_open_elements->current_node_is( 'TR' ) ) { - if ( strspn( $this->html, "\u{0000}", $current_token->start, $current_token->length ) === $current_token->length ) { + $text = str_replace( "\0", '', $this->get_modifiable_text() ); + // Ignore text nodes that are entirely U+0000 NULL. + if ( '' === $text ) { // @todo Indicate a parse error once it's possible. return $this->step(); } - $this->insert_html_element( $this->state->current_token ); - return true; + + // Whitespace-only text nodes are inserted in-place. + if ( strlen( $text ) === strspn( $text, "\u{0009}\u{000A}\u{000C}\u{000D}\u{0020}", 0, strlen( $text ) ) ) { + $this->insert_html_element( $this->state->current_token ); + return true; + } + + // Non-whitespace would trigger fostering, unsupported at this time. + $this->bail( 'Foster parenting is not supported.' ); + break; } - $this->bail( 'Foster parenting is not supported.' ); break; /* From 380b9c6c30f153f161b6ecb4a373c36866c98fbc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 18 Jul 2024 11:28:42 +0200 Subject: [PATCH 37/55] Expand text processing comment and whitespace special character form --- .../html-api/class-wp-html-processor.php | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 64d2ad7043fc5..0b98b075cdb18 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1768,8 +1768,26 @@ private function step_in_table(): bool { return $this->step(); } - // Whitespace-only text nodes are inserted in-place. - if ( strlen( $text ) === strspn( $text, "\u{0009}\u{000A}\u{000C}\u{000D}\u{0020}", 0, strlen( $text ) ) ) { + /* + * This follows the rules for "in table text" insertion mode. + * + * Whitespace-only text nodes are inserted in-place. Otherwise + * foster parenting is enabled and the nodes would be + * inserted out-of-place. + * + * > If any of the tokens in the pending table character tokens + * > list are character tokens that are not ASCII whitespace, + * > then this is a parse error: reprocess the character tokens + * > in the pending table character tokens list using the rules + * > given in the "anything else" entry in the "in table" + * > insertion mode. + * > + * > Otherwise, insert the characters given by the pending table + * > character tokens list. + * + * @see https://html.spec.whatwg.org/#parsing-main-intabletext + */ + if ( strlen( $text ) === strspn( $text, "\x09\x0A\x0C\x0D\x20", 0, strlen( $text ) ) ) { $this->insert_html_element( $this->state->current_token ); return true; } From 995719492c70ef784a2c35b23b9eb1a05114aa1e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 18 Jul 2024 11:42:46 +0200 Subject: [PATCH 38/55] Fix comment whitespace --- .../html-api/class-wp-html-processor.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 0b98b075cdb18..67758bbe6406e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1776,17 +1776,17 @@ private function step_in_table(): bool { * inserted out-of-place. * * > If any of the tokens in the pending table character tokens - * > list are character tokens that are not ASCII whitespace, - * > then this is a parse error: reprocess the character tokens - * > in the pending table character tokens list using the rules - * > given in the "anything else" entry in the "in table" - * > insertion mode. - * > - * > Otherwise, insert the characters given by the pending table - * > character tokens list. + * > list are character tokens that are not ASCII whitespace, + * > then this is a parse error: reprocess the character tokens + * > in the pending table character tokens list using the rules + * > given in the "anything else" entry in the "in table" + * > insertion mode. + * > + * > Otherwise, insert the characters given by the pending table + * > character tokens list. * * @see https://html.spec.whatwg.org/#parsing-main-intabletext - */ + */ if ( strlen( $text ) === strspn( $text, "\x09\x0A\x0C\x0D\x20", 0, strlen( $text ) ) ) { $this->insert_html_element( $this->state->current_token ); return true; From ffd0e1c9aecf02f485ceb2ca5ee47dee0e627c74 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 09:38:23 +0200 Subject: [PATCH 39/55] Clarify empty check after processing and null-remove --- src/wp-includes/html-api/class-wp-html-processor.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 67758bbe6406e..120d1082844e1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1762,7 +1762,10 @@ private function step_in_table(): bool { $this->state->stack_of_open_elements->current_node_is( 'TR' ) ) { $text = str_replace( "\0", '', $this->get_modifiable_text() ); - // Ignore text nodes that are entirely U+0000 NULL. + /* + * If the text is empty after processing HTML entities and stripping + * U+0000 NULL bytes then ignore the token. + */ if ( '' === $text ) { // @todo Indicate a parse error once it's possible. return $this->step(); From f49812e92fc90b749e9c6036256d1428f083d7e8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 09:40:25 +0200 Subject: [PATCH 40/55] Use consistent "\n" style character escapes Follow the most common whitespace character escape forms. Quote the spec referencing the ASCII whitespace characters. List the characters in the same order mentioned in the spec. --- src/wp-includes/html-api/class-wp-html-processor.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 120d1082844e1..00e8a06a23ff8 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1142,7 +1142,7 @@ private function step_in_body() { * contain character references which decode only to whitespace. */ $text = $this->get_modifiable_text(); - if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) { + if ( strlen( $text ) !== strspn( $text, "\t\n\f\r " ) ) { $this->state->frameset_ok = false; } @@ -1787,10 +1787,12 @@ private function step_in_table(): bool { * > * > Otherwise, insert the characters given by the pending table * > character tokens list. + * > … + * > ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE. * * @see https://html.spec.whatwg.org/#parsing-main-intabletext */ - if ( strlen( $text ) === strspn( $text, "\x09\x0A\x0C\x0D\x20", 0, strlen( $text ) ) ) { + if ( strlen( $text ) === strspn( $text, "\t\n\f\r ", 0, strlen( $text ) ) ) { $this->insert_html_element( $this->state->current_token ); return true; } From 40b55b44d449ff807ee0ea5465604ee79732719a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:27:29 +0200 Subject: [PATCH 41/55] Remove redundant null byte text replacement --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index dacbe3711f461..47b999406cf40 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2145,7 +2145,7 @@ private function step_in_table(): bool { $this->state->stack_of_open_elements->current_node_is( 'THEAD' ) || $this->state->stack_of_open_elements->current_node_is( 'TR' ) ) { - $text = str_replace( "\0", '', $this->get_modifiable_text() ); + $text = $this->get_modifiable_text(); /* * If the text is empty after processing HTML entities and stripping * U+0000 NULL bytes then ignore the token. From 9046cb31d3fe7f28d70fdc6f85bcbff37af4b0fb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:35:37 +0200 Subject: [PATCH 42/55] Apply suggestion to compare multiple elements against node name --- .../html-api/class-wp-html-processor.php | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 47b999406cf40..0faaf8c3687b1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2137,13 +2137,17 @@ private function step_in_table(): bool { * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element */ case '#text': + $current_node = $this->state->stack_of_open_elements->current_node(); + $current_node_name = $current_node ? $current_node->node_name : null; if ( - $this->state->stack_of_open_elements->current_node_is( 'TABLE' ) || - $this->state->stack_of_open_elements->current_node_is( 'TBODY' ) || - $this->state->stack_of_open_elements->current_node_is( 'TEMPLATE' ) || - $this->state->stack_of_open_elements->current_node_is( 'TFOOT' ) || - $this->state->stack_of_open_elements->current_node_is( 'THEAD' ) || - $this->state->stack_of_open_elements->current_node_is( 'TR' ) + $current_node_name && ( + 'TABLE' === $current_node_name || + 'TBODY' === $current_node_name || + 'TEMPLATE' === $current_node_name || + 'TFOOT' === $current_node_name || + 'THEAD' === $current_node_name || + 'TR'=== $current_node_name + ) ) { $text = $this->get_modifiable_text(); /* From e4b874c78d2ed02b495dbd604751183276635a74 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:37:25 +0200 Subject: [PATCH 43/55] Add spec quote when generating a COLGROUP token The spec inserts some elements not present in the DOM to preserve proper table structure. Add explanatory comment. --- src/wp-includes/html-api/class-wp-html-processor.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 0faaf8c3687b1..30445500776c1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2231,9 +2231,11 @@ private function step_in_table(): bool { */ case '+COL': $this->state->stack_of_open_elements->clear_to_table_context(); - $this->insert_html_element( - new WP_HTML_Token( null, 'COLGROUP', false ) - ); + /* + * > Insert an HTML element for a "colgroup" start tag token with no attributes, + * > then switch the insertion mode to "in column group". + */ + $this->insert_html_element( new WP_HTML_Token( null, 'COLGROUP', false ) ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return $this->step( self::REPROCESS_CURRENT_NODE ); From 1cca15f758310ed887c9efab4b0c78b5222cd409 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:42:58 +0200 Subject: [PATCH 44/55] Add spec quote when generating a TBODY token The spec inserts some elements not present in the DOM to preserve proper table structure. Add explanatory comment. --- src/wp-includes/html-api/class-wp-html-processor.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 30445500776c1..9ce52caba892a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2257,9 +2257,11 @@ private function step_in_table(): bool { case '+TH': case '+TR': $this->state->stack_of_open_elements->clear_to_table_context(); - $this->insert_html_element( - new WP_HTML_Token( null, 'TBODY', false ) - ); + /* + * > Insert an HTML element for a "tbody" start tag token with no attributes, + * > then switch the insertion mode to "in table body". + */ + $this->insert_html_element( new WP_HTML_Token( null, 'TBODY', false ) ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); From b29e1d35054d5229697b9e0d27a19a9cd847d5bf Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:44:49 +0200 Subject: [PATCH 45/55] Use goto for safer move to "anything else" condition --- src/wp-includes/html-api/class-wp-html-processor.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9ce52caba892a..0f3d016879e72 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2327,7 +2327,7 @@ private function step_in_table(): bool { case '+INPUT': $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { - break; + goto anything_else; } // @todo Indicate a parse error once it's possible. $this->insert_html_element( $this->state->current_token ); @@ -2357,6 +2357,7 @@ private function step_in_table(): bool { * * @todo Indicate a parse error once it's possible. */ + anything_else: $this->bail( 'Foster parenting is not supported.' ); } From 3f780fcb6ab60208d940f2e3a40f5b93062b1746 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:47:50 +0200 Subject: [PATCH 46/55] Revert "Remove "COL" from void tags test" This reverts commit d14eaf318a3e51428b41cb8c8dee179ce6082d15. --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 0ab59fcd7b1dc..12f36ca742989 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -331,6 +331,7 @@ public static function data_void_tags() { 'AREA' => array( 'AREA' ), 'BASE' => array( 'BASE' ), 'BR' => array( 'BR' ), + 'COL' => array( 'COL' ), 'EMBED' => array( 'EMBED' ), 'HR' => array( 'HR' ), 'IMG' => array( 'IMG' ), From b44f7a35055dc517014b368f1bd12982b679fc09 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 10:51:45 +0200 Subject: [PATCH 47/55] fixup! Apply suggestion to compare multiple elements against node name --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 0f3d016879e72..625f4cfdb6b5d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2146,7 +2146,7 @@ private function step_in_table(): bool { 'TEMPLATE' === $current_node_name || 'TFOOT' === $current_node_name || 'THEAD' === $current_node_name || - 'TR'=== $current_node_name + 'TR' === $current_node_name ) ) { $text = $this->get_modifiable_text(); From 085950ed5b67f67a325f218dcf66a9099163c292 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 11:00:17 +0200 Subject: [PATCH 48/55] Add comment for no-quirks p table nesting --- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 625f4cfdb6b5d..e267e0e6790e5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1786,6 +1786,10 @@ private function step_in_body(): bool { * > A start tag whose tag name is "table" */ case '+TABLE': + /* + * > If the Document is not set to quirks mode, and the stack of open elements + * > has a p element in button scope, then close a p element. + */ if ( WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() From e057ff99dec01fcfb4d3ac732999781f546cb6ff Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 21:15:11 +0200 Subject: [PATCH 49/55] Remove strspn default args --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e267e0e6790e5..8a3b5236454d6 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2184,7 +2184,7 @@ private function step_in_table(): bool { * * @see https://html.spec.whatwg.org/#parsing-main-intabletext */ - if ( strlen( $text ) === strspn( $text, "\t\n\f\r ", 0, strlen( $text ) ) ) { + if ( strlen( $text ) === strspn( $text, "\t\n\f\r " ) ) { $this->insert_html_element( $this->state->current_token ); return true; } From dc752ff017403ab6b52b501125530ba8c02dcb30 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 23 Jul 2024 21:25:25 +0200 Subject: [PATCH 50/55] Remove assertion in implementation from HTML spec Assertions from the spec are explicitly optional in implementation. Remove the assertion, it's checking an invariant condition. --- src/wp-includes/html-api/class-wp-html-processor.php | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8a3b5236454d6..f5deedc1bf488 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2685,14 +2685,6 @@ private function step_in_cell(): bool { case '+TH': case '+THEAD': case '+TR': - // Assert: The stack of open elements has a td or th element in table scope. - if ( - ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TD' ) && - ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TH' ) - ) { - throw new Exception( 'Assertion failed @todo better message' ); - } - $this->close_cell(); return $this->step( self::REPROCESS_CURRENT_NODE ); From 2cfe504f80a89d6dc49a2f4260add6732d69a300 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 23 Jul 2024 16:44:59 -0700 Subject: [PATCH 51/55] Adjust code after review. --- .../html-api/class-wp-html-processor.php | 138 +++++++++++++----- 1 file changed, 103 insertions(+), 35 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f5deedc1bf488..5636118d26462 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -765,6 +765,7 @@ public function expects_closer( $node = null ): ?bool { } return ! ( + ( $node->has_self_closing_flag ?? false ) || // Comments, text nodes, and other atomic tokens. '#' === $token_name[0] || // Doctype declarations. @@ -2138,7 +2139,8 @@ private function step_in_table(): bool { switch ( $op ) { /* - * > A character token, if the current node is table, tbody, template, tfoot, thead, or tr element + * > A character token, if the current node is table, + * > tbody, template, tfoot, thead, or tr element */ case '#text': $current_node = $this->state->stack_of_open_elements->current_node(); @@ -2159,7 +2161,6 @@ private function step_in_table(): bool { * U+0000 NULL bytes then ignore the token. */ if ( '' === $text ) { - // @todo Indicate a parse error once it's possible. return $this->step(); } @@ -2179,12 +2180,10 @@ private function step_in_table(): bool { * > * > Otherwise, insert the characters given by the pending table * > character tokens list. - * > … - * > ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE. * * @see https://html.spec.whatwg.org/#parsing-main-intabletext */ - if ( strlen( $text ) === strspn( $text, "\t\n\f\r " ) ) { + if ( strlen( $text ) === strspn( $text, " \t\f\r\n" ) ) { $this->insert_html_element( $this->state->current_token ); return true; } @@ -2208,7 +2207,7 @@ private function step_in_table(): bool { * > A DOCTYPE token */ case 'html': - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); /* @@ -2235,11 +2234,12 @@ private function step_in_table(): bool { */ case '+COL': $this->state->stack_of_open_elements->clear_to_table_context(); + /* * > Insert an HTML element for a "colgroup" start tag token with no attributes, * > then switch the insertion mode to "in column group". */ - $this->insert_html_element( new WP_HTML_Token( null, 'COLGROUP', false ) ); + $this->insert_virtual_node( 'COLGROUP' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2265,18 +2265,20 @@ private function step_in_table(): bool { * > Insert an HTML element for a "tbody" start tag token with no attributes, * > then switch the insertion mode to "in table body". */ - $this->insert_html_element( new WP_HTML_Token( null, 'TBODY', false ) ); + $this->insert_virtual_node( 'TBODY' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is "table" + * + * This tag in the IN TABLE insertion mode is a parse error. */ case '+TABLE': - // pase error if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { return $this->step(); } + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); $this->reset_insertion_mode(); return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2289,6 +2291,7 @@ private function step_in_table(): bool { // @todo Indicate a parse error once it's possible. return $this->step(); } + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); $this->reset_insertion_mode(); return true; @@ -2307,7 +2310,7 @@ private function step_in_table(): bool { case '-TH': case '-THEAD': case '-TR': - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); /* @@ -2318,7 +2321,9 @@ private function step_in_table(): bool { case '+SCRIPT': case '+TEMPLATE': case '-TEMPLATE': - // > Process the token using the rules for the "in head" insertion mode. + /* + * > Process the token using the rules for the "in head" insertion mode. + */ return $this->step_in_head(); /* @@ -2339,6 +2344,8 @@ private function step_in_table(): bool { /* * > A start tag whose tag name is "form" + * + * This tag in the IN TABLE insertion mode is a parse error. */ case '+FORM': if ( @@ -2347,10 +2354,12 @@ private function step_in_table(): bool { ) { return $this->step(); } + + // This FORM is special because it immediately closes and cannot have other children. + $this->state->current_token->has_self_closing_flag = true; + $this->insert_html_element( $this->state->current_token ); $this->state->form_element = $this->state->current_token; - // > Pop that form element off the stack of open elements. - $this->state->stack_of_open_elements->pop(); return true; } @@ -2459,9 +2468,7 @@ private function step_in_table_body(): bool { case '+TD': // @todo Indicate a parse error once it's possible. $this->state->stack_of_open_elements->clear_to_table_body_context(); - $this->insert_html_element( - new WP_HTML_Token( null, 'TR', false ) - ); + $this->insert_virtual_node( 'TR' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2471,18 +2478,24 @@ private function step_in_table_body(): bool { case '-TBODY': case '-TFOOT': case '-THEAD': - if ( - ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) - ) { - // @todo Indicate a parse error once it's possible. + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { + // Parse error: ignore the token. return $this->step(); } + + $this->state->stack_of_open_elements->clear_to_table_body_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return true; /* - * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead" + * > A start tag whose tag name is one of: "caption", "col", "colgroup","tbody", "tfoot", "thead" * > An end tag whose tag name is "table" */ case '+CAPTION': @@ -2497,7 +2510,7 @@ private function step_in_table_body(): bool { ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'THEAD' ) && ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TFOOT' ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } $this->state->stack_of_open_elements->clear_to_table_body_context(); @@ -2516,7 +2529,7 @@ private function step_in_table_body(): bool { case '-TD': case '-TH': case '-TR': - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } @@ -2564,9 +2577,10 @@ private function step_in_row(): bool { */ case '-TR': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; @@ -2585,9 +2599,10 @@ private function step_in_row(): bool { case '+TR': case '-TABLE': if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; @@ -2599,14 +2614,22 @@ private function step_in_row(): bool { case '-TBODY': case '-TFOOT': case '-THEAD': + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TR' ) ) { - // ignore the token. + // Ignore the token. return $this->step(); } + $this->state->stack_of_open_elements->clear_to_table_row_context(); $this->state->stack_of_open_elements->pop(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; @@ -2622,7 +2645,7 @@ private function step_in_row(): bool { case '-HTML': case '-TD': case '-TH': - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } @@ -2659,14 +2682,29 @@ private function step_in_cell(): bool { */ case '-TD': case '-TH': + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } + $this->generate_implied_end_tags(); - if ( ! $this->state->stack_of_open_elements->current_node()->node_name ) { + + /* + * @todo This needs to check if the current node is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ + if ( ! $this->state->stack_of_open_elements->current_node_is( $tag_name ) ) { // @todo Indicate a parse error once it's possible. } + $this->state->stack_of_open_elements->pop_until( $tag_name ); $this->state->active_formatting_elements->clear_up_to_last_marker(); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; @@ -2685,6 +2723,12 @@ private function step_in_cell(): bool { case '+TH': case '+THEAD': case '+TR': + /* + * > Assert: The stack of open elements has a td or th element in table scope. + * + * Nothing to do here, except to verify in tests that this never appears. + */ + $this->close_cell(); return $this->step( self::REPROCESS_CURRENT_NODE ); @@ -2696,7 +2740,7 @@ private function step_in_cell(): bool { case '-COL': case '-COLGROUP': case '-HTML': - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); /* @@ -2707,8 +2751,14 @@ private function step_in_cell(): bool { case '-TFOOT': case '-THEAD': case '-TR': + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $tag_name ) ) { - // @todo Indicate a parse error once it's possible. + // Parse error: ignore the token. return $this->step(); } $this->close_cell(); @@ -4057,16 +4107,16 @@ private function run_adoption_agency_algorithm(): void { /** * Runs the close cell algorithm. * - * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell - * * Where the steps above say to close the cell, they mean to run the following algorithm: - + * * > 1. Generate implied end tags. * > 2. If the current node is not now a td element or a th element, then this is a parse error. * > 3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack. * > 4. Clear the list of active formatting elements up to the last marker. * > 5. Switch the insertion mode to "in row". * + * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell + * * @since 6.7.0 */ private function close_cell(): void { @@ -4095,6 +4145,24 @@ private function insert_html_element( WP_HTML_Token $token ): void { $this->state->stack_of_open_elements->push( $token ); } + /** + * Inserts a virtual element on the stack of open elements. + * + * @since 6.7.0 + * + * @param string $token_name Name of token to create and insert into the stack of open elements. + * @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node. + * Defaults to auto-creating a bookmark name. + */ + private function insert_virtual_node( $token_name, $bookmark_name = null ): void { + $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; + $name = $bookmark_name ?? $this->bookmark_token(); + + $this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 ); + + $this->insert_html_element( new WP_HTML_Token( $name, $token_name, false ) ); + } + /* * HTML Specification Helpers */ From 9590793437a721f40ccff2b9f94f669130713978 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 23 Jul 2024 16:49:44 -0700 Subject: [PATCH 52/55] Remove typo. --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 5636118d26462..8a260e087f609 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2495,7 +2495,7 @@ private function step_in_table_body(): bool { return true; /* - * > A start tag whose tag name is one of: "caption", "col", "colgroup","tbody", "tfoot", "thead" + * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead" * > An end tag whose tag name is "table" */ case '+CAPTION': From cf1637368035791e5c4cd81e01b39996d8ec3180 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 24 Jul 2024 09:04:26 +0200 Subject: [PATCH 53/55] Fix HTML spec quoting close_cell method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "> …" quoting omitted part of the quoted spec. Include the complete spec in the quoting. --- .../html-api/class-wp-html-processor.php | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8a260e087f609..1e6a18389b1ea 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4105,15 +4105,14 @@ private function run_adoption_agency_algorithm(): void { } /** - * Runs the close cell algorithm. - * - * Where the steps above say to close the cell, they mean to run the following algorithm: - * - * > 1. Generate implied end tags. - * > 2. If the current node is not now a td element or a th element, then this is a parse error. - * > 3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack. - * > 4. Clear the list of active formatting elements up to the last marker. - * > 5. Switch the insertion mode to "in row". + * Runs the "close the cell" algorithm. + * + * > Where the steps above say to close the cell, they mean to run the following algorithm: + * > 1. Generate implied end tags. + * > 2. If the current node is not now a td element or a th element, then this is a parse error. + * > 3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack. + * > 4. Clear the list of active formatting elements up to the last marker. + * > 5. Switch the insertion mode to "in row". * * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell * From 4c4bfc8ee83a98dc7dbba65df34cce4856f82dba Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 24 Jul 2024 09:30:06 +0200 Subject: [PATCH 54/55] Use pop instruction for form elements that are immediately popped in table --- src/wp-includes/html-api/class-wp-html-processor.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1e6a18389b1ea..0d1c7c1496835 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2356,10 +2356,9 @@ private function step_in_table(): bool { } // This FORM is special because it immediately closes and cannot have other children. - $this->state->current_token->has_self_closing_flag = true; - $this->insert_html_element( $this->state->current_token ); $this->state->form_element = $this->state->current_token; + $this->state->stack_of_open_elements->pop(); return true; } From 0686de8d67f2d856432fda853835bcfcf126628a Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 24 Jul 2024 11:11:49 -0700 Subject: [PATCH 55/55] Remove unwanted change to expects-closer --- src/wp-includes/html-api/class-wp-html-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 0d1c7c1496835..975f21a0f0d77 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -765,7 +765,6 @@ public function expects_closer( $node = null ): ?bool { } return ! ( - ( $node->has_self_closing_flag ?? false ) || // Comments, text nodes, and other atomic tokens. '#' === $token_name[0] || // Doctype declarations.