Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Implement active format reconstruction #6982

Draft
wants to merge 9 commits into
base: trunk
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,22 @@ class WP_HTML_Active_Formatting_Elements {
*/
private $stack = array();

/**
* Returns the node at the given 1-offset index in the list of active formatting elements.
*
* Do not use this method; it is meant to be used only by the HTML Processor.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to say this? The entire class is marked @access private and considered internal.

*
* @since 6.7.0
*
* @access private
*
Comment on lines +53 to +54
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, maybe this is redundant given the entire class has this private tag.

Suggested change
* @access private
*

* @param int $nth 1-offset position of the node in the list; 1 is the earliest entry and `count()` is the most recently added one.
* @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null.
*/
public function at( $nth ) {
	/*
	 * Positions are 1-offset while the backing array is 0-indexed.
	 *
	 * Null-coalescing upholds the documented "null when no node exists"
	 * contract without raising an "undefined array key" warning when
	 * the requested position falls outside of the list.
	 */
	return $this->stack[ $nth - 1 ] ?? null;
}

/**
* Reports if a specific node is in the stack of active formatting elements.
*
Expand Down
88 changes: 72 additions & 16 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -5082,6 +5082,7 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
* > been explicitly closed.
*
* @since 6.4.0
* @since 6.7.0 Added additional support.
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
Expand All @@ -5090,34 +5091,89 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
* @return bool Whether any formatting elements needed to be reconstructed.
*/
private function reconstruct_active_formatting_elements(): bool {
	$active_formats = $this->state->active_formatting_elements;
	$count          = $active_formats->count();

	/*
	 * > 1. If there are no entries in the list of active formatting elements,
	 * >    then there is nothing to reconstruct; stop this algorithm.
	 */
	if ( 0 === $count ) {
		return false;
	}

	/*
	 * > 2. If the last (most recently added) entry in the list of active formatting
	 * >    elements is a marker, or if it is an element that is in the stack of open
	 * >    elements, then there is nothing to reconstruct; stop this algorithm.
	 *
	 * Positions passed to `at()` are 1-offset, so `$count` addresses the last entry.
	 */
	$last_entry = $active_formats->at( $count );
	if (
		'marker' === $last_entry->node_name ||
		$this->state->stack_of_open_elements->contains_node( $last_entry )
	) {
		return false;
	}

	/*
	 * Steps 3 through 6 (the "rewind" phase): starting from the last entry,
	 * walk backwards while the preceding entry is neither a marker nor an
	 * element in the stack of open elements. When the loop ends, `$currently_at`
	 * is the position of the earliest entry that has to be re-created: either
	 * the first entry in the list, or the one immediately following the
	 * marker/open element that stopped the rewind.
	 */
	$currently_at = $count;
	while ( $currently_at > 1 ) {
		$earlier_entry = $active_formats->at( $currently_at - 1 );
		if (
			'marker' === $earlier_entry->node_name ||
			$this->state->stack_of_open_elements->contains_node( $earlier_entry )
		) {
			break;
		}

		--$currently_at;
	}

	/*
	 * Steps 7 through 10 (the "advance" and "create" phases): insert an HTML
	 * element for every entry from the rewind position through the last entry.
	 *
	 * > 9. Replace the entry for entry in the list with an entry for new element.
	 *
	 * That replacement doesn't need to happen here since no DOM is being created.
	 */
	for ( ; $currently_at <= $count; ++$currently_at ) {
		$this->insert_html_element( $active_formats->at( $currently_at ) );
	}

	return true;
}

/**
Expand Down
15 changes: 10 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,23 @@ public function test_clear_to_navigate_after_seeking() {
}

/**
* Ensures that support is added for reconstructing active formatting elements
* before the HTML Processor handles situations with unclosed formats requiring it.
* Ensures that support is added for reconstructing active formatting elements.
*
* @ticket 58517
*
* @covers WP_HTML_Processor::reconstruct_active_formatting_elements
*/
public function test_reconstructs_formatting_elements() {
	$processor = WP_HTML_Processor::create_fragment( '<p><em>One<p><em><span>Two<p><em>Three<p><em>Four' );

	$this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find first EM.' );
	$this->assertSame( array( 'HTML', 'BODY', 'P', 'EM' ), $processor->get_breadcrumbs(), 'Found incorrect breadcrumbs for first EM.' );

	/*
	 * The second P implicitly closes the first P while the first EM is still
	 * an active format. That EM has to be reconstructed inside the second P
	 * before the explicit second EM opens, hence two EMs ahead of the SPAN.
	 */
	$this->assertTrue( $processor->next_tag( 'SPAN' ), 'Could not find test span.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' ),
		$processor->get_breadcrumbs(),
		'Found incorrect breadcrumbs for test SPAN; should have created two EMs.'
	);
}

/**
Expand Down
66 changes: 38 additions & 28 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -165,45 +165,55 @@ public static function data_single_tag_of_supported_elements() {
}

/**
* @ticket 58517
*
* @dataProvider data_unsupported_markup
* Ensures that formats inside unclosed A elements are reconstructed.
*
* @param string $html HTML containing unsupported markup.
* @ticket 61576
*/
public function test_reconstructs_formatting_from_unclosed_a_elements() {
	$processor = WP_HTML_Processor::create_fragment( '<a><strong>Click <a><big>Here</big></a></strong></a>' );

	// Assert that each tag is actually found so a failed search isn't reported as a breadcrumb mismatch.
	$this->assertTrue( $processor->next_tag( 'STRONG' ), 'Could not find STRONG element.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'A', 'STRONG' ),
		$processor->get_breadcrumbs(),
		'Failed to construct starting breadcrumbs properly.'
	);

	/*
	 * The second A closes the first A, and with it the STRONG inside of it.
	 * STRONG remains an active format, so it is re-created before the second
	 * A is inserted, which places the A and its BIG inside the new STRONG.
	 */
	$this->assertTrue( $processor->next_tag( 'BIG' ), 'Could not find BIG element.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ),
		$processor->get_breadcrumbs(),
		'Failed to reconstruct the active formatting elements after an unclosed A element.'
	);
}

/**
* Data provider.
* Ensures that unclosed A elements are reconstructed.
*
* @return array[]
* @ticket 61576
*/
public function test_reconstructs_unclosed_a_elements() {
	$processor = WP_HTML_Processor::create_fragment( '<a><div><a></div></a>' );

	/*
	 * NOTE(review): the expected breadcrumbs below are kept exactly as written.
	 * They appear to describe the tree after the adoption agency algorithm has
	 * moved the DIV out of the first A - confirm that the processor reports the
	 * same breadcrumbs at the moment each tag is visited.
	 */
	$this->assertTrue( $processor->next_tag( 'DIV' ), 'Could not find DIV element.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV' ),
		$processor->get_breadcrumbs(),
		'Failed to construct breadcrumbs properly - the DIV should have closed the A element.'
	);

	// When the DIV re-opens, it reconstructs an unclosed A, then the A in the text is a second A.
	$this->assertTrue( $processor->next_tag( 'A' ), 'Could not find recreated A element.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV', 'A' ),
		$processor->get_breadcrumbs(),
		'Failed to create proper breadcrumbs for recreated A element.'
	);

	// This is the one that's second in the raw text.
	$this->assertTrue( $processor->next_tag( 'A' ), 'Could not find explicit A element.' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV', 'A' ),
		$processor->get_breadcrumbs(),
		'Failed to create proper breadcrumbs for explicit A element - this A should have closed the reconstructed A.'
	);
}

Expand Down
Loading