Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Use full parser for html5lib tests #7117

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ed6d1c7
HTML5Lib: enable head tests
sirreal Jul 31, 2024
e83b01a
HTML5Lib: Use full parser when context not provided
sirreal Jul 31, 2024
301c935
HTML5Lib: Strip doctypes from expected output
sirreal Jul 31, 2024
fd8dd1f
HTML5Lib: Ignore tests with known issues
sirreal Jul 31, 2024
53b044f
HTML5Lib: Handle PI lookalike comments
sirreal Jul 31, 2024
6fe4d93
HTML5Lib: Handle funky comments in tree construction
sirreal Jul 31, 2024
f4805f7
PICKME: Bugfix on ?-initial invalid comment texts
sirreal Jul 31, 2024
f09a026
HTML5Lib: Add special handling for missing html, head, body tags
sirreal Jul 31, 2024
28ba1a7
Finish skipping tests
sirreal Jul 31, 2024
cd6e126
Revert "PICKME: Bugfix on ?-initial invalid comment texts"
sirreal Jul 31, 2024
3315063
Revert "fixup! PICKME: Bugfix on ?-initial invalid comment texts"
sirreal Jul 31, 2024
1662852
Disable some unimplemented tests
sirreal Jul 31, 2024
3805b1c
Read the script-on flag and ignore tests
sirreal Jul 31, 2024
f673e0a
Fix up ignores
sirreal Jul 31, 2024
1c834ba
Test ignores cleanup
sirreal Jul 31, 2024
b5df8df
Lints
sirreal Jul 31, 2024
f5ca994
PICKME: Fix infinite loop in skip_script_data
sirreal Aug 1, 2024
a385685
HTML API: Allow any fragment context.
dmsnell Aug 1, 2024
bb178ab
Merge branch 'trunk' into html-api/support-initial-mode
dmsnell Aug 5, 2024
08eabad
Revert "HTML API: Allow any fragment context."
sirreal Aug 6, 2024
89ce774
Remove SKIP_HEAD_TESTS code
sirreal Aug 6, 2024
49365af
Remove skip for bug
sirreal Aug 6, 2024
1de7514
Add test case for bug
sirreal Aug 6, 2024
c08c379
Set state to incomplete on short HTML funky comments
sirreal Aug 6, 2024
6563eec
Merge branch 'html-api/fix-unclosed-comment-bug' into html-api/suppor…
sirreal Aug 6, 2024
d0ad5c5
Remove unnecessary test skips
sirreal Aug 6, 2024
0645a3d
Improve comment where html, head, body tags are appended.
sirreal Aug 6, 2024
00fe961
Merge branch 'trunk' into html-api/support-initial-mode
dmsnell Aug 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1909,6 +1909,8 @@ private function parse_next_tag(): bool {
if ( $this->is_closing_tag ) {
// No chance of finding a closer.
if ( $at + 3 > $doc_length ) {
$this->parser_state = self::STATE_INCOMPLETE_INPUT;

return false;
}

Expand Down
139 changes: 90 additions & 49 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,26 @@
* @group html-api-html5lib-tests
*/
class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
/**
* The HTML Processor only accepts HTML in document <body>.
* Do not run tests that look for anything in document <head>.
*/
const SKIP_HEAD_TESTS = true;

/**
* Skip specific tests that may not be supported or have known issues.
*/
const SKIP_TESTS = array(
'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.',
'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.',
'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.',
'template/line0885' => 'Unimplemented: no parsing of attributes on context node.',
'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0068' => 'Unimplemented: no support outside of IN BODY yet.',
'tests2/line0650' => 'Whitespace only test never enters "in body" parsing mode.',
'tests19/line0965' => 'Unimplemented: no support outside of IN BODY yet.',
'tests23/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests23/line0041' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests23/line0069' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests23/line0101' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests26/line0263' => 'Bug: An active formatting element should be created for a trailing text node.',
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'webkit02/line0013' => "Asserting behavior with scripting flag enabled, which this parser doesn't support.",
'webkit01/line0300' => 'Unimplemented: no support outside of IN BODY yet.',
'webkit01/line0310' => 'Unimplemented: no support outside of IN BODY yet.',
'webkit01/line0336' => 'Unimplemented: no support outside of IN BODY yet.',
'webkit01/line0349' => 'Unimplemented: no support outside of IN BODY yet.',
'webkit01/line0362' => 'Unimplemented: no support outside of IN BODY yet.',
'webkit01/line0375' => 'Unimplemented: no support outside of IN BODY yet.',
'comments01/line0155' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'comments01/line0169' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests1/line0692' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0965' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests5/line0013' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
'tests5/line0077' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
'tests5/line0091' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
);

/**
Expand All @@ -68,14 +55,43 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
* @param string $html Given test HTML.
* @param string $expected_tree Tree structure of parsed HTML.
*/
public function test_parse( $fragment_context, $html, $expected_tree ) {
public function test_parse( ?string $fragment_context, string $html, string $expected_tree ) {
$processed_tree = self::build_tree_representation( $fragment_context, $html );

if ( null === $processed_tree ) {
$this->markTestSkipped( 'Test includes unsupported markup.' );
}
$fragment_detail = $fragment_context ? " in context <{$fragment_context}>" : '';

$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly:\n{$html}" );
/*
* The HTML processor does not produce html, head, body tags if the processor does not reach them.
* These should all be produced when reaching the end-of-file.
* For now, append the missing tags when necessary.
*
* @todo remove this section when the processor handles this.
sirreal marked this conversation as resolved.
Show resolved Hide resolved
*/
$auto_generated_html_head_body = "<html>\n <head>\n <body>\n\n";
$auto_generated_head_body = " <head>\n <body>\n\n";
$auto_generated_body = " <body>\n\n";
if ( str_ends_with( $expected_tree, $auto_generated_html_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_html_head_body ) ) {
if ( str_ends_with( $processed_tree, "<html>\n <head>\n\n" ) ) {
$processed_tree = substr_replace( $processed_tree, " <body>\n\n", -1 );
} elseif ( str_ends_with( $processed_tree, "<html>\n\n" ) ) {
$processed_tree = substr_replace( $processed_tree, " <head>\n <body>\n\n", -1 );
} else {
$processed_tree = substr_replace( $processed_tree, $auto_generated_html_head_body, -1 );
}
} elseif ( str_ends_with( $expected_tree, $auto_generated_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_head_body ) ) {
if ( str_ends_with( $processed_tree, "<head>\n\n" ) ) {
$processed_tree = substr_replace( $processed_tree, " <body>\n\n", -1 );
} else {
$processed_tree = substr_replace( $processed_tree, $auto_generated_head_body, -1 );
}
} elseif ( str_ends_with( $expected_tree, $auto_generated_body ) && ! str_ends_with( $processed_tree, $auto_generated_body ) ) {
$processed_tree = substr_replace( $processed_tree, $auto_generated_body, -1 );
}
sirreal marked this conversation as resolved.
Show resolved Hide resolved

$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly{$fragment_detail}:\n{$html}" );
}

/**
Expand All @@ -100,7 +116,9 @@ public function data_external_html5lib_tests() {
$line = str_pad( strval( $test[0] ), 4, '0', STR_PAD_LEFT );
$test_name = "{$test_suite}/line{$line}";

if ( self::should_skip_test( $test_name, $test[3] ) ) {
$test_context_element = $test[1];

if ( self::should_skip_test( $test_context_element, $test_name, $test[3] ) ) {
continue;
}

Expand All @@ -118,15 +136,9 @@ public function data_external_html5lib_tests() {
*
* @return bool True if the test case should be skipped. False otherwise.
*/
private static function should_skip_test( $test_name, $expected_tree ): bool {
if ( self::SKIP_HEAD_TESTS ) {
$html_start = "<html>\n <head>\n <body>\n";
if (
strlen( $expected_tree ) < strlen( $html_start ) ||
substr( $expected_tree, 0, strlen( $html_start ) ) !== $html_start
) {
return true;
}
private static function should_skip_test( ?string $test_context_element, string $test_name, string $expected_tree ): bool {
if ( null !== $test_context_element && 'body' !== $test_context_element ) {
return true;
}

if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
Expand All @@ -146,15 +158,18 @@ private static function should_skip_test( $test_name, $expected_tree ): bool {
private static function build_tree_representation( ?string $fragment_context, string $html ) {
$processor = $fragment_context
? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
: WP_HTML_Processor::create_fragment( $html );
: WP_HTML_Processor::create_full_parser( $html );
if ( null === $processor ) {
return null;
}

$output = "<html>\n <head>\n <body>\n";

// Initially, assume we're 2 levels deep at: html > body > [position]
$indent_level = 2;
/*
* The fragment parser will start 2 levels deep at: html > body > [position]
* and requires adjustment to initial parameters.
* The full parser will not.
*/
$output = $fragment_context ? "<html>\n <head>\n <body>\n" : '';
$indent_level = $fragment_context ? 2 : 0;
$indent = ' ';
$was_text = null;
$text_node = '';
Expand Down Expand Up @@ -238,6 +253,11 @@ private static function build_tree_representation( ?string $fragment_context, st
$text_node .= $processor->get_modifiable_text();
break;

case '#funky-comment':
// Comments must be "<" then "!-- " then the data then " -->".
$output .= str_repeat( $indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
break;

case '#comment':
switch ( $processor->get_comment_type() ) {
case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
Expand All @@ -250,6 +270,10 @@ private static function build_tree_representation( ?string $fragment_context, st
$comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
break;

case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
$comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
break;

default:
throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
}
Expand Down Expand Up @@ -301,6 +325,7 @@ public static function parse_html5_dat_testfile( $filename ) {
$test_html = '';
$test_dom = '';
$test_context_element = null;
$test_script_flag = false;
$test_line_number = 0;

while ( false !== ( $line = fgets( $handle ) ) ) {
Expand All @@ -309,8 +334,12 @@ public static function parse_html5_dat_testfile( $filename ) {
if ( '#' === $line[0] ) {
// Finish section.
if ( "#data\n" === $line ) {
// Yield when switching from a previous state.
if ( $state ) {
/*
* Yield when switching from a previous state.
* Do not yield tests with the scripting flag enabled. The scripting flag
* is always disabled in the HTML API.
*/
if ( $state && ! $test_script_flag ) {
yield array(
$test_line_number,
$test_context_element,
Expand All @@ -325,6 +354,10 @@ public static function parse_html5_dat_testfile( $filename ) {
$test_html = '';
$test_dom = '';
$test_context_element = null;
$test_script_flag = false;
}
if ( "#script-on\n" === $line ) {
$test_script_flag = true;
}

$state = trim( substr( $line, 1 ) );
Expand Down Expand Up @@ -376,7 +409,15 @@ public static function parse_html5_dat_testfile( $filename ) {
*/
case 'document':
if ( '|' === $line[0] ) {
$test_dom .= substr( $line, 2 );
/*
* The next_token() method these tests rely on does not stop
* at doctype nodes. Strip doctypes from output.
* @todo Restore this line if and when the processor
* exposes doctypes.
*/
if ( '| <!DOCTYPE ' !== substr( $line, 0, 12 ) ) {
$test_dom .= substr( $line, 2 );
}
} else {
// This is a text node that includes unescaped newlines.
// Everything else should be single lines starting with "| ".
Expand Down
11 changes: 11 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2903,4 +2903,15 @@ public function test_script_tag_processing_no_infinite_loop_final_left_angle_bra
$this->assertFalse( $processor->next_tag() );
$this->assertTrue( $processor->paused_at_incomplete_token() );
}

/**
* Test a bugfix where the input ends abruptly with a funky comment started.
*
* @ticket 61831
*/
public function test_unclosed_funky_comment_input_too_short() {
	// The input starts a funky comment ("</#") and then ends abruptly.
	$tag_processor = new WP_HTML_Tag_Processor( '</#' );

	// No tag can be matched in this input.
	$found_tag = $tag_processor->next_tag();
	$this->assertFalse( $found_tag );

	// The processor must report that it stopped on an incomplete token.
	$is_paused = $tag_processor->paused_at_incomplete_token();
	$this->assertTrue( $is_paused );
}
}
Loading