Skip to content

Commit 259fa03

Browse files
committed
HTML API: Track spans of text with (offset, length) instead of (start, end)
This patch follows up on earlier design questions around how to represent spans of strings inside the class. It's relevant now as preparation for #5683. The mixture of (offset, length) and (start, end) coordinates becomes confusing at times, and all final string operations are performed with the (offset, length) pair, since these feed into `substr()`. In preparation for exposing all tokens within an HTML document, this change: unifies the representation throughout the class; creates `token_starts_at` to track the start of the current token; and replaces `tag_ends_at` with `token_length` for re-use with other token types. There should be no functional or behavioral changes in this patch. For the internal helper classes this patch introduces breaking changes, but those classes are marked private and should not be used outside of the HTML API itself.
1 parent 4d19f6c commit 259fa03

File tree

4 files changed

+112
-68
lines changed

4 files changed

+112
-68
lines changed

src/wp-includes/html-api/class-wp-html-attribute-token.php

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*
1616
* @access private
1717
* @since 6.2.0
18+
* @since {WP_VERSION} Replaced `end` with `length` to more closely match `substr()`.
1819
*
1920
* @see WP_HTML_Tag_Processor
2021
*/
@@ -23,6 +24,7 @@ class WP_HTML_Attribute_Token {
2324
* Attribute name.
2425
*
2526
* @since 6.2.0
27+
*
2628
* @var string
2729
*/
2830
public $name;
@@ -31,6 +33,7 @@ class WP_HTML_Attribute_Token {
3133
* The string offset where the attribute value starts.
3234
*
3335
* @since 6.2.0
36+
*
3437
* @var int
3538
*/
3639
public $value_starts_at;
@@ -39,6 +42,7 @@ class WP_HTML_Attribute_Token {
3942
* How many bytes the value occupies in the input HTML.
4043
*
4144
* @since 6.2.0
45+
*
4246
* @var int
4347
*/
4448
public $value_length;
@@ -47,22 +51,36 @@ class WP_HTML_Attribute_Token {
4751
* The string offset where the attribute name starts.
4852
*
4953
* @since 6.2.0
54+
*
5055
* @var int
5156
*/
5257
public $start;
5358

5459
/**
55-
* The string offset after the attribute value or its name.
60+
* Byte length of the entire attribute name or name and value pair expression.
61+
*
62+
* Example:
63+
*
64+
* <div class="post">
65+
* ------------ length is 12, including quotes
66+
*
67+
* <input type="checked" checked id="selector">
68+
* ------- length is 7
69+
*
70+
* <a rel=noopener>
71+
* ------------ length is 12
72+
*
73+
* @since {WP_VERSION}
5674
*
57-
* @since 6.2.0
5875
* @var int
5976
*/
60-
public $end;
77+
public $length;
6178

6279
/**
6380
* Whether the attribute is a boolean attribute with value `true`.
6481
*
6582
* @since 6.2.0
83+
*
6684
* @var bool
6785
*/
6886
public $is_true;
@@ -76,15 +94,15 @@ class WP_HTML_Attribute_Token {
7694
* @param int $value_start The string offset where the attribute value starts.
7795
* @param int $value_length Number of bytes attribute value spans.
7896
* @param int $start The string offset where the attribute name starts.
79-
* @param int $end The string offset after the attribute value or its name.
97+
* @param int $length Byte length of the entire attribute name or name and value pair expression.
8098
* @param bool $is_true Whether the attribute is a boolean attribute with true value.
8199
*/
82-
public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) {
100+
public function __construct( $name, $value_start, $value_length, $start, $length, $is_true ) {
83101
$this->name = $name;
84102
$this->value_starts_at = $value_start;
85103
$this->value_length = $value_length;
86104
$this->start = $start;
87-
$this->end = $end;
105+
$this->length = $length;
88106
$this->is_true = $is_true;
89107
}
90108
}

src/wp-includes/html-api/class-wp-html-span.php

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*
1919
* @access private
2020
* @since 6.2.0
21+
* @since {WP_VERSION} Replaced `end` with `length` to more closely align with `substr()`.
2122
*
2223
* @see WP_HTML_Tag_Processor
2324
*/
@@ -31,23 +32,23 @@ class WP_HTML_Span {
3132
public $start;
3233

3334
/**
34-
* Byte offset into document where span ends.
35+
* Byte length of span.
3536
*
36-
* @since 6.2.0
37+
* @since {WP_VERSION}
3738
* @var int
3839
*/
39-
public $end;
40+
public $length;
4041

4142
/**
4243
* Constructor.
4344
*
4445
* @since 6.2.0
4546
*
46-
* @param int $start Byte offset into document where replacement span begins.
47-
* @param int $end Byte offset into document where replacement span ends.
47+
* @param int $start Byte offset into document where replacement span begins.
48+
* @param int $length Byte length of span.
4849
*/
49-
public function __construct( $start, $end ) {
50-
$this->start = $start;
51-
$this->end = $end;
50+
public function __construct( $start, $length ) {
51+
$this->start = $start;
52+
$this->length = $length;
5253
}
5354
}

0 commit comments

Comments
 (0)