Skip to content

Commit

Permalink
Backport commits from 62270
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronjorbin committed Jan 31, 2025
1 parent 1c02e68 commit 5c0e7d5
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 6 deletions.
33 changes: 27 additions & 6 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,22 @@ public function next_tag( $query = null ): bool {
return false;
}

/**
 * Advances the processor to the next token in the HTML document.
 *
 * Non-tag tokens cannot currently be represented here, and the semantic
 * rules for text nodes are not processed. Consider WP_HTML_Tag_Processor
 * instead when access to the raw tokens is needed.
 *
 * This public method only forwards to a private method which performs the
 * actual work, leaving this one free to be overridden by subclasses.
 *
 * @since 6.5.0 Added for internal support; do not use.
 * @since 6.7.2 Refactored so subclasses may extend.
 *
 * @return bool Whether a token was parsed.
 */
public function next_token(): bool {
	$was_token_parsed = $this->next_visitable_token();

	return $was_token_parsed;
}

/**
* Ensures internal accounting is maintained for HTML semantic rules while
* the underlying Tag Processor class is seeking to a bookmark.
Expand All @@ -615,13 +631,18 @@ public function next_tag( $query = null ): bool {
* semantic rules for text nodes. For access to the raw tokens consider using
* WP_HTML_Tag_Processor instead.
*
* @since 6.5.0 Added for internal support; do not use.
* Note that this method may call itself recursively. This is why it is not
* implemented as {@see WP_HTML_Processor::next_token()}, which instead calls
* this method similarly to how {@see WP_HTML_Tag_Processor::next_token()}
* calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method.
*
* @since 6.7.2 Added for internal support.
*
* @access private
*
* @return bool
*/
public function next_token(): bool {
private function next_visitable_token(): bool {
$this->current_element = null;

if ( isset( $this->last_error ) ) {
Expand All @@ -639,7 +660,7 @@ public function next_token(): bool {
* tokens works in the meantime and isn't obviously wrong.
*/
if ( empty( $this->element_queue ) && $this->step() ) {
return $this->next_token();
return $this->next_visitable_token();
}

// Process the next event on the queue.
Expand All @@ -650,7 +671,7 @@ public function next_token(): bool {
continue;
}

return empty( $this->element_queue ) ? false : $this->next_token();
return empty( $this->element_queue ) ? false : $this->next_visitable_token();
}

$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
Expand All @@ -661,7 +682,7 @@ public function next_token(): bool {
* the breadcrumbs.
*/
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
return $this->next_token();
return $this->next_visitable_token();
}

// Adjust the breadcrumbs for this event.
Expand All @@ -673,7 +694,7 @@ public function next_token(): bool {

// Avoid sending close events for elements which don't expect a closing.
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
return $this->next_token();
return $this->next_visitable_token();
}

return true;
Expand Down
35 changes: 35 additions & 0 deletions tests/phpunit/data/html-api/token-counting-html-processor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

/**
 * HTML Processor for tests which tallies every token it visits.
 *
 * Overrides next_token() so that each call is recorded in
 * $token_seen_count, keyed by a label describing the token the processor
 * is on once the parent call has returned.
 */
class Token_Counting_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Number of times each token label has been seen.
	 *
	 * Tags are labelled with the tag name prefixed by "+" for an opener or
	 * "-" for a closer. Every other token is labelled with its token name,
	 * and a missing token name is tallied under the empty string.
	 *
	 * @var array<string, int>
	 */
	public $token_seen_count = array();

	/**
	 * Advances to the next token and records it in the tally.
	 *
	 * The tally is updated on every call, including a call for which the
	 * parent reports that no token was matched.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$result = parent::next_token();

		if ( '#tag' === $this->get_token_type() ) {
			$token_name = ( $this->is_tag_closer() ? '-' : '+' ) . $this->get_tag();
		} else {
			/*
			 * Tally a missing token name under '' explicitly. PHP coerces a
			 * null array offset to '' on its own, but relying on that implicit
			 * conversion is deprecated as of PHP 8.5.
			 */
			$token_name = $this->get_token_name() ?? '';
		}

		$this->token_seen_count[ $token_name ] = ( $this->token_seen_count[ $token_name ] ?? 0 ) + 1;

		return $result;
	}
}
135 changes: 135 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,141 @@ public function test_ensure_form_tag_closer_token_is_reachable() {
$this->assertTrue( $processor->is_tag_closer() );
}

/**
 * Data provider: HTML documents paired with the token tallies expected from them.
 *
 * Each dataset supplies the document under 'html' and, under
 * 'expected_token_counts', how many times each token label should be recorded.
 * Tag labels are the tag name prefixed with "+" for an opener or "-" for a
 * closer; other labels (e.g. "#text", "#comment") carry no prefix.
 *
 * NOTE(review): the '' label presumably records the final call which finds no
 * further token; confirm against the counting subclass used by the test.
 *
 * @return array[] Datasets keyed by name, each holding 'html' and 'expected_token_counts'.
 */
public function data_html_processor_with_extended_next_token() {
return array(
// Every tag appears once; the P element has no explicit closer yet a '-P' is still expected.
'single_instance_per_tag' => array(
'html' => '
<html>
<head>
<meta charset="utf-8">
<title>Hello World</title>
</head>
<body>
<h1>Hello World!</h1>
<img src="example.png">
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
<footer>The end.</footer>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'#text' => 14,
'+META' => 1,
'+TITLE' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'+H1' => 1,
'-H1' => 1,
'+IMG' => 1,
'+P' => 1,
'#comment' => 1,
'-P' => 1,
'+FOOTER' => 1,
'-FOOTER' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),

// The markup has no HEAD tag and omits the P and LI closers, yet HEAD and every closer are expected.
'multiple_tag_instances' => array(
'html' => '
<html>
<body>
<h1>Hello World!</h1>
<p>First
<p>Second
<p>Third
<ul>
<li>1
<li>2
<li>3
</ul>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'#text' => 13,
'+H1' => 1,
'-H1' => 1,
'+P' => 3,
'-P' => 3,
'+UL' => 1,
'+LI' => 3,
'-LI' => 3,
'-UL' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),

// Six formatting elements nested inside one another, each expected to open and close exactly once.
'extreme_nested_formatting' => array(
'html' => '
<html>
<body>
<p>
<strong><em><strike><i><b><u>FORMAT</u></b></i></strike></em></strong>
</p>
</body>
</html>
',
'expected_token_counts' => array(
'+HTML' => 1,
'+HEAD' => 1,
'-HEAD' => 1,
'+BODY' => 1,
'#text' => 7,
'+P' => 1,
'+STRONG' => 1,
'+EM' => 1,
'+STRIKE' => 1,
'+I' => 1,
'+B' => 1,
'+U' => 1,
'-U' => 1,
'-B' => 1,
'-I' => 1,
'-STRIKE' => 1,
'-EM' => 1,
'-STRONG' => 1,
'-P' => 1,
'-BODY' => 1,
'-HTML' => 1,
'' => 1,
),
),
);
}

/**
 * Ensures that subclasses of WP_HTML_Processor can do bookkeeping by extending the next_token() method.
 *
 * The document is walked with next_tag() only; the tallies are collected as
 * a side effect by the subclass's next_token() override.
 *
 * @ticket 62269
 * @dataProvider data_html_processor_with_extended_next_token
 *
 * @param string $html                  HTML document to process.
 * @param array  $expected_token_counts Expected number of occurrences per token label.
 */
public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts ) {
	require_once DIR_TESTDATA . '/html-api/token-counting-html-processor.php';

	$processor = Token_Counting_HTML_Processor::create_full_parser( $html );

	// Drain the whole document; the return values themselves are of no interest.
	do {
		$found_tag = $processor->next_tag();
	} while ( $found_tag );

	$actual_token_counts = $processor->token_seen_count;
	$snapshot            = 'Snapshot: ' . var_export( $actual_token_counts, true );

	$this->assertEquals( $expected_token_counts, $actual_token_counts, $snapshot );
}

/**
* Ensure that lowercased tag_name query matches tags case-insensitively.
*
Expand Down

0 comments on commit 5c0e7d5

Please sign in to comment.