Don't clone input buffer inside html5ever benchmark loop #634

Merged (2 commits) on Jun 27, 2025

34 changes: 20 additions & 14 deletions html5ever/benches/html5ever.rs

@@ -5,10 +5,10 @@ extern crate html5ever;
 use std::fs;
 use std::path::PathBuf;
 
-use criterion::Criterion;
+use criterion::{BatchSize, Criterion};
 
-use html5ever::tendril::*;
 use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
+use html5ever::{tendril::*, TokenizerResult};
 
 struct Sink;
 
@@ -51,19 +51,25 @@ fn run_bench(c: &mut Criterion, name: &str) {
 
     let test_name = format!("html tokenizing {name}");
 
+    // Construct a buffer queue to feed to the tokenizer
+    let buffer_queue = BufferQueue::default();
+    for buf in input.into_iter() {
+        buffer_queue.push_back(buf);
+    }
+
     c.bench_function(&test_name, move |b| {
-        b.iter(|| {
-            let tok = Tokenizer::new(Sink, Default::default());
-            let buffer = BufferQueue::default();
-            // We are doing clone inside the bench function, this is not ideal, but possibly
-            // necessary since our iterator consumes the underlying buffer.
-            for buf in input.clone().into_iter() {
-                buffer.push_back(buf);
-                let _ = tok.feed(&buffer);
-            }
-            let _ = tok.feed(&buffer);
-            tok.end();
-        })
+        b.iter_batched(
+            || buffer_queue.clone(),
+            |buffer_queue| {
+                let tok = Tokenizer::new(Sink, Default::default());
+
+                // Tokenize the entire input, ignoring any <script> elements we find along the way
+                while tok.feed(&buffer_queue) != TokenizerResult::Done {}
+
+                tok.end();
+            },
+            BatchSize::SmallInput,
+        )
     });
 }
 
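The key change above is swapping Criterion's plain `b.iter` for `b.iter_batched`: the setup closure (here, cloning the pre-built `BufferQueue`) runs outside the timed region, and only the tokenizing routine is measured. A minimal, self-contained sketch of the same pattern, using an illustrative workload rather than html5ever itself:

```rust
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};

// Sketch of the iter_batched pattern: the setup closure produces a fresh input
// for every iteration (here by cloning a pre-built Vec), and only the routine
// closure is timed, so the cost of the clone stays out of the measurement.
fn bench_sum(c: &mut Criterion) {
    // Built once, before the measurement loop.
    let input: Vec<u64> = (0..10_000).collect();

    c.bench_function("sum 10k u64", |b| {
        b.iter_batched(
            || input.clone(),                // per-iteration setup, not timed
            |data| data.iter().sum::<u64>(), // timed routine consumes its own copy
            BatchSize::SmallInput,
        )
    });
}

criterion_group!(benches, bench_sum);
criterion_main!(benches);
```

With plain `iter`, the clone would sit inside the measured closure and inflate the reported time, which is exactly the problem the old benchmark's comment acknowledged.
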
2 changes: 1 addition & 1 deletion markup5ever/interface/mod.rs

@@ -61,7 +61,7 @@ impl fmt::Debug for ExpandedName<'_> {
 }
 
 #[must_use]
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum TokenizerResult<Handle> {
     Done,
     Script(Handle),
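The `PartialEq` derive exists only so the benchmark's drive loop can write `tok.feed(&buffer_queue) != TokenizerResult::Done`. A rough illustration with a stand-in enum of the same shape (not the real markup5ever type): the derived impl compares two results whenever the `Handle` type is itself `PartialEq`, which the benchmark's handle type satisfies.

```rust
// Stand-in with the same shape as markup5ever's TokenizerResult. The derived
// PartialEq provides ==/!= whenever Handle: PartialEq, which is what the
// `!= TokenizerResult::Done` check in the benchmark relies on.
#[derive(Debug, PartialEq)]
enum TokenizerResult<Handle> {
    Done,
    Script(Handle),
}

fn main() {
    let result: TokenizerResult<()> = TokenizerResult::Done;
    // Direct comparison; without the derive this would need a match or matches!().
    assert!(result == TokenizerResult::Done);
    assert!(TokenizerResult::Script(()) != TokenizerResult::Done);
}
```
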
2 changes: 1 addition & 1 deletion markup5ever/util/buffer_queue.rs

@@ -47,7 +47,7 @@ pub enum SetResult {
 /// Internally it uses [`VecDeque`] and has the same complexity properties.
 ///
 /// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct BufferQueue {
     /// Buffers to process.
     buffers: RefCell<VecDeque<StrTendril>>,
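Deriving `Clone` here is what makes the `|| buffer_queue.clone()` setup closure possible: `RefCell<T>` is `Clone` whenever `T` is, so the derive copies the queued buffers into an independent queue that each iteration can drain without touching the original. A small sketch of that behaviour with a simplified stand-in type (`String` in place of `StrTendril`):

```rust
use std::cell::RefCell;
use std::collections::VecDeque;

// Simplified stand-in for BufferQueue: RefCell<VecDeque<T>> is Clone when T is,
// so the derived Clone produces an independent copy of the queued buffers.
#[derive(Clone, Debug)]
struct Queue {
    buffers: RefCell<VecDeque<String>>,
}

fn main() {
    let template = Queue {
        buffers: RefCell::new(VecDeque::from(vec!["<p>".to_string(), "hi</p>".to_string()])),
    };

    // Each benchmark iteration would get its own copy and can drain it freely.
    let per_iter = template.clone();
    let _ = per_iter.buffers.borrow_mut().pop_front();

    // The original queue is untouched and can be cloned again next iteration.
    assert_eq!(template.buffers.borrow().len(), 2);
    assert_eq!(per_iter.buffers.borrow().len(), 1);
}
```
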