Skip to content

Commit

Permalink
Upgrade/remove dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
kornelski authored and orium committed Dec 4, 2024
1 parent 788b3f7 commit 48adb22
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 82 deletions.
14 changes: 6 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,29 @@ name = "bench"
[dependencies]
bitflags = "2.0.0"
cfg-if = "1.0.0"
cssparser = "0.28.1"
cssparser = "0.29"
encoding_rs = "0.8.13"
lazycell = "1.3.0"
lazy_static = "1.3.0"
memchr = "2.1.2"
selectors = "0.23.0"
thiserror = "1.0.2"
hashbrown = "0.15.0"
mime = "0.3.16"
selectors = "0.24"
thiserror = "2.0"

[dev-dependencies]
criterion = "0.5.1"
# Needed for criterion <= v0.5.1. See https://github.com/bheisler/criterion.rs/pull/703.
clap = { version = "4.5.21", features = ["help"] }
glob = "0.3.0"
html5ever = "0.26.0"
markup5ever_rcdom = "0.2.0"
html5ever = "0.29"
markup5ever_rcdom = "0.5.0-unofficial"
hashbrown = { version = "0.15.0", features = ["serde"] }
serde = "1.0.126"
serde_derive = "1.0.19"
serde_json = "1.0.65"
static_assertions = "1.1.0"
rand = "0.8.5"
rustc-test = "0.3.1"
itertools = "0.10.1"
itertools = "0.13"

[lints.rust]
keyword_idents = { level = "deny", priority = 1 }
Expand Down
48 changes: 23 additions & 25 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use criterion::{criterion_group, criterion_main};
use glob::glob;
use lazy_static::lazy_static;
use std::fmt::{self, Debug};
use std::fs::File;
use std::io::Read;
use std::sync::LazyLock;

const CHUNK_SIZE: usize = 1024;

Expand All @@ -19,30 +19,28 @@ impl Debug for Input {
}
}

// Benchmark inputs, built lazily through `lazy_static!`.
//
// Every path matching `benches/data/*.html` is read into a `String`, turned
// into bytes, and stored as an `Input` holding the file name, the total byte
// length, and the bytes split into chunks of at most `CHUNK_SIZE` bytes.
// Every fallible step is `unwrap`ped, so a bad glob pattern, an unreadable
// path, or a file that is not valid UTF-8 panics.
lazy_static! {
static ref INPUTS: Vec<Input> = {
glob("benches/data/*.html")
.unwrap()
.map(|path| {
let mut data = String::new();
let path = path.unwrap();

// `read_to_string` requires the file to be valid UTF-8.
File::open(&path)
.unwrap()
.read_to_string(&mut data)
.unwrap();

let data = data.into_bytes();

Input {
name: path.file_name().unwrap().to_string_lossy().to_string(),
length: data.len(),
// The last chunk may be shorter than `CHUNK_SIZE`.
chunks: data.chunks(CHUNK_SIZE).map(|c| c.to_owned()).collect(),
}
})
.collect()
};
}
/// Benchmark inputs, loaded the first time the static is dereferenced.
///
/// Each file matching `benches/data/*.html` is read as UTF-8 text and stored
/// as an `Input` carrying its file name, its total byte length, and its bytes
/// split into chunks of at most `CHUNK_SIZE` bytes.
///
/// Every fallible step is `unwrap`ped: an invalid glob pattern, an unreadable
/// path, or a file that is not valid UTF-8 panics during initialization.
static INPUTS: LazyLock<Vec<Input>> = LazyLock::new(|| {
    let mut inputs = Vec::new();

    for entry in glob("benches/data/*.html").unwrap() {
        let mut contents = String::new();
        let path = entry.unwrap();

        // `read_to_string` rejects files that are not valid UTF-8.
        let mut file = File::open(&path).unwrap();
        file.read_to_string(&mut contents).unwrap();

        let bytes = contents.into_bytes();

        inputs.push(Input {
            name: path.file_name().unwrap().to_string_lossy().to_string(),
            length: bytes.len(),
            // The final chunk may be shorter than `CHUNK_SIZE`.
            chunks: bytes.chunks(CHUNK_SIZE).map(<[u8]>::to_vec).collect(),
        });
    }

    inputs
});

macro_rules! create_runner {
($settings:expr) => {
Expand Down
2 changes: 1 addition & 1 deletion c-api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ publish = false
encoding_rs = "0.8.13"
lol_html = { path = "../" }
libc = "0"
thiserror = "1"
thiserror = "2"

[profile.release]
panic = "abort"
Expand Down
6 changes: 6 additions & 0 deletions src/base/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ pub struct HasReplacementsError;
#[allow(unnameable_types)] // accidentally exposed via `tag.set_name()`
pub struct Bytes<'b>(Cow<'b, [u8]>);

impl Bytes<'static> {
pub const fn from_static(string: &'static str) -> Self {
Self(Cow::Borrowed(string.as_bytes()))
}
}

impl<'b> Bytes<'b> {
#[inline]
pub fn from_str(string: &'b str, encoding: &'static Encoding) -> Self {
Expand Down
22 changes: 6 additions & 16 deletions src/rewritable_units/tokens/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::html::escape_double_quotes_only;
use crate::parser::AttributeBuffer;
use crate::rewritable_units::Serialize;
use encoding_rs::Encoding;
use lazycell::LazyCell;
use std::cell::OnceCell;
use std::fmt::{self, Debug};
use std::ops::Deref;
use thiserror::Error;
Expand Down Expand Up @@ -154,7 +154,7 @@ impl Debug for Attribute<'_> {
pub(crate) struct Attributes<'i> {
input: &'i Bytes<'i>,
attribute_buffer: &'i AttributeBuffer,
items: LazyCell<Vec<Attribute<'i>>>,
items: OnceCell<Vec<Attribute<'i>>>,
encoding: &'static Encoding,
}

Expand All @@ -169,7 +169,7 @@ impl<'i> Attributes<'i> {
Attributes {
input,
attribute_buffer,
items: LazyCell::default(),
items: OnceCell::default(),
encoding,
}
}
Expand Down Expand Up @@ -226,19 +226,9 @@ impl<'i> Attributes<'i> {

#[inline]
fn as_mut_vec(&mut self) -> &mut Vec<Attribute<'i>> {
// NOTE: we can't use borrow_mut_with here because `self` is
// already a mutable reference, and passing it to the
// initializer closure would give us two mutable references.
if !self.items.filled() {
self.items
.fill(self.init_items())
.expect("Cell should be empty at this point");
}
let _ = self.items.get_or_init(|| self.init_items());

self.items
.borrow_mut()
.expect("Items should be initialized")
self.items.get_mut().expect("Items should be initialized")
}

#[cfg(test)]
Expand All @@ -252,7 +242,7 @@ impl<'i> Deref for Attributes<'i> {

#[inline]
fn deref(&self) -> &[Attribute<'i>] {
self.items.borrow_with(|| self.init_items())
self.items.get_or_init(|| self.init_items())
}
}

Expand Down
20 changes: 8 additions & 12 deletions src/selectors_vm/attribute_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,19 @@ use super::compiler::AttrExprOperands;
use crate::base::Bytes;
use crate::html::Namespace;
use crate::parser::{AttributeBuffer, AttributeOutline};
use encoding_rs::UTF_8;
use lazy_static::lazy_static;
use lazycell::LazyCell;
use memchr::{memchr, memchr2};
use selectors::attr::CaseSensitivity;
use std::cell::OnceCell;

// Names of the `id` and `class` attributes as `Bytes`, created through
// `lazy_static!` by calling `Bytes::from_str` with the `UTF_8` encoding.
lazy_static! {
static ref ID_ATTR: Bytes<'static> = Bytes::from_str("id", UTF_8);
static ref CLASS_ATTR: Bytes<'static> = Bytes::from_str("class", UTF_8);
}
/// Name of the `id` attribute as `Bytes`, built in the static initializer via
/// the `const fn` `Bytes::from_static`.
static ID_ATTR: Bytes<'static> = Bytes::from_static("id");
/// Name of the `class` attribute as `Bytes`, built the same way.
static CLASS_ATTR: Bytes<'static> = Bytes::from_static("class");

/// Returns `true` when `b` is one of the five ASCII bytes this module treats
/// as attribute whitespace: space, line feed, carriage return, horizontal
/// tab, or form feed (`0x0C`).
#[inline]
const fn is_attr_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\n' | b'\r' | b'\t' | b'\x0c')
}

type MemoizedAttrValue<'i> = LazyCell<Option<Bytes<'i>>>;
type MemoizedAttrValue<'i> = OnceCell<Option<Bytes<'i>>>;

pub(crate) struct AttributeMatcher<'i> {
input: &'i Bytes<'i>,
Expand All @@ -35,8 +31,8 @@ impl<'i> AttributeMatcher<'i> {
AttributeMatcher {
input,
attributes,
id: LazyCell::default(),
class: LazyCell::default(),
id: OnceCell::new(),
class: OnceCell::new(),
is_html_element: ns == Namespace::Html,
}
}
Expand Down Expand Up @@ -78,7 +74,7 @@ impl<'i> AttributeMatcher<'i> {
#[inline]
#[must_use]
pub fn has_id(&self, id: &Bytes<'_>) -> bool {
match self.id.borrow_with(|| self.get_value(&ID_ATTR)) {
match self.id.get_or_init(|| self.get_value(&ID_ATTR)) {
Some(actual_id) => actual_id == id,
None => false,
}
Expand All @@ -87,7 +83,7 @@ impl<'i> AttributeMatcher<'i> {
#[inline]
#[must_use]
pub fn has_class(&self, class_name: &Bytes<'_>) -> bool {
match self.class.borrow_with(|| self.get_value(&CLASS_ATTR)) {
match self.class.get_or_init(|| self.get_value(&CLASS_ATTR)) {
Some(class) => class
.split(|&b| is_attr_whitespace(b))
.any(|actual_class_name| actual_class_name == &**class_name),
Expand Down
7 changes: 0 additions & 7 deletions tests/harness/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
use lazy_static::lazy_static;
use std::sync::Mutex;

// Public, lazily created `Mutex` guarding no data (unit payload).
// NOTE(review): the name suggests tests lock it to serialize a critical
// section; no user of it is visible here — confirm before relying on that.
lazy_static! {
pub static ref TEST_CRITICAL_SECTION_MUTEX: Mutex<()> = Mutex::new(());
}

macro_rules! ignore {
(@info $($args:expr),+) => {
if std::env::var("IGNORES_VERBOSE").is_ok() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,35 @@ use html5ever::tokenizer::{
};
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
use markup5ever_rcdom::RcDom;
use std::cell::RefCell;
use std::iter::FromIterator;
use std::string::ToString;

// sends tokens to a given sink, while at the same time converting and
// recording them into the provided array
pub struct TokenSinkProxy<'a, Sink> {
pub inner: Sink,
pub tokens: &'a mut Vec<TestToken>,
pub tokens: RefCell<&'a mut Vec<TestToken>>,
}

impl<Sink> TokenSinkProxy<'_, Sink> {
fn push_text_token(&mut self, s: &str) {
if let Some(&mut TestToken::Text(ref mut last)) = self.tokens.last_mut() {
fn push_text_token(&self, s: &str) {
let tokens = &mut *self.tokens.borrow_mut();
if let Some(&mut TestToken::Text(ref mut last)) = tokens.last_mut() {
*last += s;
} else {
self.tokens.push(TestToken::Text(s.to_string()));
tokens.push(TestToken::Text(s.to_string()));
}
}
}

impl<Sink: TokenSink> TokenSink for TokenSinkProxy<'_, Sink> {
type Handle = Sink::Handle;

fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle> {
fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle> {
match token {
Token::DoctypeToken(ref doctype) => {
self.tokens.push(TestToken::Doctype {
self.tokens.borrow_mut().push(TestToken::Doctype {
name: doctype.name.as_ref().map(ToString::to_string),
public_id: doctype.public_id.as_ref().map(ToString::to_string),
system_id: doctype.system_id.as_ref().map(ToString::to_string),
Expand All @@ -43,7 +45,7 @@ impl<Sink: TokenSink> TokenSink for TokenSinkProxy<'_, Sink> {
Token::TagToken(ref tag) => {
let name = tag.name.to_string();

self.tokens.push(match tag.kind {
self.tokens.borrow_mut().push(match tag.kind {
TagKind::StartTag => TestToken::StartTag {
name,
attributes: HashMap::from_iter(
Expand All @@ -58,7 +60,9 @@ impl<Sink: TokenSink> TokenSink for TokenSinkProxy<'_, Sink> {
});
}
Token::CommentToken(ref s) => {
self.tokens.push(TestToken::Comment(s.to_string()));
self.tokens
.borrow_mut()
.push(TestToken::Comment(s.to_string()));
}
Token::CharacterTokens(ref s) => {
if !s.is_empty() {
Expand All @@ -73,7 +77,7 @@ impl<Sink: TokenSink> TokenSink for TokenSinkProxy<'_, Sink> {
self.inner.process_token(token, line_number)
}

fn end(&mut self) {
fn end(&self) {
self.inner.end();
}

Expand All @@ -85,20 +89,20 @@ impl<Sink: TokenSink> TokenSink for TokenSinkProxy<'_, Sink> {

pub fn get(input: &str) -> Vec<TestToken> {
let mut tokens = Vec::default();
let mut b = BufferQueue::new();
let b = BufferQueue::default();

b.push_back(StrTendril::from(input));

{
let mut t = Tokenizer::new(
let t = Tokenizer::new(
TokenSinkProxy {
inner: TreeBuilder::new(RcDom::default(), TreeBuilderOpts::default()),
tokens: &mut tokens,
tokens: RefCell::new(&mut tokens),
},
TokenizerOpts::default(),
);

while let TokenizerResult::Script(_) = t.feed(&mut b) {
while let TokenizerResult::Script(_) = t.feed(&b) {
// ignore script markers
}

Expand Down

0 comments on commit 48adb22

Please sign in to comment.