diff --git a/html5ever/src/lib.rs b/html5ever/src/lib.rs
index 4b8029d9..11e3fef0 100644
--- a/html5ever/src/lib.rs
+++ b/html5ever/src/lib.rs
@@ -29,6 +29,12 @@ mod util {
pub mod str;
}
+pub trait Sendable { // Converts a value to and from a representation that implements `Send`.
+ type SendableSelf: Send; // The representation; the bound requires it to be `Send`.
+ fn get_sendable(&self) -> Self::SendableSelf; // Builds the representation from a borrowed `self`.
+ fn get_self_from_sendable(sendable: Self::SendableSelf) -> Self; // Rebuilds `Self`, consuming the representation.
+}
+
pub mod serialize;
pub mod tokenizer;
pub mod tree_builder;
diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs
index be7fbe3a..317ac4f6 100644
--- a/html5ever/src/tokenizer/char_ref/mod.rs
+++ b/html5ever/src/tokenizer/char_ref/mod.rs
@@ -7,10 +7,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-use super::{Tokenizer, TokenSink};
+use super::{Tokenizer, TokenSink, Sendable};
use buffer_queue::BufferQueue;
use data;
-use tendril::StrTendril;
+use tendril::{SendTendril, StrTendril};
+use tendril::fmt::UTF8;
use util::str::{is_ascii_alnum};
use std::char::from_u32;
@@ -20,6 +21,7 @@ pub use self::Status::*;
use self::State::*;
//§ tokenizing-character-references
+#[derive(Clone, Copy)]
pub struct CharRef {
/// The resulting character(s)
pub chars: [char; 2],
@@ -34,7 +36,7 @@ pub enum Status {
Done,
}
-#[derive(Debug)]
+#[derive(Clone, Copy, Debug)]
enum State {
Begin,
Octothorpe,
@@ -44,6 +46,22 @@ enum State {
BogusName,
}
+/// Snapshot of a `CharRefTokenizer` whose name buffer is stored as a
+/// `SendTendril` instead of a `StrTendril`.
+///
+/// Produced by `CharRefTokenizer::get_sendable` and converted back with
+/// `CharRefTokenizer::get_self_from_sendable`; all other fields are copied
+/// over unchanged.
+pub struct SendableCharRefTokenizer {
+    state: State,
+    addnl_allowed: Option<char>,
+    result: Option<CharRef>,
+
+    num: u32,
+    num_too_big: bool,
+    seen_digit: bool,
+    hex_marker: Option<char>,
+
+    name_buf_opt: Option<SendTendril<UTF8>>,
+    name_match: Option<(u32, u32)>,
+    name_len: usize,
+}
+
+#[derive(Clone)]
pub struct CharRefTokenizer {
state: State,
addnl_allowed: Option,
@@ -110,6 +128,40 @@ impl CharRefTokenizer {
}
}
+impl Sendable for CharRefTokenizer {
+    type SendableSelf = SendableCharRefTokenizer;
+
+    /// Copies the tokenizer state into a `SendableCharRefTokenizer`.
+    ///
+    /// `self` is only borrowed, so the name buffer (if any) is cloned before
+    /// being converted to a `SendTendril`.
+    fn get_sendable(&self) -> Self::SendableSelf {
+        SendableCharRefTokenizer {
+            state: self.state,
+            addnl_allowed: self.addnl_allowed,
+            result: self.result,
+            num: self.num,
+            num_too_big: self.num_too_big,
+            seen_digit: self.seen_digit,
+            hex_marker: self.hex_marker,
+            name_buf_opt: self.name_buf_opt.clone().map(SendTendril::from),
+            name_match: self.name_match,
+            name_len: self.name_len
+        }
+    }
+
+    /// Rebuilds a `CharRefTokenizer` from a snapshot, consuming it.
+    fn get_self_from_sendable(sendable_self: Self::SendableSelf) -> Self {
+        CharRefTokenizer {
+            state: sendable_self.state,
+            addnl_allowed: sendable_self.addnl_allowed,
+            result: sendable_self.result,
+            num: sendable_self.num,
+            num_too_big: sendable_self.num_too_big,
+            seen_digit: sendable_self.seen_digit,
+            hex_marker: sendable_self.hex_marker,
+            // The snapshot is owned here, so the buffer is moved out rather
+            // than cloned first.
+            name_buf_opt: sendable_self.name_buf_opt.map(StrTendril::from),
+            name_match: sendable_self.name_match,
+            name_len: sendable_self.name_len
+        }
+    }
+}
+
impl CharRefTokenizer {
pub fn step(
&mut self,
diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs
index c4301714..3082a39c 100644
--- a/html5ever/src/tokenizer/mod.rs
+++ b/html5ever/src/tokenizer/mod.rs
@@ -19,7 +19,7 @@ use self::states::{Escaped, DoubleEscaped};
use self::states::{Unquoted, SingleQuoted, DoubleQuoted};
use self::states::{DoctypeIdKind, Public, System};
-use self::char_ref::{CharRef, CharRefTokenizer};
+use self::char_ref::{CharRef, CharRefTokenizer, SendableCharRefTokenizer};
use util::str::lower_ascii_letter;
@@ -30,9 +30,12 @@ use std::borrow::Cow::{self, Borrowed};
use std::collections::BTreeMap;
use {LocalName, QualName, Attribute, SmallCharSet};
-use tendril::StrTendril;
+use tendril::{SendTendril, StrTendril};
+use tendril::fmt::UTF8;
pub use buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet};
+use super::Sendable;
+
pub mod states;
mod interface;
mod char_ref;
@@ -95,6 +98,38 @@ impl Default for TokenizerOpts {
}
}
+/// Similar to Tokenizer, except this type uses SendTendril instead of StrTendril.
+///
+/// `Sink` is the sendable form of the tokenizer's sink. The current doctype
+/// is stored as four separate `curr_doctype_*` fields rather than as a
+/// `Doctype` value.
+pub struct SendableTokenizer<Sink> {
+    opts: TokenizerOpts,
+    sink: Sink,
+    state: states::State,
+    at_eof: bool,
+    char_ref_tokenizer: Option<SendableCharRefTokenizer>,
+    current_char: char,
+    reconsume: bool,
+    ignore_lf: bool,
+    discard_bom: bool,
+    current_tag_kind: TagKind,
+    current_tag_name: SendTendril<UTF8>,
+    current_tag_self_closing: bool,
+    /// Attribute name paired with its value; rebuilt into `Attribute`s on the way back.
+    current_tag_attrs: Vec<(QualName, SendTendril<UTF8>)>,
+    current_attr_name: SendTendril<UTF8>,
+    current_attr_value: SendTendril<UTF8>,
+    current_comment: SendTendril<UTF8>,
+
+    /// current doctype's fields
+    curr_doctype_name: Option<SendTendril<UTF8>>,
+    curr_doctype_public_id: Option<SendTendril<UTF8>>,
+    curr_doctype_system_id: Option<SendTendril<UTF8>>,
+    curr_doctype_force_quirks: bool,
+
+    last_start_tag_name: Option<LocalName>,
+    temp_buf: SendTendril<UTF8>,
+    state_profile: BTreeMap<states::State, u64>,
+    time_in_sink: u64,
+    current_line: u64,
+}
+
/// The HTML tokenizer.
pub struct Tokenizer {
/// Options controlling the behavior of the tokenizer.
@@ -559,6 +594,97 @@ impl Tokenizer {
self.process_token_and_continue(ParseError(error));
}
}
+
+impl<Sink: TokenSink + Sendable> Sendable for Tokenizer<Sink>
+{
+    type SendableSelf = SendableTokenizer<<Sink as Sendable>::SendableSelf>;
+
+    /// Returns an instance containing the necessary information required to
+    /// create a Tokenizer with the exact same state. Instances of this
+    /// type can be sent between threads.
+    ///
+    /// `self` is only borrowed: every tendril is cloned and converted to a
+    /// `SendTendril`, and the sink is captured through its own `Sendable`
+    /// implementation.
+    fn get_sendable(&self) -> Self::SendableSelf {
+        let sendable_current_tag_attrs = self.current_tag_attrs.iter()
+            .map(|attr| (attr.name.clone(), SendTendril::from(attr.value.clone())))
+            .collect();
+
+        SendableTokenizer {
+            opts: self.opts.clone(),
+            sink: self.sink.get_sendable(),
+            state: self.state,
+            // `get_sendable` takes `&self`, so borrow the boxed tokenizer
+            // instead of cloning it first.
+            char_ref_tokenizer: self.char_ref_tokenizer.as_ref().map(|tok| tok.get_sendable()),
+            at_eof: self.at_eof,
+            current_char: self.current_char,
+            reconsume: self.reconsume,
+            ignore_lf: self.ignore_lf,
+            discard_bom: self.discard_bom,
+            current_tag_kind: self.current_tag_kind,
+            current_tag_name: SendTendril::from(self.current_tag_name.clone()),
+            current_tag_self_closing: self.current_tag_self_closing,
+            current_tag_attrs: sendable_current_tag_attrs,
+            current_attr_name: SendTendril::from(self.current_attr_name.clone()),
+            current_attr_value: SendTendril::from(self.current_attr_value.clone()),
+            current_comment: SendTendril::from(self.current_comment.clone()),
+
+            curr_doctype_name: self.current_doctype.name.clone().map(SendTendril::from),
+            curr_doctype_public_id: self.current_doctype.public_id.clone().map(SendTendril::from),
+            curr_doctype_system_id: self.current_doctype.system_id.clone().map(SendTendril::from),
+            curr_doctype_force_quirks: self.current_doctype.force_quirks,
+
+            last_start_tag_name: self.last_start_tag_name.clone(),
+            temp_buf: SendTendril::from(self.temp_buf.clone()),
+            state_profile: self.state_profile.clone(),
+            time_in_sink: self.time_in_sink,
+            current_line: self.current_line
+        }
+    }
+
+    /// Rebuilds a `Tokenizer` from a snapshot, consuming it.
+    ///
+    /// The snapshot is owned, so its fields are moved out and converted
+    /// without further cloning.
+    fn get_self_from_sendable(sendable_self: Self::SendableSelf) -> Self {
+        let current_tag_attrs = sendable_self.current_tag_attrs.into_iter()
+            .map(|(name, value)| Attribute {
+                name: name,
+                value: StrTendril::from(value),
+            })
+            .collect();
+
+        Tokenizer {
+            opts: sendable_self.opts,
+            sink: Sink::get_self_from_sendable(sendable_self.sink),
+            state: sendable_self.state,
+            char_ref_tokenizer: sendable_self.char_ref_tokenizer
+                .map(|tok| Box::new(CharRefTokenizer::get_self_from_sendable(tok))),
+            at_eof: sendable_self.at_eof,
+            current_char: sendable_self.current_char,
+            reconsume: sendable_self.reconsume,
+            ignore_lf: sendable_self.ignore_lf,
+            discard_bom: sendable_self.discard_bom,
+            current_tag_kind: sendable_self.current_tag_kind,
+            current_tag_name: StrTendril::from(sendable_self.current_tag_name),
+            current_tag_self_closing: sendable_self.current_tag_self_closing,
+            current_tag_attrs: current_tag_attrs,
+            current_attr_name: StrTendril::from(sendable_self.current_attr_name),
+            current_attr_value: StrTendril::from(sendable_self.current_attr_value),
+            current_comment: StrTendril::from(sendable_self.current_comment),
+
+            current_doctype: Doctype {
+                name: sendable_self.curr_doctype_name.map(StrTendril::from),
+                public_id: sendable_self.curr_doctype_public_id.map(StrTendril::from),
+                system_id: sendable_self.curr_doctype_system_id.map(StrTendril::from),
+                force_quirks: sendable_self.curr_doctype_force_quirks,
+            },
+
+            last_start_tag_name: sendable_self.last_start_tag_name,
+            temp_buf: StrTendril::from(sendable_self.temp_buf),
+            state_profile: sendable_self.state_profile,
+            time_in_sink: sendable_self.time_in_sink,
+            current_line: sendable_self.current_line
+        }
+    }
+}
//§ END
// Shorthand for common state machine behaviors.
diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs
index 445e0a36..7b5440b5 100644
--- a/html5ever/src/tree_builder/mod.rs
+++ b/html5ever/src/tree_builder/mod.rs
@@ -18,7 +18,8 @@ pub use interface::{TreeSink, Tracer, NextParserState, create_element, ElementFl
use self::types::*;
use {ExpandedName, QualName, LocalName, Namespace};
-use tendril::StrTendril;
+use tendril::{SendTendril, StrTendril};
+use tendril::fmt::UTF8;
use tokenizer;
use tokenizer::{Doctype, StartTag, Tag, EndTag, TokenSink, TokenSinkResult};
@@ -39,6 +40,8 @@ use tree_builder::types::*;
use tree_builder::tag_sets::*;
use util::str::to_escaped_string;
+use super::Sendable;
+
pub use self::PushFlag::*;
#[macro_use] mod tag_sets;
@@ -84,6 +87,27 @@ impl Default for TreeBuilderOpts {
}
}
+/// Similar to TreeBuilder, except this type uses SendTendril instead of StrTendril.
+///
+/// `Handle` is the tree builder's node handle type and `Sink` is the sendable
+/// form of its sink. Active formatting entries are stored as
+/// `SendableFormatEntry` values.
+pub struct SendableTreeBuilder<Handle, Sink> {
+    opts: TreeBuilderOpts,
+    sink: Sink,
+    mode: InsertionMode,
+    orig_mode: Option<InsertionMode>,
+    template_modes: Vec<InsertionMode>,
+    pending_table_text: Vec<(SplitStatus, SendTendril<UTF8>)>,
+    quirks_mode: QuirksMode,
+    doc_handle: Handle,
+    open_elems: Vec<Handle>,
+    active_formatting: Vec<SendableFormatEntry<Handle>>,
+    head_elem: Option<Handle>,
+    form_elem: Option<Handle>,
+    frameset_ok: bool,
+    ignore_lf: bool,
+    foster_parenting: bool,
+    context_elem: Option<Handle>,
+    current_line: u64,
+}
+
/// The HTML tree builder.
pub struct TreeBuilder {
/// Options controlling the behavior of the tree builder.
@@ -408,6 +432,132 @@ impl TreeBuilder
}
}
+impl<Handle, Sink> Sendable for TreeBuilder<Handle, Sink>
+    where Handle: Clone + Send,
+          Sink: TreeSink<Handle=Handle> + Sendable,
+{
+    type SendableSelf = SendableTreeBuilder<Handle, <Sink as Sendable>::SendableSelf>;
+
+    /// Returns an instance containing the necessary information required to
+    /// create a TreeBuilder with the exact same state. Instances of this
+    /// type can be sent between threads.
+    ///
+    /// `self` is only borrowed: handles are cloned, and every tendril is
+    /// cloned and converted to a `SendTendril`.
+    fn get_sendable(&self) -> Self::SendableSelf {
+        let sendable_pending_table_text = self.pending_table_text.iter()
+            .map(|&(split_status, ref text)| (split_status, SendTendril::from(text.clone())))
+            .collect();
+
+        let sendable_active_formatting = self.active_formatting.iter()
+            .map(|entry| match *entry {
+                // A `Tag` holds `StrTendril` attribute values, so it is
+                // taken apart field by field.
+                FormatEntry::Element(ref handle, ref tag) => SendableFormatEntry::Element {
+                    handle: handle.clone(),
+                    tag_kind: tag.kind,
+                    tag_name: tag.name.clone(),
+                    tag_self_closing: tag.self_closing,
+                    tag_attrs: tag.attrs.iter()
+                        .map(|attr| (attr.name.clone(), SendTendril::from(attr.value.clone())))
+                        .collect(),
+                },
+                FormatEntry::Marker => SendableFormatEntry::Marker,
+            })
+            .collect();
+
+        SendableTreeBuilder {
+            opts: self.opts,
+            sink: self.sink.get_sendable(),
+            mode: self.mode,
+            orig_mode: self.orig_mode,
+            template_modes: self.template_modes.clone(),
+            pending_table_text: sendable_pending_table_text,
+            quirks_mode: self.quirks_mode,
+            doc_handle: self.doc_handle.clone(),
+            open_elems: self.open_elems.clone(),
+            active_formatting: sendable_active_formatting,
+            head_elem: self.head_elem.clone(),
+            form_elem: self.form_elem.clone(),
+            frameset_ok: self.frameset_ok,
+            ignore_lf: self.ignore_lf,
+            foster_parenting: self.foster_parenting,
+            context_elem: self.context_elem.clone(),
+            current_line: self.current_line
+        }
+    }
+
+    /// Rebuilds a `TreeBuilder` from a snapshot, consuming it.
+    ///
+    /// The snapshot is owned, so its vectors are consumed and their elements
+    /// moved into the new builder instead of being cloned.
+    fn get_self_from_sendable(sendable_self: Self::SendableSelf) -> Self {
+        let pending_table_text = sendable_self.pending_table_text.into_iter()
+            .map(|(split_status, text)| (split_status, StrTendril::from(text)))
+            .collect();
+
+        let active_formatting = sendable_self.active_formatting.into_iter()
+            .map(|entry| match entry {
+                SendableFormatEntry::Element {
+                    handle,
+                    tag_kind,
+                    tag_name,
+                    tag_self_closing,
+                    tag_attrs,
+                } => {
+                    let attrs = tag_attrs.into_iter()
+                        .map(|(name, value)| Attribute {
+                            name: name,
+                            value: StrTendril::from(value),
+                        })
+                        .collect();
+                    FormatEntry::Element(
+                        handle,
+                        Tag {
+                            kind: tag_kind,
+                            name: tag_name,
+                            self_closing: tag_self_closing,
+                            attrs: attrs,
+                        }
+                    )
+                },
+                SendableFormatEntry::Marker => FormatEntry::Marker,
+            })
+            .collect();
+
+        TreeBuilder {
+            opts: sendable_self.opts,
+            sink: Sink::get_self_from_sendable(sendable_self.sink),
+            mode: sendable_self.mode,
+            orig_mode: sendable_self.orig_mode,
+            template_modes: sendable_self.template_modes,
+            pending_table_text: pending_table_text,
+            quirks_mode: sendable_self.quirks_mode,
+            doc_handle: sendable_self.doc_handle,
+            open_elems: sendable_self.open_elems,
+            active_formatting: active_formatting,
+            head_elem: sendable_self.head_elem,
+            form_elem: sendable_self.form_elem,
+            frameset_ok: sendable_self.frameset_ok,
+            ignore_lf: sendable_self.ignore_lf,
+            foster_parenting: sendable_self.foster_parenting,
+            context_elem: sendable_self.context_elem,
+            current_line: sendable_self.current_line
+        }
+    }
+}
+
impl TokenSink
for TreeBuilder
where Handle: Clone,
diff --git a/html5ever/src/tree_builder/types.rs b/html5ever/src/tree_builder/types.rs
index 1192472c..0bc51da1 100644
--- a/html5ever/src/tree_builder/types.rs
+++ b/html5ever/src/tree_builder/types.rs
@@ -9,10 +9,13 @@
//! Types used within the tree builder code. Not exported to users.
+use {LocalName, QualName};
use tokenizer::Tag;
use tokenizer::states::RawKind;
+use tokenizer::TagKind;
-use tendril::StrTendril;
+use tendril::{SendTendril, StrTendril};
+use tendril::fmt::UTF8;
pub use self::InsertionMode::*;
pub use self::SplitStatus::*;
@@ -77,6 +80,18 @@ pub enum ProcessResult {
ToRawData(RawKind),
}
+/// Counterpart of `FormatEntry` used by `SendableTreeBuilder`.
+///
+/// Instead of holding a `Tag`, the `Element` variant stores the tag's kind,
+/// name, self-closing flag and attributes separately, with attribute values
+/// kept as `SendTendril`s.
+#[derive(Clone)]
+pub enum SendableFormatEntry<Handle> {
+    Element {
+        handle: Handle,
+        tag_kind: TagKind,
+        tag_name: LocalName,
+        tag_self_closing: bool,
+        tag_attrs: Vec<(QualName, SendTendril<UTF8>)>,
+    },
+    Marker
+}
+
pub enum FormatEntry<Handle> { // One entry of the tree builder's `active_formatting` list.
    Element(Handle, Tag), // a node handle together with its tag
    Marker,
diff --git a/markup5ever/util/buffer_queue.rs b/markup5ever/util/buffer_queue.rs
index ab32961a..cc99a9ea 100644
--- a/markup5ever/util/buffer_queue.rs
+++ b/markup5ever/util/buffer_queue.rs
@@ -19,6 +19,7 @@
//! [`BufferQueue`]: struct.BufferQueue.html
+use std::cmp::Ordering;
use std::collections::VecDeque;
use tendril::StrTendril;
@@ -47,6 +48,8 @@ pub enum SetResult {
pub struct BufferQueue {
/// Buffers to process.
buffers: VecDeque,
+ /// Used during speculative parsing.
+ recorded_len: Option,
}
impl BufferQueue {
@@ -55,6 +58,31 @@ impl BufferQueue {
pub fn new() -> BufferQueue { // Builds a queue with no buffers and no recorded length.
BufferQueue {
buffers: VecDeque::with_capacity(16),
+ recorded_len: None, // stays `None` until notify_speculative_parsing_has_started is called
+ }
+ }
+
+ pub fn notify_speculative_parsing_has_started(&mut self) { // Records how many buffers are queued right now.
+ self.recorded_len = Some(self.buffers.len()); // read back and cleared by update_with_new_data
+ }
+
+ /// During speculative parsing, the async tokenizer might mutate network input's contents.
+ /// Also, some chunks might be pushed onto network input. This method is used to
+ /// update network input accordingly.
+ pub fn update_with_new_data(&mut self, new_data: Option>) {
+ let recorded_len = self.recorded_len.expect("This should contain some value!");
+ self.recorded_len.take();
+ let mut new_data = match new_data {
+ None => return,
+ Some(data) => data,
+ };
+
+ let len = self.buffers.len();
+ assert_ne!(len.cmp(&recorded_len), Ordering::Less);
+ self.buffers = self.buffers.split_off(recorded_len);
+ new_data.append(&mut self.buffers);
+ while let Some(chunk) = new_data.pop_front() {
+ self.buffers.push_back(chunk);
}
}