From bfca145e271f4dc109517b54bab8c4f21466cbe3 Mon Sep 17 00:00:00 2001 From: mandreyel Date: Mon, 17 Dec 2018 23:31:57 +0100 Subject: [PATCH 1/2] Add Servo's rustfmt config file --- rustfmt.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 00000000..de839bae --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,3 @@ +match_block_trailing_comma = true +binop_separator = "Back" +reorder_imports = true From 12b4ce0fc54a5a7dbc47c2f099bf0c8dba7570fc Mon Sep 17 00:00:00 2001 From: mandreyel Date: Mon, 17 Dec 2018 23:45:03 +0100 Subject: [PATCH 2/2] Run rustfmt on everything --- html5ever/benches/html5ever.rs | 39 +- html5ever/build.rs | 15 +- html5ever/examples/arena.rs | 98 +- html5ever/examples/html2html.rs | 14 +- html5ever/examples/noop-tokenize.rs | 4 +- html5ever/examples/noop-tree-builder.rs | 45 +- html5ever/examples/print-rcdom.rs | 37 +- html5ever/examples/print-tree-actions.rs | 62 +- html5ever/examples/tokenize.rs | 45 +- html5ever/macros/match_token.rs | 79 +- html5ever/src/driver.rs | 63 +- html5ever/src/lib.rs | 18 +- html5ever/src/macros.rs | 6 +- html5ever/src/serialize/mod.rs | 67 +- html5ever/src/tokenizer/char_ref/mod.rs | 175 ++-- html5ever/src/tokenizer/interface.rs | 10 +- html5ever/src/tokenizer/mod.rs | 978 +++++++++++-------- html5ever/src/tokenizer/states.rs | 4 +- html5ever/src/tree_builder/data.rs | 40 +- html5ever/src/tree_builder/mod.rs | 601 +++++++----- html5ever/src/tree_builder/rules.rs | 9 +- html5ever/src/tree_builder/tag_sets.rs | 37 +- html5ever/src/tree_builder/types.rs | 13 +- html5ever/src/util/str.rs | 12 +- html5ever/tests/foreach_html5lib_test/mod.rs | 17 +- html5ever/tests/serializer.rs | 141 ++- html5ever/tests/tokenizer.rs | 210 ++-- html5ever/tests/tree_builder.rs | 141 +-- markup5ever/build.rs | 64 +- markup5ever/data/mod.rs | 40 +- markup5ever/interface/mod.rs | 33 +- markup5ever/interface/tree_builder.rs | 60 +- markup5ever/lib.rs | 12 +- markup5ever/rcdom.rs | 191 ++-- markup5ever/serialize.rs | 10 +- markup5ever/util/buffer_queue.rs | 19 +- markup5ever/util/smallcharset.rs | 5 +- xml5ever/benches/xml5ever.rs | 39 +- xml5ever/examples/hello_xml.rs | 11 +- xml5ever/examples/simple_xml_tokenizer.rs | 24 +- xml5ever/examples/xml_tokenizer.rs | 49 +- xml5ever/examples/xml_tree_printer.rs | 25 +- xml5ever/src/driver.rs | 86 +- xml5ever/src/lib.rs | 23 +- xml5ever/src/serialize/mod.rs | 23 +- xml5ever/src/tokenizer/char_ref/mod.rs | 173 ++-- xml5ever/src/tokenizer/interface.rs | 10 +- xml5ever/src/tokenizer/mod.rs | 689 ++++++------- xml5ever/src/tokenizer/qname.rs | 8 +- xml5ever/src/tokenizer/states.rs | 3 +- xml5ever/src/tree_builder/mod.rs | 254 +++-- xml5ever/src/tree_builder/types.rs | 4 +- xml5ever/src/util/mod.rs | 4 +- xml5ever/tests/tokenizer.rs | 175 ++-- xml5ever/tests/tree_builder.rs | 125 +-- xml5ever/tests/util/find_tests.rs | 11 +- 56 files changed, 2989 insertions(+), 2161 deletions(-) diff --git a/html5ever/benches/html5ever.rs b/html5ever/benches/html5ever.rs index bf6ab461..19e10b3a 100644 --- a/html5ever/benches/html5ever.rs +++ b/html5ever/benches/html5ever.rs @@ -5,10 +5,12 @@ extern crate html5ever; use std::fs; use std::path::PathBuf; -use criterion::{Criterion, black_box}; +use criterion::{black_box, Criterion}; -use html5ever::tokenizer::{BufferQueue, TokenSink, Token, Tokenizer, TokenizerOpts, TokenSinkResult}; use html5ever::tendril::*; +use html5ever::tokenizer::{ + BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer, 
TokenizerOpts, +}; struct Sink; @@ -23,7 +25,6 @@ impl TokenSink for Sink { } } - fn run_bench(c: &mut Criterion, name: &str) { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); path.push("data/bench/"); @@ -32,7 +33,9 @@ fn run_bench(c: &mut Criterion, name: &str) { // Read the file and treat it as an infinitely repeating sequence of characters. let mut file_input = ByteTendril::new(); - file.read_to_tendril(&mut file_input).ok().expect("can't read file"); + file.read_to_tendril(&mut file_input) + .ok() + .expect("can't read file"); let file_input: StrTendril = file_input.try_reinterpret().unwrap(); let size = file_input.len(); let mut stream = file_input.chars().cycle(); @@ -51,22 +54,22 @@ fn run_bench(c: &mut Criterion, name: &str) { let test_name = format!("html tokenizing {}", name); - c.bench_function(&test_name, move |b| b.iter(|| { - let mut tok = Tokenizer::new(Sink, Default::default()); - let mut buffer = BufferQueue::new(); - // We are doing clone inside the bench function, this is not ideal, but possibly - // necessary since our iterator consumes the underlying buffer. - for buf in input.clone().into_iter() { - buffer.push_back(buf); + c.bench_function(&test_name, move |b| { + b.iter(|| { + let mut tok = Tokenizer::new(Sink, Default::default()); + let mut buffer = BufferQueue::new(); + // We are doing clone inside the bench function, this is not ideal, but possibly + // necessary since our iterator consumes the underlying buffer. + for buf in input.clone().into_iter() { + buffer.push_back(buf); + let _ = tok.feed(&mut buffer); + } let _ = tok.feed(&mut buffer); - } - let _ = tok.feed(&mut buffer); - tok.end(); - })); + tok.end(); + }) + }); } - - fn html5ever_benchmark(c: &mut Criterion) { run_bench(c, "lipsum.html"); run_bench(c, "lipsum-zh.html"); @@ -77,4 +80,4 @@ fn html5ever_benchmark(c: &mut Criterion) { } criterion_group!(benches, html5ever_benchmark); -criterion_main!(benches); \ No newline at end of file +criterion_main!(benches); diff --git a/html5ever/build.rs b/html5ever/build.rs index 2f5209e0..17fd6c59 100644 --- a/html5ever/build.rs +++ b/html5ever/build.rs @@ -7,8 +7,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[macro_use] extern crate quote; -#[macro_use] extern crate syn; +#[macro_use] +extern crate quote; +#[macro_use] +extern crate syn; extern crate proc_macro2; use std::env; @@ -26,9 +28,12 @@ fn main() { println!("cargo:rerun-if-changed={}", input.display()); // We have stack overflows on Servo's CI. 
- let handle = Builder::new().stack_size(128 * 1024 * 1024).spawn(move || { - match_token::expand(&input, &output); - }).unwrap(); + let handle = Builder::new() + .stack_size(128 * 1024 * 1024) + .spawn(move || { + match_token::expand(&input, &output); + }) + .unwrap(); handle.join().unwrap(); } diff --git a/html5ever/examples/arena.rs b/html5ever/examples/arena.rs index d786cbc0..ae15d1cc 100644 --- a/html5ever/examples/arena.rs +++ b/html5ever/examples/arena.rs @@ -10,9 +10,9 @@ extern crate html5ever; extern crate typed_arena; -use html5ever::{parse_document, QualName, Attribute, ExpandedName}; -use html5ever::tendril::{TendrilSink, StrTendril}; -use html5ever::interface::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags}; +use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::{parse_document, Attribute, ExpandedName, QualName}; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashSet; @@ -32,7 +32,9 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<' document: arena.alloc(Node::new(NodeData::Document)), quirks_mode: QuirksMode::NoQuirks, }; - parse_document(sink, Default::default()).from_utf8().one(bytes) + parse_document(sink, Default::default()) + .from_utf8() + .one(bytes) } type Arena<'arena> = &'arena typed_arena::Arena>; @@ -131,7 +133,10 @@ impl<'arena> Node<'arena> { new_sibling.next_sibling.set(Some(self)); if let Some(previous_sibling) = self.previous_sibling.take() { new_sibling.previous_sibling.set(Some(previous_sibling)); - debug_assert!(ptr::eq::(previous_sibling.next_sibling.get().unwrap(), self)); + debug_assert!(ptr::eq::( + previous_sibling.next_sibling.get().unwrap(), + self + )); previous_sibling.next_sibling.set(Some(new_sibling)); } else if let Some(parent) = self.parent.get() { debug_assert!(ptr::eq::(parent.first_child.get().unwrap(), self)); @@ -147,19 +152,26 @@ impl<'arena> Sink<'arena> { } fn append_common(&self, child: NodeOrText>, previous: P, append: A) - where P: FnOnce() -> Option>, - A: FnOnce(Ref<'arena>), + where + P: FnOnce() -> Option>, + A: FnOnce(Ref<'arena>), { let new_node = match child { NodeOrText::AppendText(text) => { // Append to an existing Text node if we have one. - if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() { + if let Some(&Node { + data: NodeData::Text { ref contents }, + .. + }) = previous() + { contents.borrow_mut().push_tendril(&text); - return + return; } - self.new_node(NodeData::Text { contents: RefCell::new(text) }) - } - NodeOrText::AppendNode(node) => node + self.new_node(NodeData::Text { + contents: RefCell::new(text), + }) + }, + NodeOrText::AppendNode(node) => node, }; append(new_node) @@ -196,7 +208,11 @@ impl<'arena> TreeSink for Sink<'arena> { } fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { - if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data { + if let NodeData::Element { + template_contents: Some(ref contents), + .. + } = target.data + { contents } else { panic!("not a template element!") @@ -204,14 +220,23 @@ impl<'arena> TreeSink for Sink<'arena> { } fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { - if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { + if let NodeData::Element { + mathml_annotation_xml_integration_point, + .. 
+ } = target.data + { mathml_annotation_xml_integration_point } else { panic!("not an element!") } } - fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Ref<'arena> { + fn create_element( + &mut self, + name: QualName, + attrs: Vec, + flags: ElementFlags, + ) -> Ref<'arena> { self.new_node(NodeData::Element { name: name, attrs: RefCell::new(attrs), @@ -221,7 +246,6 @@ impl<'arena> TreeSink for Sink<'arena> { None }, mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, - }) } @@ -230,14 +254,17 @@ impl<'arena> TreeSink for Sink<'arena> { } fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { - self.new_node(NodeData::ProcessingInstruction { target: target, contents: data }) + self.new_node(NodeData::ProcessingInstruction { + target: target, + contents: data, + }) } fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText>) { self.append_common( child, || parent.last_child.get(), - |new_node| parent.append(new_node) + |new_node| parent.append(new_node), ) } @@ -245,12 +272,16 @@ impl<'arena> TreeSink for Sink<'arena> { self.append_common( child, || sibling.previous_sibling.get(), - |new_node| sibling.insert_before(new_node) + |new_node| sibling.insert_before(new_node), ) } - fn append_based_on_parent_node(&mut self, element: &Ref<'arena>, - prev_element: &Ref<'arena>, child: NodeOrText>) { + fn append_based_on_parent_node( + &mut self, + element: &Ref<'arena>, + prev_element: &Ref<'arena>, + child: NodeOrText>, + ) { if element.parent.get().is_some() { self.append_before_sibling(element, child) } else { @@ -258,14 +289,16 @@ impl<'arena> TreeSink for Sink<'arena> { } } - fn append_doctype_to_document(&mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril) { + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { self.document.append(self.new_node(NodeData::Doctype { name: name, public_id: public_id, - system_id: system_id + system_id: system_id, })) } @@ -276,10 +309,15 @@ impl<'arena> TreeSink for Sink<'arena> { panic!("not an element") }; - let existing_names = existing.iter().map(|e| e.name.clone()).collect::>(); - existing.extend(attrs.into_iter().filter(|attr| { - !existing_names.contains(&attr.name) - })); + let existing_names = existing + .iter() + .map(|e| e.name.clone()) + .collect::>(); + existing.extend( + attrs + .into_iter() + .filter(|attr| !existing_names.contains(&attr.name)), + ); } fn remove_from_parent(&mut self, target: &Ref<'arena>) { diff --git a/html5ever/examples/html2html.rs b/html5ever/examples/html2html.rs index a3eba1bb..f5340548 100644 --- a/html5ever/examples/html2html.rs +++ b/html5ever/examples/html2html.rs @@ -17,15 +17,14 @@ extern crate html5ever; -use std::io::{self, Write}; use std::default::Default; +use std::io::{self, Write}; - -use html5ever::{parse_document, serialize}; use html5ever::driver::ParseOpts; use html5ever::rcdom::RcDom; use html5ever::tendril::TendrilSink; use html5ever::tree_builder::TreeBuilderOpts; +use html5ever::{parse_document, serialize}; fn main() { let opts = ParseOpts { @@ -42,8 +41,11 @@ fn main() { .unwrap(); // The validator.nu HTML2HTML always prints a doctype at the very beginning. 
- io::stdout().write_all(b"\n") - .ok().expect("writing DOCTYPE failed"); + io::stdout() + .write_all(b"\n") + .ok() + .expect("writing DOCTYPE failed"); serialize(&mut io::stdout(), &dom.document, Default::default()) - .ok().expect("serialization failed"); + .ok() + .expect("serialization failed"); } diff --git a/html5ever/examples/noop-tokenize.rs b/html5ever/examples/noop-tokenize.rs index c134f485..d6c62f1d 100644 --- a/html5ever/examples/noop-tokenize.rs +++ b/html5ever/examples/noop-tokenize.rs @@ -11,11 +11,11 @@ extern crate html5ever; -use std::io; use std::default::Default; +use std::io; -use html5ever::tokenizer::{BufferQueue, TokenSinkResult, TokenSink, Token, Tokenizer}; use html5ever::tendril::*; +use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; struct Sink(Vec); diff --git a/html5ever/examples/noop-tree-builder.rs b/html5ever/examples/noop-tree-builder.rs index fcc125cd..07754498 100644 --- a/html5ever/examples/noop-tree-builder.rs +++ b/html5ever/examples/noop-tree-builder.rs @@ -7,17 +7,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; -use std::io; -use std::default::Default; -use std::collections::HashMap; use std::borrow::Cow; +use std::collections::HashMap; +use std::default::Default; +use std::io; -use html5ever::{Attribute, QualName, ExpandedName}; use html5ever::parse_document; -use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags}; use html5ever::tendril::*; +use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::{Attribute, ExpandedName, QualName}; struct Sink { next_id: usize, @@ -35,14 +36,17 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; type Output = Self; - fn finish(self) -> Self { self } + fn finish(self) -> Self { + self + } fn get_document(&mut self) -> usize { 0 } fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) { + if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) + { target + 1 } else { panic!("not a template element") @@ -72,26 +76,27 @@ impl TreeSink for Sink { unimplemented!() } - fn append_before_sibling(&mut self, - _sibling: &usize, - _new_node: NodeOrText) { } + fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText) {} - fn append_based_on_parent_node(&mut self, + fn append_based_on_parent_node( + &mut self, _element: &usize, _prev_element: &usize, - _new_node: NodeOrText) { } + _new_node: NodeOrText, + ) { + } - fn parse_error(&mut self, _msg: Cow<'static, str>) { } - fn set_quirks_mode(&mut self, _mode: QuirksMode) { } - fn append(&mut self, _parent: &usize, _child: NodeOrText) { } + fn parse_error(&mut self, _msg: Cow<'static, str>) {} + fn set_quirks_mode(&mut self, _mode: QuirksMode) {} + fn append(&mut self, _parent: &usize, _child: NodeOrText) {} - fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) { } + fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {} fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec) { assert!(self.names.contains_key(&target), "not an element"); } - fn remove_from_parent(&mut self, _target: &usize) { } - fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) { } - fn mark_script_already_started(&mut self, 
_node: &usize) { } + fn remove_from_parent(&mut self, _target: &usize) {} + fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {} + fn mark_script_already_started(&mut self, _node: &usize) {} } fn main() { diff --git a/html5ever/examples/print-rcdom.rs b/html5ever/examples/print-rcdom.rs index 9869bac1..df6c86d3 100644 --- a/html5ever/examples/print-rcdom.rs +++ b/html5ever/examples/print-rcdom.rs @@ -7,15 +7,16 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; +use std::default::Default; use std::io; use std::iter::repeat; -use std::default::Default; use std::string::String; use html5ever::parse_document; -use html5ever::rcdom::{NodeData, RcDom, Handle}; +use html5ever::rcdom::{Handle, NodeData, RcDom}; use html5ever::tendril::TendrilSink; // This is not proper HTML serialization, of course. @@ -25,19 +26,25 @@ fn walk(indent: usize, handle: Handle) { // FIXME: don't allocate print!("{}", repeat(" ").take(indent).collect::()); match node.data { - NodeData::Document - => println!("#Document"), + NodeData::Document => println!("#Document"), - NodeData::Doctype { ref name, ref public_id, ref system_id } - => println!("", name, public_id, system_id), + NodeData::Doctype { + ref name, + ref public_id, + ref system_id, + } => println!("", name, public_id, system_id), - NodeData::Text { ref contents } - => println!("#text: {}", escape_default(&contents.borrow())), + NodeData::Text { ref contents } => { + println!("#text: {}", escape_default(&contents.borrow())) + }, - NodeData::Comment { ref contents } - => println!("", escape_default(contents)), + NodeData::Comment { ref contents } => println!("", escape_default(contents)), - NodeData::Element { ref name, ref attrs, .. } => { + NodeData::Element { + ref name, + ref attrs, + .. + } => { assert!(name.ns == ns!(html)); print!("<{}", name.local); for attr in attrs.borrow().iter() { @@ -45,13 +52,13 @@ fn walk(indent: usize, handle: Handle) { print!(" {}=\"{}\"", attr.name.local, attr.value); } println!(">"); - } + }, - NodeData::ProcessingInstruction { .. } => unreachable!() + NodeData::ProcessingInstruction { .. } => unreachable!(), } for child in node.children.borrow().iter() { - walk(indent+4, child.clone()); + walk(indent + 4, child.clone()); } } diff --git a/html5ever/examples/print-tree-actions.rs b/html5ever/examples/print-tree-actions.rs index 86cacb06..bd4f562b 100644 --- a/html5ever/examples/print-tree-actions.rs +++ b/html5ever/examples/print-tree-actions.rs @@ -7,17 +7,20 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; -use std::io; -use std::default::Default; -use std::collections::HashMap; use std::borrow::Cow; +use std::collections::HashMap; +use std::default::Default; +use std::io; -use html5ever::{QualName, ExpandedName, Attribute}; -use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText, ElementFlags}; use html5ever::parse_document; use html5ever::tendril::*; +use html5ever::tree_builder::{ + AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink, +}; +use html5ever::{Attribute, ExpandedName, QualName}; struct Sink { next_id: usize, @@ -35,7 +38,9 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; type Output = Self; - fn finish(self) -> Self { self } + fn finish(self) -> Self { + self + } fn parse_error(&mut self, msg: Cow<'static, str>) { println!("Parse error: {}", msg); @@ -46,7 +51,8 @@ impl TreeSink for Sink { } fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) { + if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) + { target + 1 } else { panic!("not a template element") @@ -85,36 +91,33 @@ impl TreeSink for Sink { fn append(&mut self, parent: &usize, child: NodeOrText) { match child { - AppendNode(n) - => println!("Append node {} to {}", n, parent), - AppendText(t) - => println!("Append text to {}: \"{}\"", parent, escape_default(&t)), + AppendNode(n) => println!("Append node {} to {}", n, parent), + AppendText(t) => println!("Append text to {}: \"{}\"", parent, escape_default(&t)), } } - fn append_before_sibling(&mut self, - sibling: &usize, - new_node: NodeOrText) { + fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText) { match new_node { - AppendNode(n) - => println!("Append node {} before {}", n, sibling), - AppendText(t) - => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)), + AppendNode(n) => println!("Append node {} before {}", n, sibling), + AppendText(t) => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)), } } - fn append_based_on_parent_node(&mut self, + fn append_based_on_parent_node( + &mut self, element: &Self::Handle, prev_element: &Self::Handle, - child: NodeOrText) { - + child: NodeOrText, + ) { self.append_before_sibling(element, child); } - fn append_doctype_to_document(&mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril) { + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { println!("Append doctype: {} {} {}", name, public_id, system_id); } @@ -126,7 +129,12 @@ impl TreeSink for Sink { } } - fn associate_with_form(&mut self, _target: &usize, _form: &usize, _nodes: (&usize, Option<&usize>)) { + fn associate_with_form( + &mut self, + _target: &usize, + _form: &usize, + _nodes: (&usize, Option<&usize>), + ) { // No form owner support. 
} diff --git a/html5ever/examples/tokenize.rs b/html5ever/examples/tokenize.rs index 13c039d5..039ffb79 100644 --- a/html5ever/examples/tokenize.rs +++ b/html5ever/examples/tokenize.rs @@ -9,13 +9,15 @@ extern crate html5ever; -use std::io; use std::default::Default; +use std::io; -use html5ever::tokenizer::{TokenSink, Tokenizer, Token, TokenizerOpts, ParseError, TokenSinkResult}; -use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, TagToken, StartTag, EndTag}; -use html5ever::tokenizer::BufferQueue; use html5ever::tendril::*; +use html5ever::tokenizer::BufferQueue; +use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken}; +use html5ever::tokenizer::{ + ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts, +}; #[derive(Copy, Clone)] struct TokenPrinter { @@ -25,8 +27,8 @@ struct TokenPrinter { impl TokenPrinter { fn is_char(&mut self, is_char: bool) { match (self.in_char_run, is_char) { - (false, true ) => print!("CHAR : \""), - (true, false) => println!("\""), + (false, true) => print!("CHAR : \""), + (true, false) => println!("\""), _ => (), } self.in_char_run = is_char; @@ -47,50 +49,53 @@ impl TokenSink for TokenPrinter { for c in b.chars() { self.do_char(c); } - } + }, NullCharacterToken => self.do_char('\0'), TagToken(tag) => { self.is_char(false); // This is not proper HTML serialization, of course. match tag.kind { StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name), - EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name), + EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name), } for attr in tag.attrs.iter() { - print!(" \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'", - attr.name.local, attr.value); + print!( + " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'", + attr.name.local, attr.value + ); } if tag.self_closing { print!(" \x1b[31m/\x1b[0m"); } println!(">"); - } + }, ParseError(err) => { self.is_char(false); println!("ERROR: {}", err); - } + }, _ => { self.is_char(false); println!("OTHER: {:?}", token); - } + }, } TokenSinkResult::Continue } } fn main() { - let mut sink = TokenPrinter { - in_char_run: false, - }; + let mut sink = TokenPrinter { in_char_run: false }; let mut chunk = ByteTendril::new(); io::stdin().read_to_tendril(&mut chunk).unwrap(); let mut input = BufferQueue::new(); input.push_back(chunk.try_reinterpret().unwrap()); - let mut tok = Tokenizer::new(sink, TokenizerOpts { - profile: true, - .. Default::default() - }); + let mut tok = Tokenizer::new( + sink, + TokenizerOpts { + profile: true, + ..Default::default() + }, + ); let _ = tok.feed(&mut input); assert!(input.is_empty()); tok.end(); diff --git a/html5ever/macros/match_token.rs b/html5ever/macros/match_token.rs index 57453a57..8671841d 100644 --- a/html5ever/macros/match_token.rs +++ b/html5ever/macros/match_token.rs @@ -99,25 +99,35 @@ matching, by enforcing the following restrictions on its input: is common in the HTML5 syntax. 
*/ +use proc_macro2::TokenStream; use quote::ToTokens; use std::collections::HashSet; use std::fs::File; use std::io::{Read, Write}; use std::path::Path; use syn; +use syn::ext::IdentExt; use syn::fold::Fold; use syn::parse::{Parse, ParseStream, Result}; -use syn::ext::IdentExt; -use proc_macro2::TokenStream; pub fn expand(from: &Path, to: &Path) { let mut source = String::new(); - File::open(from).unwrap().read_to_string(&mut source).unwrap(); + File::open(from) + .unwrap() + .read_to_string(&mut source) + .unwrap(); let ast = syn::parse_file(&source).expect("Parsing rules.rs module"); let mut m = MatchTokenParser {}; let ast = m.fold_file(ast); - let code = ast.into_token_stream().to_string().replace("{ ", "{\n").replace(" }", "\n}"); - File::create(to).unwrap().write_all(code.as_bytes()).unwrap(); + let code = ast + .into_token_stream() + .to_string() + .replace("{ ", "{\n") + .replace(" }", "\n}"); + File::create(to) + .unwrap() + .write_all(code.as_bytes()) + .unwrap(); } struct MatchTokenParser {} @@ -166,8 +176,12 @@ impl Parse for Tag { }; input.parse::]>()?; Ok(Tag { - kind: if closing.is_some() { TagKind::EndTag } else { TagKind::StartTag }, - name: name + kind: if closing.is_some() { + TagKind::EndTag + } else { + TagKind::StartTag + }, + name: name, }) } } @@ -217,11 +231,7 @@ impl Parse for MatchTokenArm { RHS::Expression(expr) }; - Ok(MatchTokenArm { - binding, - lhs, - rhs, - }) + Ok(MatchTokenArm { binding, lhs, rhs }) } } @@ -234,10 +244,7 @@ impl Parse for MatchToken { while !content.is_empty() { arms.push(content.parse()?); } - Ok(MatchToken { - ident, - arms, - }) + Ok(MatchToken { ident, arms }) } } @@ -274,15 +281,20 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { }; match (lhs, rhs) { - (LHS::Pattern(_), RHS::Else) => panic!("'else' may not appear with an ordinary pattern"), + (LHS::Pattern(_), RHS::Else) => { + panic!("'else' may not appear with an ordinary pattern") + }, // ordinary pattern => expression (LHS::Pattern(pat), RHS::Expression(expr)) => { if !wildcards_patterns.is_empty() { - panic!("ordinary patterns may not appear after wildcard tags {:?} {:?}", pat, expr); + panic!( + "ordinary patterns may not appear after wildcard tags {:?} {:?}", + pat, expr + ); } arms_code.push(quote!(#binding #pat => #expr,)) - } + }, // ... => else (LHS::Tags(tags), RHS::Else) => { @@ -295,7 +307,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { } wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag)); } - } + }, // <_> => expression // ... 
=> expression @@ -326,7 +338,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { arms_code.push(make_tag_pattern(&binding, tag)); wildcard = Some(false); - } + }, // <_> None => { @@ -336,16 +348,16 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { wildcard = Some(true); wildcards_patterns.push(make_tag_pattern(&binding, tag)); wildcards_expressions.push(expr.clone()); - } + }, } } match wildcard { None => panic!("[internal macro error] tag arm with no tags"), Some(false) => arms_code.push(quote!( => #expr,)), - Some(true) => {} // codegen for wildcards is deferred + Some(true) => {}, // codegen for wildcards is deferred } - } + }, } } @@ -376,7 +388,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), - (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e) + (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), }; quote! { @@ -402,16 +414,18 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { } } - impl Fold for MatchTokenParser { fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { match stmt { - syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro{ ref mac, .. })) => { + syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { if mac.path == parse_quote!(match_token) { - return syn::fold::fold_stmt(self, syn::Stmt::Expr(expand_match_token(&mac.tts))) + return syn::fold::fold_stmt( + self, + syn::Stmt::Expr(expand_match_token(&mac.tts)), + ); } }, - _ => {} + _ => {}, } syn::fold::fold_stmt(self, stmt) @@ -419,12 +433,12 @@ impl Fold for MatchTokenParser { fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { match expr { - syn::Expr::Macro(syn::ExprMacro{ ref mac, .. }) => { + syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { if mac.path == parse_quote!(match_token) { - return syn::fold::fold_expr(self, expand_match_token(&mac.tts)) + return syn::fold::fold_expr(self, expand_match_token(&mac.tts)); } }, - _ => {} + _ => {}, } syn::fold::fold_expr(self, expr) @@ -446,4 +460,3 @@ fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream { ::tree_builder::types::TagToken(#binding ::tokenizer::Tag { kind: #kind, #name_field .. }) } } - diff --git a/html5ever/src/driver.rs b/html5ever/src/driver.rs index 3ee5aaf0..461856ee 100644 --- a/html5ever/src/driver.rs +++ b/html5ever/src/driver.rs @@ -9,16 +9,16 @@ //! High-level interface to the parser. -use {Attribute, QualName}; use buffer_queue::BufferQueue; use tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult}; -use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink, create_element}; +use tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink}; +use {Attribute, QualName}; use std::borrow::Cow; use tendril; -use tendril::StrTendril; use tendril::stream::{TendrilSink, Utf8LossyDecoder}; +use tendril::StrTendril; /// All-encompassing options struct for the parser. #[derive(Clone, Default)] @@ -37,10 +37,16 @@ pub struct ParseOpts { /// or all at once with the `one` method. /// /// If your input is bytes, use `Parser::from_utf8`. 
-pub fn parse_document(sink: Sink, opts: ParseOpts) -> Parser where Sink: TreeSink { +pub fn parse_document(sink: Sink, opts: ParseOpts) -> Parser +where + Sink: TreeSink, +{ let tb = TreeBuilder::new(sink, opts.tree_builder); let tok = Tokenizer::new(tb, opts.tokenizer); - Parser { tokenizer: tok, input_buffer: BufferQueue::new() } + Parser { + tokenizer: tok, + input_buffer: BufferQueue::new(), + } } /// Parse an HTML fragment @@ -50,33 +56,48 @@ pub fn parse_document(sink: Sink, opts: ParseOpts) -> Parser where S /// or all at once with the `one` method. /// /// If your input is bytes, use `Parser::from_utf8`. -pub fn parse_fragment(mut sink: Sink, opts: ParseOpts, - context_name: QualName, context_attrs: Vec) - -> Parser - where Sink: TreeSink { +pub fn parse_fragment( + mut sink: Sink, + opts: ParseOpts, + context_name: QualName, + context_attrs: Vec, +) -> Parser +where + Sink: TreeSink, +{ let context_elem = create_element(&mut sink, context_name, context_attrs); parse_fragment_for_element(sink, opts, context_elem, None) } /// Like `parse_fragment`, but with an existing context element /// and optionally a form element. -pub fn parse_fragment_for_element(sink: Sink, opts: ParseOpts, - context_element: Sink::Handle, - form_element: Option) - -> Parser - where Sink: TreeSink { +pub fn parse_fragment_for_element( + sink: Sink, + opts: ParseOpts, + context_element: Sink::Handle, + form_element: Option, +) -> Parser +where + Sink: TreeSink, +{ let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder); let tok_opts = TokenizerOpts { initial_state: Some(tb.tokenizer_state_for_context_elem()), - .. opts.tokenizer + ..opts.tokenizer }; let tok = Tokenizer::new(tb, tok_opts); - Parser { tokenizer: tok, input_buffer: BufferQueue::new() } + Parser { + tokenizer: tok, + input_buffer: BufferQueue::new(), + } } /// An HTML parser, /// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods. -pub struct Parser where Sink: TreeSink { +pub struct Parser +where + Sink: TreeSink, +{ pub tokenizer: Tokenizer>, pub input_buffer: BufferQueue, } @@ -116,10 +137,10 @@ impl Parser { #[cfg(test)] mod tests { + use super::*; use rcdom::RcDom; use serialize::serialize; use tendril::TendrilSink; - use super::*; #[test] fn from_utf8() { @@ -128,7 +149,9 @@ mod tests { .one("Test".as_bytes()); let mut serialized = Vec::new(); serialize(&mut serialized, &dom.document, Default::default()).unwrap(); - assert_eq!(String::from_utf8(serialized).unwrap().replace(" ", ""), - "<html><head><title>Test"); + assert_eq!( + String::from_utf8(serialized).unwrap().replace(" ", ""), + "Test" + ); } } diff --git a/html5ever/src/lib.rs b/html5ever/src/lib.rs index 4b8029d9..0149887a 100644 --- a/html5ever/src/lib.rs +++ b/html5ever/src/lib.rs @@ -7,18 +7,20 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![crate_name="html5ever"] -#![crate_type="dylib"] - +#![crate_name = "html5ever"] +#![crate_type = "dylib"] #![cfg_attr(test, deny(warnings))] #![allow(unused_parens)] -#[macro_use] extern crate log; -#[macro_use] extern crate markup5ever; -#[macro_use] extern crate mac; +#[macro_use] +extern crate log; +#[macro_use] +extern crate markup5ever; +#[macro_use] +extern crate mac; +pub use driver::{parse_document, parse_fragment, ParseOpts, Parser}; pub use markup5ever::*; -pub use driver::{ParseOpts, parse_document, parse_fragment, Parser}; pub use serialize::serialize; @@ -29,7 +31,7 @@ mod util { pub mod str; } +pub mod driver; pub mod serialize; pub mod tokenizer; pub mod tree_builder; -pub mod driver; diff --git a/html5ever/src/macros.rs b/html5ever/src/macros.rs index 33dc80d7..558a4a95 100644 --- a/html5ever/src/macros.rs +++ b/html5ever/src/macros.rs @@ -13,13 +13,13 @@ macro_rules! unwrap_or_else { None => $else_block, Some(x) => x, } - } + }; } macro_rules! unwrap_or_return { ($opt:expr, $retval:expr) => { unwrap_or_else!($opt, { return $retval }) - } + }; } macro_rules! time { @@ -29,5 +29,5 @@ macro_rules! time { let d = now.elapsed(); let dt = d.as_secs() * 1_000_000_000 + u64::from(d.subsec_nanos()); (result, dt) - }} + }}; } diff --git a/html5ever/src/serialize/mod.rs b/html5ever/src/serialize/mod.rs index 69a412bf..bc91e416 100644 --- a/html5ever/src/serialize/mod.rs +++ b/html5ever/src/serialize/mod.rs @@ -7,14 +7,17 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -pub use markup5ever::serialize::{Serialize, Serializer, TraversalScope, AttrRef}; -use std::io::{self, Write}; +pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; use std::default::Default; +use std::io::{self, Write}; use {LocalName, QualName}; pub fn serialize(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> -where Wr: Write, T: Serialize { +where + Wr: Write, + T: Serialize, +{ let mut ser = HtmlSerializer::new(writer, opts.clone()); node.serialize(&mut ser, opts.traversal_scope) } @@ -64,7 +67,7 @@ fn tagname(name: &QualName) -> LocalName { ref ns => { // FIXME(#122) warn!("node with weird namespace {:?}", ns); - } + }, } name.local.clone() @@ -74,16 +77,16 @@ impl HtmlSerializer { fn new(writer: Wr, opts: SerializeOpts) -> Self { let html_name = match opts.traversal_scope { TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, - TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)) + TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), }; HtmlSerializer { writer: writer, opts: opts, - stack: vec!(ElemInfo { + stack: vec![ElemInfo { html_name: html_name, ignore_children: false, processed_first_child: false, - }), + }], } } @@ -116,7 +119,9 @@ impl HtmlSerializer { impl Serializer for HtmlSerializer { fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> - where AttrIter: Iterator> { + where + AttrIter: Iterator>, + { let html_name = match name.ns { ns!(html) => Some(name.local.clone()), _ => None, @@ -143,13 +148,13 @@ impl Serializer for HtmlSerializer { if name.local != local_name!("xmlns") { try!(self.writer.write_all(b"xmlns:")); } - } + }, ns!(xlink) => try!(self.writer.write_all(b"xlink:")), ref ns => { // FIXME(#122) warn!("attr with weird namespace {:?}", ns); try!(self.writer.write_all(b"unknown_namespace:")); - } + }, } try!(self.writer.write_all(name.local.as_bytes())); @@ -159,14 +164,28 @@ impl Serializer for HtmlSerializer { } 
try!(self.writer.write_all(b">")); - let ignore_children = name.ns == ns!(html) && match name.local { - local_name!("area") | local_name!("base") | local_name!("basefont") | local_name!("bgsound") | local_name!("br") - | local_name!("col") | local_name!("embed") | local_name!("frame") | local_name!("hr") | local_name!("img") - | local_name!("input") | local_name!("keygen") | local_name!("link") - | local_name!("meta") | local_name!("param") | local_name!("source") | local_name!("track") | local_name!("wbr") - => true, - _ => false, - }; + let ignore_children = name.ns == ns!(html) && + match name.local { + local_name!("area") | + local_name!("base") | + local_name!("basefont") | + local_name!("bgsound") | + local_name!("br") | + local_name!("col") | + local_name!("embed") | + local_name!("frame") | + local_name!("hr") | + local_name!("img") | + local_name!("input") | + local_name!("keygen") | + local_name!("link") | + local_name!("meta") | + local_name!("param") | + local_name!("source") | + local_name!("track") | + local_name!("wbr") => true, + _ => false, + }; self.parent().processed_first_child = true; @@ -185,7 +204,7 @@ impl Serializer for HtmlSerializer { None if self.opts.create_missing_parent => { warn!("missing ElemInfo, creating default."); Default::default() - } + }, _ => panic!("no ElemInfo"), }; if info.ignore_children { @@ -199,9 +218,13 @@ impl Serializer for HtmlSerializer { fn write_text(&mut self, text: &str) -> io::Result<()> { let escape = match self.parent().html_name { - Some(local_name!("style")) | Some(local_name!("script")) | Some(local_name!("xmp")) - | Some(local_name!("iframe")) | Some(local_name!("noembed")) | Some(local_name!("noframes")) - | Some(local_name!("plaintext")) => false, + Some(local_name!("style")) | + Some(local_name!("script")) | + Some(local_name!("xmp")) | + Some(local_name!("iframe")) | + Some(local_name!("noembed")) | + Some(local_name!("noframes")) | + Some(local_name!("plaintext")) => false, Some(local_name!("noscript")) => !self.opts.scripting_enabled, diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs index be7fbe3a..6ea2770d 100644 --- a/html5ever/src/tokenizer/char_ref/mod.rs +++ b/html5ever/src/tokenizer/char_ref/mod.rs @@ -7,17 +7,17 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use super::{Tokenizer, TokenSink}; +use super::{TokenSink, Tokenizer}; use buffer_queue::BufferQueue; use data; use tendril::StrTendril; -use util::str::{is_ascii_alnum}; +use util::str::is_ascii_alnum; -use std::char::from_u32; use std::borrow::Cow::Borrowed; +use std::char::from_u32; -pub use self::Status::*; use self::State::*; +pub use self::Status::*; //§ tokenizing-character-references pub struct CharRef { @@ -84,12 +84,14 @@ impl CharRefTokenizer { } fn name_buf<'t>(&'t self) -> &'t StrTendril { - self.name_buf_opt.as_ref() + self.name_buf_opt + .as_ref() .expect("name_buf missing in named character reference") } fn name_buf_mut<'t>(&'t mut self) -> &'t mut StrTendril { - self.name_buf_opt.as_mut() + self.name_buf_opt + .as_mut() .expect("name_buf missing in named character reference") } @@ -112,10 +114,10 @@ impl CharRefTokenizer { impl CharRefTokenizer { pub fn step( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { if self.result.is_some() { return Done; } @@ -132,57 +134,55 @@ impl CharRefTokenizer { } fn do_begin( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { - '\t' | '\n' | '\x0C' | ' ' | '<' | '&' - => self.finish_none(), - c if Some(c) == self.addnl_allowed - => self.finish_none(), + '\t' | '\n' | '\x0C' | ' ' | '<' | '&' => self.finish_none(), + c if Some(c) == self.addnl_allowed => self.finish_none(), '#' => { tokenizer.discard_char(input); self.state = Octothorpe; Progress - } + }, _ => { self.state = Named; self.name_buf_opt = Some(StrTendril::new()); Progress - } + }, } } fn do_octothorpe( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { let c = unwrap_or_return!(tokenizer.peek(input), Stuck); match c { 'x' | 'X' => { tokenizer.discard_char(input); self.hex_marker = Some(c); self.state = Numeric(16); - } + }, _ => { self.hex_marker = None; self.state = Numeric(10); - } + }, } Progress } fn do_numeric( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue, - base: u32) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + base: u32, + ) -> Status { let c = unwrap_or_return!(tokenizer.peek(input), Stuck); match c.to_digit(base) { Some(n) => { @@ -196,34 +196,36 @@ impl CharRefTokenizer { self.num = self.num.wrapping_add(n); self.seen_digit = true; Progress - } + }, None if !self.seen_digit => self.unconsume_numeric(tokenizer, input), None => { self.state = NumericSemicolon; Progress - } + }, } } fn do_numeric_semicolon( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { ';' => tokenizer.discard_char(input), - _ => tokenizer.emit_error(Borrowed("Semicolon missing after numeric character reference")), + _ => tokenizer.emit_error(Borrowed( + "Semicolon missing after numeric character reference", + )), }; self.finish_numeric(tokenizer) } fn unconsume_numeric( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { let mut unconsume = 
StrTendril::from_char('#'); match self.hex_marker { Some(c) => unconsume.push_char(c), @@ -249,19 +251,20 @@ impl CharRefTokenizer { None => (conv(self.num), true), }, - 0x01...0x08 | 0x0B | 0x0D...0x1F | 0x7F | 0xFDD0...0xFDEF - => (conv(self.num), true), + 0x01...0x08 | 0x0B | 0x0D...0x1F | 0x7F | 0xFDD0...0xFDEF => (conv(self.num), true), - n if (n & 0xFFFE) == 0xFFFE - => (conv(n), true), + n if (n & 0xFFFE) == 0xFFFE => (conv(n), true), n => (conv(n), false), }; if error { - let msg = format_if!(tokenizer.opts.exact_errors, + let msg = format_if!( + tokenizer.opts.exact_errors, "Invalid numeric character reference", - "Invalid numeric character reference value 0x{:06X}", self.num); + "Invalid numeric character reference value 0x{:06X}", + self.num + ); tokenizer.emit_error(msg); } @@ -269,10 +272,10 @@ impl CharRefTokenizer { } fn do_named( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); self.name_buf_mut().push_char(c); match data::NAMED_ENTITIES.get(&self.name_buf()[..]) { @@ -285,7 +288,7 @@ impl CharRefTokenizer { } // Otherwise we just have a prefix match. Progress - } + }, // Can't continue the match. None => self.finish_named(tokenizer, input, Some(c)), @@ -293,9 +296,12 @@ impl CharRefTokenizer { } fn emit_name_error(&mut self, tokenizer: &mut Tokenizer) { - let msg = format_if!(tokenizer.opts.exact_errors, + let msg = format_if!( + tokenizer.opts.exact_errors, "Invalid character reference", - "Invalid character reference &{}", self.name_buf()); + "Invalid character reference &{}", + self.name_buf() + ); tokenizer.emit_error(msg); } @@ -303,10 +309,12 @@ impl CharRefTokenizer { input.push_front(self.name_buf_opt.take().unwrap()); } - fn finish_named(&mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue, - end_char: Option) -> Status { + fn finish_named( + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + end_char: Option, + ) -> Status { match self.name_match { None => { match end_char { @@ -315,17 +323,16 @@ impl CharRefTokenizer { // we emit a parse error. self.state = BogusName; return Progress; - } + }, // Check length because &; is not a parse error. - Some(';') if self.name_buf().len() > 1 - => self.emit_name_error(tokenizer), + Some(';') if self.name_buf().len() > 1 => self.emit_name_error(tokenizer), _ => (), } self.unconsume_name(input); self.finish_none() - } + }, Some((c1, c2)) => { // We have a complete match, but we may have consumed @@ -338,7 +345,7 @@ impl CharRefTokenizer { let name_len = self.name_len; assert!(name_len > 0); - let last_matched = self.name_buf()[name_len-1..].chars().next().unwrap(); + let last_matched = self.name_buf()[name_len - 1..].chars().next().unwrap(); // There might not be a next character after the match, if // we had a full match and then hit EOF. 
@@ -361,14 +368,18 @@ impl CharRefTokenizer { let unconsume_all = match (self.addnl_allowed, last_matched, next_after) { (_, ';', _) => false, (Some(_), _, Some('=')) => { - tokenizer.emit_error(Borrowed("Equals sign after character reference in attribute")); + tokenizer.emit_error(Borrowed( + "Equals sign after character reference in attribute", + )); true - } + }, (Some(_), _, Some(c)) if is_ascii_alnum(c) => true, _ => { - tokenizer.emit_error(Borrowed("Character reference does not end with semicolon")); + tokenizer.emit_error(Borrowed( + "Character reference does not end with semicolon", + )); false - } + }, }; if unconsume_all { @@ -382,54 +393,54 @@ impl CharRefTokenizer { }); Done } - } + }, } } fn do_bogus_name( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) - -> Status { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); self.name_buf_mut().push_char(c); match c { _ if is_ascii_alnum(c) => return Progress, ';' => self.emit_name_error(tokenizer), - _ => () + _ => (), } self.unconsume_name(input); self.finish_none() } pub fn end_of_file( - &mut self, - tokenizer: &mut Tokenizer, - input: &mut BufferQueue) { + &mut self, + tokenizer: &mut Tokenizer, + input: &mut BufferQueue, + ) { while self.result.is_none() { match self.state { Begin => drop(self.finish_none()), - Numeric(_) if !self.seen_digit - => drop(self.unconsume_numeric(tokenizer, input)), + Numeric(_) if !self.seen_digit => drop(self.unconsume_numeric(tokenizer, input)), Numeric(_) | NumericSemicolon => { tokenizer.emit_error(Borrowed("EOF in numeric character reference")); self.finish_numeric(tokenizer); - } + }, Named => drop(self.finish_named(tokenizer, input, None)), BogusName => { self.unconsume_name(input); self.finish_none(); - } + }, Octothorpe => { input.push_front(StrTendril::from_slice("#")); tokenizer.emit_error(Borrowed("EOF after '#' in character reference")); self.finish_none(); - } + }, } } } diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs index 041701b5..d12c72e6 100644 --- a/html5ever/src/tokenizer/interface.rs +++ b/html5ever/src/tokenizer/interface.rs @@ -7,15 +7,15 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use LocalName; use interface::Attribute; use std::borrow::Cow; use tendril::StrTendril; use tokenizer::states; +use LocalName; -pub use self::TagKind::{StartTag, EndTag}; -pub use self::Token::{DoctypeToken, TagToken, CommentToken, CharacterTokens}; -pub use self::Token::{NullCharacterToken, EOFToken, ParseError}; +pub use self::TagKind::{EndTag, StartTag}; +pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken}; +pub use self::Token::{EOFToken, NullCharacterToken, ParseError}; /// A `DOCTYPE` token. // FIXME: already exists in Servo DOM @@ -87,7 +87,7 @@ pub enum TokenSinkResult { Continue, Script(Handle), Plaintext, - RawData(states::RawKind) + RawData(states::RawKind), } /// Types which can receive tokens from the tokenizer. diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 4cf09315..a1118d20 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -9,43 +9,43 @@ //! The HTML5 tokenizer. 
-pub use self::interface::{Doctype, TagKind, StartTag, EndTag, Tag}; -pub use self::interface::{Token, DoctypeToken, TagToken, CommentToken}; -pub use self::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError}; +pub use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; +pub use self::interface::{CommentToken, DoctypeToken, TagToken, Token}; +pub use self::interface::{Doctype, EndTag, StartTag, Tag, TagKind}; pub use self::interface::{TokenSink, TokenSinkResult}; -use self::states::{Rcdata, Rawtext, ScriptData, ScriptDataEscaped}; -use self::states::{Escaped, DoubleEscaped}; -use self::states::{Unquoted, SingleQuoted, DoubleQuoted}; use self::states::{DoctypeIdKind, Public, System}; +use self::states::{DoubleEscaped, Escaped}; +use self::states::{DoubleQuoted, SingleQuoted, Unquoted}; +use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped}; use self::char_ref::{CharRef, CharRefTokenizer}; use util::str::lower_ascii_letter; -use std::mem::replace; -use std::default::Default; use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; +use std::default::Default; +use std::mem::replace; -use {LocalName, QualName, Attribute, SmallCharSet}; +pub use buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; use tendril::StrTendril; -pub use buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet}; +use {Attribute, LocalName, QualName, SmallCharSet}; -pub mod states; -mod interface; mod char_ref; +mod interface; +pub mod states; pub enum ProcessResult { Continue, Suspend, - Script(Handle) + Script(Handle), } #[must_use] pub enum TokenizerResult { Done, - Script(Handle) + Script(Handle), } fn option_push(opt_str: &mut Option, c: char) { @@ -170,7 +170,9 @@ pub struct Tokenizer { impl Tokenizer { /// Create a new tokenizer which feeds tokens to a particular `TokenSink`. pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer { - let start_tag_name = opts.last_start_tag_name.take() + let start_tag_name = opts + .last_start_tag_name + .take() .map(|s| LocalName::from(&*s)); let state = opts.initial_state.unwrap_or(states::Data); let discard_bom = opts.discard_bom; @@ -187,7 +189,7 @@ impl Tokenizer { current_tag_kind: StartTag, current_tag_name: StrTendril::new(), current_tag_self_closing: false, - current_tag_attrs: vec!(), + current_tag_attrs: vec![], current_attr_name: StrTendril::new(), current_attr_value: StrTendril::new(), current_comment: StrTendril::new(), @@ -234,17 +236,16 @@ impl Tokenizer { } fn process_token_and_continue(&mut self, token: Token) { - assert!(matches!(self.process_token(token), TokenSinkResult::Continue)); + assert!(matches!( + self.process_token(token), + TokenSinkResult::Continue + )); } //§ preprocessing-the-input-stream // Get the next input character, which might be the character // 'c' that we already consumed from the buffers. 
- fn get_preprocessed_char( - &mut self, - mut c: char, - input: &mut BufferQueue) - -> Option { + fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option { if self.ignore_lf { self.ignore_lf = false; if c == '\n' { @@ -261,11 +262,13 @@ impl Tokenizer { self.current_line += 1; } - if self.opts.exact_errors && match c as u32 { - 0x01...0x08 | 0x0B | 0x0E...0x1F | 0x7F...0x9F | 0xFDD0...0xFDEF => true, - n if (n & 0xFFFE) == 0xFFFE => true, - _ => false, - } { + if self.opts.exact_errors && + match c as u32 { + 0x01...0x08 | 0x0B | 0x0E...0x1F | 0x7F...0x9F | 0xFDD0...0xFDEF => true, + n if (n & 0xFFFE) == 0xFFFE => true, + _ => false, + } + { let msg = format!("Bad character {}", c); self.emit_error(Cow::Owned(msg)); } @@ -282,7 +285,9 @@ impl Tokenizer { self.reconsume = false; Some(self.current_char) } else { - input.next().and_then(|c| self.get_preprocessed_char(c, input)) + input + .next() + .and_then(|c| self.get_preprocessed_char(c, input)) } } @@ -303,7 +308,7 @@ impl Tokenizer { // NB: We don't set self.current_char for a run of characters not // in the set. It shouldn't matter for the codepaths that use // this. - _ => d + _ => d, } } @@ -313,11 +318,11 @@ impl Tokenizer { // NB: this doesn't do input stream preprocessing or set the current input // character. fn eat( - &mut self, - input: &mut BufferQueue, - pat: &str, - eq: fn(&u8, &u8) -> bool) - -> Option { + &mut self, + input: &mut BufferQueue, + pat: &str, + eq: fn(&u8, &u8) -> bool, + ) -> Option { input.push_front(replace(&mut self.temp_buf, StrTendril::new())); match input.eat(pat, eq) { None if self.at_eof => Some(false), @@ -343,7 +348,7 @@ impl Tokenizer { Some(x) => { *x += dt; false - } + }, None => true, }; if new { @@ -372,7 +377,10 @@ impl Tokenizer { let msg = format_if!( self.opts.exact_errors, "Bad character", - "Saw {} in state {:?}", self.current_char, self.state); + "Saw {} in state {:?}", + self.current_char, + self.state + ); self.emit_error(msg); } @@ -380,7 +388,9 @@ impl Tokenizer { let msg = format_if!( self.opts.exact_errors, "Unexpected EOF", - "Saw EOF in state {:?}", self.state); + "Saw EOF in state {:?}", + self.state + ); self.emit_error(msg); } @@ -405,7 +415,7 @@ impl Tokenizer { match self.current_tag_kind { StartTag => { self.last_start_tag_name = Some(name.clone()); - } + }, EndTag => { if !self.current_tag_attrs.is_empty() { self.emit_error(Borrowed("Attributes on an end tag")); @@ -413,13 +423,14 @@ impl Tokenizer { if self.current_tag_self_closing { self.emit_error(Borrowed("Self-closing end tag")); } - } + }, } - let token = TagToken(Tag { kind: self.current_tag_kind, + let token = TagToken(Tag { + kind: self.current_tag_kind, name: name, self_closing: self.current_tag_self_closing, - attrs: replace(&mut self.current_tag_attrs, vec!()), + attrs: replace(&mut self.current_tag_attrs, vec![]), }); match self.process_token(token) { @@ -435,7 +446,7 @@ impl Tokenizer { TokenSinkResult::RawData(kind) => { self.state = states::RawData(kind); ProcessResult::Continue - } + }, } } @@ -458,7 +469,7 @@ impl Tokenizer { fn discard_tag(&mut self) { self.current_tag_name.clear(); self.current_tag_self_closing = false; - self.current_tag_attrs = vec!(); + self.current_tag_attrs = vec![]; } fn create_tag(&mut self, kind: TagKind, c: char) { @@ -469,9 +480,7 @@ impl Tokenizer { fn have_appropriate_end_tag(&self) -> bool { match self.last_start_tag_name.as_ref() { - Some(last) => - (self.current_tag_kind == EndTag) - && (*self.current_tag_name == **last), + Some(last) => 
(self.current_tag_kind == EndTag) && (*self.current_tag_name == **last), None => false, } } @@ -492,7 +501,9 @@ impl Tokenizer { // FIXME: linear time search, do we care? let dup = { let name = &*self.current_attr_name; - self.current_tag_attrs.iter().any(|a| &*a.name.local == name) + self.current_tag_attrs + .iter() + .any(|a| &*a.name.local == name) }; if dup { @@ -680,9 +691,9 @@ impl Tokenizer { states::Data => loop { match pop_except_from!(self, input, small_char_set!('\r' '\0' '&' '<' '\n')) { FromSet('\0') => go!(self: error; emit '\0'), - FromSet('&') => go!(self: consume_char_ref), - FromSet('<') => go!(self: to TagOpen), - FromSet(c) => go!(self: emit c), + FromSet('&') => go!(self: consume_char_ref), + FromSet('<') => go!(self: to TagOpen), + FromSet(c) => go!(self: emit c), NotFromSet(b) => self.emit_chars(b), } }, @@ -734,7 +745,9 @@ impl Tokenizer { match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) { FromSet('\0') => go!(self: error; emit '\u{fffd}'), FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped), - FromSet('<') => go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped), + FromSet('<') => { + go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped) + }, FromSet(c) => go!(self: emit c), NotFromSet(b) => self.emit_chars(b), } @@ -744,72 +757,87 @@ impl Tokenizer { states::Plaintext => loop { match pop_except_from!(self, input, small_char_set!('\r' '\0' '\n')) { FromSet('\0') => go!(self: error; emit '\u{fffd}'), - FromSet(c) => go!(self: emit c), + FromSet(c) => go!(self: emit c), NotFromSet(b) => self.emit_chars(b), } }, //§ tag-open-state - states::TagOpen => loop { match get_char!(self, input) { - '!' => go!(self: clear_temp; to MarkupDeclarationOpen), - '/' => go!(self: to EndTagOpen), - '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: create_tag StartTag cl; to TagName), - None => go!(self: error; emit '<'; reconsume Data), + states::TagOpen => loop { + match get_char!(self, input) { + '!' => go!(self: clear_temp; to MarkupDeclarationOpen), + '/' => go!(self: to EndTagOpen), + '?' 
=> go!(self: error; clear_comment; push_comment '?'; to BogusComment), + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: create_tag StartTag cl; to TagName), + None => go!(self: error; emit '<'; reconsume Data), + }, } - }}, + }, //§ end-tag-open-state - states::EndTagOpen => loop { match get_char!(self, input) { - '>' => go!(self: error; to Data), - '\0' => go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: create_tag EndTag cl; to TagName), - None => go!(self: error; clear_comment; push_comment c; to BogusComment), + states::EndTagOpen => loop { + match get_char!(self, input) { + '>' => go!(self: error; to Data), + '\0' => { + go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment) + }, + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: create_tag EndTag cl; to TagName), + None => go!(self: error; clear_comment; push_comment c; to BogusComment), + }, } - }}, + }, //§ tag-name-state - states::TagName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BeforeAttributeName), - '/' => go!(self: to SelfClosingStartTag), - '>' => go!(self: emit_tag Data), - '\0' => go!(self: error; push_tag '\u{fffd}'), - c => go!(self: push_tag (c.to_ascii_lowercase())), - }}, + states::TagName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), + '/' => go!(self: to SelfClosingStartTag), + '>' => go!(self: emit_tag Data), + '\0' => go!(self: error; push_tag '\u{fffd}'), + c => go!(self: push_tag (c.to_ascii_lowercase())), + } + }, //§ script-data-escaped-less-than-sign-state - states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop { match get_char!(self, input) { - '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: clear_temp; push_temp cl; emit '<'; emit c; + states::RawLessThanSign(ScriptDataEscaped(Escaped)) => loop { + match get_char!(self, input) { + '/' => go!(self: clear_temp; to RawEndTagOpen ScriptDataEscaped Escaped), + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: clear_temp; push_temp cl; emit '<'; emit c; to ScriptDataEscapeStart DoubleEscaped), - None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped), + None => go!(self: emit '<'; reconsume RawData ScriptDataEscaped Escaped), + }, } - }}, + }, //§ script-data-double-escaped-less-than-sign-state - states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop { match get_char!(self, input) { - '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd), - _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), - }}, + states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => loop { + match get_char!(self, input) { + '/' => go!(self: clear_temp; emit '/'; to ScriptDataDoubleEscapeEnd), + _ => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), + } + }, //§ rcdata-less-than-sign-state rawtext-less-than-sign-state script-data-less-than-sign-state // otherwise - states::RawLessThanSign(kind) => loop { match get_char!(self, input) { - '/' => go!(self: clear_temp; to RawEndTagOpen kind), - '!' if kind == ScriptData => go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped), - _ => go!(self: emit '<'; reconsume RawData kind), - }}, + states::RawLessThanSign(kind) => loop { + match get_char!(self, input) { + '/' => go!(self: clear_temp; to RawEndTagOpen kind), + '!' 
if kind == ScriptData => { + go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped) + }, + _ => go!(self: emit '<'; reconsume RawData kind), + } + }, //§ rcdata-end-tag-open-state rawtext-end-tag-open-state script-data-end-tag-open-state script-data-escaped-end-tag-open-state states::RawEndTagOpen(kind) => loop { let c = get_char!(self, input); match lower_ascii_letter(c) { Some(cl) => go!(self: create_tag EndTag cl; push_temp c; to RawEndTagName kind), - None => go!(self: emit '<'; emit '/'; reconsume RawData kind), + None => go!(self: emit '<'; emit '/'; reconsume RawData kind), } }, @@ -818,8 +846,7 @@ impl Tokenizer { let c = get_char!(self, input); if self.have_appropriate_end_tag() { match c { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BeforeAttributeName), + '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), '/' => go!(self: to SelfClosingStartTag), '>' => go!(self: emit_tag Data), _ => (), @@ -828,7 +855,9 @@ impl Tokenizer { match lower_ascii_letter(c) { Some(cl) => go!(self: push_tag cl; push_temp c), - None => go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind), + None => { + go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind) + }, } }, @@ -837,136 +866,165 @@ impl Tokenizer { let c = get_char!(self, input); match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { - let esc = if &*self.temp_buf == "script" { DoubleEscaped } else { Escaped }; + let esc = if &*self.temp_buf == "script" { + DoubleEscaped + } else { + Escaped + }; go!(self: emit c; to RawData ScriptDataEscaped esc); - } + }, _ => match lower_ascii_letter(c) { Some(cl) => go!(self: push_temp cl; emit c), - None => go!(self: reconsume RawData ScriptDataEscaped Escaped), - } + None => go!(self: reconsume RawData ScriptDataEscaped Escaped), + }, } }, //§ script-data-escape-start-state - states::ScriptDataEscapeStart(Escaped) => loop { match get_char!(self, input) { - '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash), - _ => go!(self: reconsume RawData ScriptData), - }}, + states::ScriptDataEscapeStart(Escaped) => loop { + match get_char!(self, input) { + '-' => go!(self: emit '-'; to ScriptDataEscapeStartDash), + _ => go!(self: reconsume RawData ScriptData), + } + }, //§ script-data-escape-start-dash-state - states::ScriptDataEscapeStartDash => loop { match get_char!(self, input) { - '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped), - _ => go!(self: reconsume RawData ScriptData), - }}, + states::ScriptDataEscapeStartDash => loop { + match get_char!(self, input) { + '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash Escaped), + _ => go!(self: reconsume RawData ScriptData), + } + }, //§ script-data-escaped-dash-state script-data-double-escaped-dash-state - states::ScriptDataEscapedDash(kind) => loop { match get_char!(self, input) { - '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind), - '<' => { - if kind == DoubleEscaped { go!(self: emit '<'); } - go!(self: to RawLessThanSign ScriptDataEscaped kind); + states::ScriptDataEscapedDash(kind) => loop { + match get_char!(self, input) { + '-' => go!(self: emit '-'; to ScriptDataEscapedDashDash kind), + '<' => { + if kind == DoubleEscaped { + go!(self: emit '<'); + } + go!(self: to RawLessThanSign ScriptDataEscaped kind); + }, + '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), + c => go!(self: emit c; to RawData ScriptDataEscaped kind), } - '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), - c => go!(self: 
emit c; to RawData ScriptDataEscaped kind), - }}, + }, //§ script-data-escaped-dash-dash-state script-data-double-escaped-dash-dash-state - states::ScriptDataEscapedDashDash(kind) => loop { match get_char!(self, input) { - '-' => go!(self: emit '-'), - '<' => { - if kind == DoubleEscaped { go!(self: emit '<'); } - go!(self: to RawLessThanSign ScriptDataEscaped kind); + states::ScriptDataEscapedDashDash(kind) => loop { + match get_char!(self, input) { + '-' => go!(self: emit '-'), + '<' => { + if kind == DoubleEscaped { + go!(self: emit '<'); + } + go!(self: to RawLessThanSign ScriptDataEscaped kind); + }, + '>' => go!(self: emit '>'; to RawData ScriptData), + '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), + c => go!(self: emit c; to RawData ScriptDataEscaped kind), } - '>' => go!(self: emit '>'; to RawData ScriptData), - '\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind), - c => go!(self: emit c; to RawData ScriptDataEscaped kind), - }}, + }, //§ script-data-double-escape-end-state states::ScriptDataDoubleEscapeEnd => loop { let c = get_char!(self, input); match c { '\t' | '\n' | '\x0C' | ' ' | '/' | '>' => { - let esc = if &*self.temp_buf == "script" { Escaped } else { DoubleEscaped }; + let esc = if &*self.temp_buf == "script" { + Escaped + } else { + DoubleEscaped + }; go!(self: emit c; to RawData ScriptDataEscaped esc); - } + }, _ => match lower_ascii_letter(c) { Some(cl) => go!(self: push_temp cl; emit c), - None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), - } + None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped), + }, } }, //§ before-attribute-name-state - states::BeforeAttributeName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '/' => go!(self: to SelfClosingStartTag), - '>' => go!(self: emit_tag Data), - '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: create_attr cl; to AttributeName), - None => { - go_match!(self: c, + states::BeforeAttributeName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '/' => go!(self: to SelfClosingStartTag), + '>' => go!(self: emit_tag Data), + '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: create_attr cl; to AttributeName), + None => { + go_match!(self: c, '"' , '\'' , '<' , '=' => error); - go!(self: create_attr c; to AttributeName); - } + go!(self: create_attr c; to AttributeName); + }, + }, } - }}, + }, //§ attribute-name-state - states::AttributeName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to AfterAttributeName), - '/' => go!(self: to SelfClosingStartTag), - '=' => go!(self: to BeforeAttributeValue), - '>' => go!(self: emit_tag Data), - '\0' => go!(self: error; push_name '\u{fffd}'), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: push_name cl), - None => { - go_match!(self: c, + states::AttributeName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterAttributeName), + '/' => go!(self: to SelfClosingStartTag), + '=' => go!(self: to BeforeAttributeValue), + '>' => go!(self: emit_tag Data), + '\0' => go!(self: error; push_name '\u{fffd}'), + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: push_name cl), + None => { + go_match!(self: c, '"' , '\'' , '<' => error); - go!(self: push_name c); - } + go!(self: push_name c); + }, + 
}, } - }}, + }, //§ after-attribute-name-state - states::AfterAttributeName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '/' => go!(self: to SelfClosingStartTag), - '=' => go!(self: to BeforeAttributeValue), - '>' => go!(self: emit_tag Data), - '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), - c => match lower_ascii_letter(c) { - Some(cl) => go!(self: create_attr cl; to AttributeName), - None => { - go_match!(self: c, + states::AfterAttributeName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '/' => go!(self: to SelfClosingStartTag), + '=' => go!(self: to BeforeAttributeValue), + '>' => go!(self: emit_tag Data), + '\0' => go!(self: error; create_attr '\u{fffd}'; to AttributeName), + c => match lower_ascii_letter(c) { + Some(cl) => go!(self: create_attr cl; to AttributeName), + None => { + go_match!(self: c, '"' , '\'' , '<' => error); - go!(self: create_attr c; to AttributeName); - } + go!(self: create_attr c; to AttributeName); + }, + }, } - }}, + }, //§ before-attribute-value-state // Use peek so we can handle the first attr character along with the rest, // hopefully in the same zero-copy buffer. - states::BeforeAttributeValue => loop { match peek!(self, input) { - '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input), - '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted), - '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted), - '\0' => go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted), - '>' => go!(self: discard_char input; error; emit_tag Data), - _ => go!(self: to AttributeValue Unquoted), - }}, + states::BeforeAttributeValue => loop { + match peek!(self, input) { + '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input), + '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted), + '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted), + '\0' => { + go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted) + }, + '>' => go!(self: discard_char input; error; emit_tag Data), + _ => go!(self: to AttributeValue Unquoted), + } + }, //§ attribute-value-(double-quoted)-state states::AttributeValue(DoubleQuoted) => loop { match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) { - FromSet('"') => go!(self: to AfterAttributeValueQuoted), - FromSet('&') => go!(self: consume_char_ref '"'), + FromSet('"') => go!(self: to AfterAttributeValueQuoted), + FromSet('&') => go!(self: consume_char_ref '"'), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), - FromSet(c) => go!(self: push_value c), + FromSet(c) => go!(self: push_value c), NotFromSet(ref b) => go!(self: append_value b), } }, @@ -975,119 +1033,145 @@ impl Tokenizer { states::AttributeValue(SingleQuoted) => loop { match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) { FromSet('\'') => go!(self: to AfterAttributeValueQuoted), - FromSet('&') => go!(self: consume_char_ref '\''), + FromSet('&') => go!(self: consume_char_ref '\''), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), - FromSet(c) => go!(self: push_value c), + FromSet(c) => go!(self: push_value c), NotFromSet(ref b) => go!(self: append_value b), } }, //§ attribute-value-(unquoted)-state states::AttributeValue(Unquoted) => loop { - match pop_except_from!(self, input, small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0')) { - FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' 
') - => go!(self: to BeforeAttributeName), - FromSet('&') => go!(self: consume_char_ref '>'), - FromSet('>') => go!(self: emit_tag Data), + match pop_except_from!( + self, + input, + small_char_set!('\r' '\t' '\n' '\x0C' ' ' '&' '>' '\0') + ) { + FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => { + go!(self: to BeforeAttributeName) + }, + FromSet('&') => go!(self: consume_char_ref '>'), + FromSet('>') => go!(self: emit_tag Data), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), FromSet(c) => { go_match!(self: c, '"' , '\'' , '<' , '=' , '`' => error); go!(self: push_value c); - } + }, NotFromSet(ref b) => go!(self: append_value b), } }, //§ after-attribute-value-(quoted)-state - states::AfterAttributeValueQuoted => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BeforeAttributeName), - '/' => go!(self: to SelfClosingStartTag), - '>' => go!(self: emit_tag Data), - _ => go!(self: error; reconsume BeforeAttributeName), - }}, + states::AfterAttributeValueQuoted => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), + '/' => go!(self: to SelfClosingStartTag), + '>' => go!(self: emit_tag Data), + _ => go!(self: error; reconsume BeforeAttributeName), + } + }, //§ self-closing-start-tag-state - states::SelfClosingStartTag => loop { match get_char!(self, input) { - '>' => { - self.current_tag_self_closing = true; - go!(self: emit_tag Data); + states::SelfClosingStartTag => loop { + match get_char!(self, input) { + '>' => { + self.current_tag_self_closing = true; + go!(self: emit_tag Data); + }, + _ => go!(self: error; reconsume BeforeAttributeName), } - _ => go!(self: error; reconsume BeforeAttributeName), - }}, + }, //§ comment-start-state - states::CommentStart => loop { match get_char!(self, input) { - '-' => go!(self: to CommentStartDash), - '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment), - '>' => go!(self: error; emit_comment; to Data), - c => go!(self: push_comment c; to Comment), - }}, + states::CommentStart => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentStartDash), + '\0' => go!(self: error; push_comment '\u{fffd}'; to Comment), + '>' => go!(self: error; emit_comment; to Data), + c => go!(self: push_comment c; to Comment), + } + }, //§ comment-start-dash-state - states::CommentStartDash => loop { match get_char!(self, input) { - '-' => go!(self: to CommentEnd), - '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), - '>' => go!(self: error; emit_comment; to Data), - c => go!(self: push_comment '-'; push_comment c; to Comment), - }}, + states::CommentStartDash => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentEnd), + '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), + '>' => go!(self: error; emit_comment; to Data), + c => go!(self: push_comment '-'; push_comment c; to Comment), + } + }, //§ comment-state - states::Comment => loop { match get_char!(self, input) { - '-' => go!(self: to CommentEndDash), - '\0' => go!(self: error; push_comment '\u{fffd}'), - c => go!(self: push_comment c), - }}, + states::Comment => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentEndDash), + '\0' => go!(self: error; push_comment '\u{fffd}'), + c => go!(self: push_comment c), + } + }, //§ comment-end-dash-state - states::CommentEndDash => loop { match get_char!(self, input) { - '-' => go!(self: to CommentEnd), - '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), - c => 
go!(self: push_comment '-'; push_comment c; to Comment), - }}, + states::CommentEndDash => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentEnd), + '\0' => go!(self: error; append_comment "-\u{fffd}"; to Comment), + c => go!(self: push_comment '-'; push_comment c; to Comment), + } + }, //§ comment-end-state - states::CommentEnd => loop { match get_char!(self, input) { - '>' => go!(self: emit_comment; to Data), - '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment), - '!' => go!(self: error; to CommentEndBang), - '-' => go!(self: error; push_comment '-'), - c => go!(self: error; append_comment "--"; push_comment c; to Comment), - }}, + states::CommentEnd => loop { + match get_char!(self, input) { + '>' => go!(self: emit_comment; to Data), + '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment), + '!' => go!(self: error; to CommentEndBang), + '-' => go!(self: error; push_comment '-'), + c => go!(self: error; append_comment "--"; push_comment c; to Comment), + } + }, //§ comment-end-bang-state - states::CommentEndBang => loop { match get_char!(self, input) { - '-' => go!(self: append_comment "--!"; to CommentEndDash), - '>' => go!(self: emit_comment; to Data), - '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment), - c => go!(self: append_comment "--!"; push_comment c; to Comment), - }}, + states::CommentEndBang => loop { + match get_char!(self, input) { + '-' => go!(self: append_comment "--!"; to CommentEndDash), + '>' => go!(self: emit_comment; to Data), + '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment), + c => go!(self: append_comment "--!"; push_comment c; to Comment), + } + }, //§ doctype-state - states::Doctype => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BeforeDoctypeName), - _ => go!(self: error; reconsume BeforeDoctypeName), - }}, + states::Doctype => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName), + _ => go!(self: error; reconsume BeforeDoctypeName), + } + }, //§ before-doctype-name-state - states::BeforeDoctypeName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '\0' => go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName), - '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data), - c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase()); + states::BeforeDoctypeName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '\0' => { + go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName) + }, + '>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data), + c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase()); to DoctypeName), - }}, + } + }, //§ doctype-name-state - states::DoctypeName => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: clear_temp; to AfterDoctypeName), - '>' => go!(self: emit_doctype; to Data), - '\0' => go!(self: error; push_doctype_name '\u{fffd}'), - c => go!(self: push_doctype_name (c.to_ascii_lowercase())), - }}, + states::DoctypeName => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to AfterDoctypeName), + '>' => go!(self: emit_doctype; to Data), + '\0' => go!(self: error; push_doctype_name '\u{fffd}'), + c => go!(self: push_doctype_name (c.to_ascii_lowercase())), + } + }, //§ after-doctype-name-state states::AfterDoctypeName 
=> loop { @@ -1099,84 +1183,114 @@ impl Tokenizer { match get_char!(self, input) { '\t' | '\n' | '\x0C' | ' ' => (), '>' => go!(self: emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; to BogusDoctype), } } }, //§ after-doctype-public-keyword-state after-doctype-system-keyword-state - states::AfterDoctypeKeyword(kind) => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BeforeDoctypeIdentifier kind), - '"' => go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), - '\'' => go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), - '>' => go!(self: error; force_quirks; emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), - }}, + states::AfterDoctypeKeyword(kind) => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind), + '"' => { + go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind) + }, + '\'' => { + go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind) + }, + '>' => go!(self: error; force_quirks; emit_doctype; to Data), + _ => go!(self: error; force_quirks; to BogusDoctype), + } + }, //§ before-doctype-public-identifier-state before-doctype-system-identifier-state - states::BeforeDoctypeIdentifier(kind) => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), - '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), - '>' => go!(self: error; force_quirks; emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), - }}, + states::BeforeDoctypeIdentifier(kind) => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), + '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), + '>' => go!(self: error; force_quirks; emit_doctype; to Data), + _ => go!(self: error; force_quirks; to BogusDoctype), + } + }, //§ doctype-public-identifier-(double-quoted)-state doctype-system-identifier-(double-quoted)-state - states::DoctypeIdentifierDoubleQuoted(kind) => loop { match get_char!(self, input) { - '"' => go!(self: to AfterDoctypeIdentifier kind), - '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), - '>' => go!(self: error; force_quirks; emit_doctype; to Data), - c => go!(self: push_doctype_id kind c), - }}, + states::DoctypeIdentifierDoubleQuoted(kind) => loop { + match get_char!(self, input) { + '"' => go!(self: to AfterDoctypeIdentifier kind), + '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), + '>' => go!(self: error; force_quirks; emit_doctype; to Data), + c => go!(self: push_doctype_id kind c), + } + }, //§ doctype-public-identifier-(single-quoted)-state doctype-system-identifier-(single-quoted)-state - states::DoctypeIdentifierSingleQuoted(kind) => loop { match get_char!(self, input) { - '\'' => go!(self: to AfterDoctypeIdentifier kind), - '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), - '>' => go!(self: error; force_quirks; emit_doctype; to Data), - c => go!(self: push_doctype_id kind c), - }}, + states::DoctypeIdentifierSingleQuoted(kind) => loop { + match get_char!(self, input) { + '\'' => go!(self: to AfterDoctypeIdentifier kind), + '\0' => go!(self: error; push_doctype_id kind '\u{fffd}'), + '>' => 
go!(self: error; force_quirks; emit_doctype; to Data), + c => go!(self: push_doctype_id kind c), + } + }, //§ after-doctype-public-identifier-state - states::AfterDoctypeIdentifier(Public) => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' - => go!(self: to BetweenDoctypePublicAndSystemIdentifiers), - '>' => go!(self: emit_doctype; to Data), - '"' => go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System), - '\'' => go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System), - _ => go!(self: error; force_quirks; to BogusDoctype), - }}, + states::AfterDoctypeIdentifier(Public) => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => { + go!(self: to BetweenDoctypePublicAndSystemIdentifiers) + }, + '>' => go!(self: emit_doctype; to Data), + '"' => { + go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) + }, + '\'' => { + go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) + }, + _ => go!(self: error; force_quirks; to BogusDoctype), + } + }, //§ after-doctype-system-identifier-state - states::AfterDoctypeIdentifier(System) => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '>' => go!(self: emit_doctype; to Data), - _ => go!(self: error; to BogusDoctype), - }}, + states::AfterDoctypeIdentifier(System) => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '>' => go!(self: emit_doctype; to Data), + _ => go!(self: error; to BogusDoctype), + } + }, //§ between-doctype-public-and-system-identifiers-state - states::BetweenDoctypePublicAndSystemIdentifiers => loop { match get_char!(self, input) { - '\t' | '\n' | '\x0C' | ' ' => (), - '>' => go!(self: emit_doctype; to Data), - '"' => go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System), - '\'' => go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System), - _ => go!(self: error; force_quirks; to BogusDoctype), - }}, + states::BetweenDoctypePublicAndSystemIdentifiers => loop { + match get_char!(self, input) { + '\t' | '\n' | '\x0C' | ' ' => (), + '>' => go!(self: emit_doctype; to Data), + '"' => { + go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) + }, + '\'' => { + go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) + }, + _ => go!(self: error; force_quirks; to BogusDoctype), + } + }, //§ bogus-doctype-state - states::BogusDoctype => loop { match get_char!(self, input) { - '>' => go!(self: emit_doctype; to Data), - _ => (), - }}, + states::BogusDoctype => loop { + match get_char!(self, input) { + '>' => go!(self: emit_doctype; to Data), + _ => (), + } + }, //§ bogus-comment-state - states::BogusComment => loop { match get_char!(self, input) { - '>' => go!(self: emit_comment; to Data), - '\0' => go!(self: push_comment '\u{fffd}'), - c => go!(self: push_comment c), - }}, + states::BogusComment => loop { + match get_char!(self, input) { + '>' => go!(self: emit_comment; to Data), + '\0' => go!(self: push_comment '\u{fffd}'), + c => go!(self: push_comment c), + } + }, //§ markup-declaration-open-state states::MarkupDeclarationOpen => loop { @@ -1185,7 +1299,10 @@ impl Tokenizer { } else if eat!(self, input, "doctype") { go!(self: to Doctype); } else { - if self.sink.adjusted_current_node_present_but_not_in_html_namespace() { + if self + .sink + .adjusted_current_node_present_but_not_in_html_namespace() + { if eat_exact!(self, input, "[CDATA[") { go!(self: 
clear_temp; to CdataSection); } @@ -1195,11 +1312,13 @@ impl Tokenizer { }, //§ cdata-section-state - states::CdataSection => loop { match get_char!(self, input) { - ']' => go!(self: to CdataSectionBracket), - '\0' => go!(self: emit_temp; emit '\0'), - c => go!(self: push_temp c), - }}, + states::CdataSection => loop { + match get_char!(self, input) { + ']' => go!(self: to CdataSectionBracket), + '\0' => go!(self: emit_temp; emit '\0'), + c => go!(self: push_temp c), + } + }, //§ cdata-section-bracket states::CdataSectionBracket => match get_char!(self, input) { @@ -1208,12 +1327,13 @@ impl Tokenizer { }, //§ cdata-section-end - states::CdataSectionEnd => loop { match get_char!(self, input) { - ']' => go!(self: push_temp ']'), - '>' => go!(self: emit_temp; to Data), - _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection), - }}, - + states::CdataSectionEnd => loop { + match get_char!(self, input) { + ']' => go!(self: push_temp ']'), + '>' => go!(self: emit_temp; to Data), + _ => go!(self: push_temp ']'; push_temp ']'; reconsume CdataSection), + } + }, //§ END } } @@ -1228,7 +1348,7 @@ impl Tokenizer { char_ref::Done => { self.process_char_ref(tok.get_result()); return ProcessResult::Continue; - } + }, char_ref::Stuck => ProcessResult::Suspend, char_ref::Progress => ProcessResult::Continue, @@ -1239,23 +1359,27 @@ impl Tokenizer { } fn process_char_ref(&mut self, char_ref: CharRef) { - let CharRef { mut chars, mut num_chars } = char_ref; + let CharRef { + mut chars, + mut num_chars, + } = char_ref; if num_chars == 0 { chars[0] = '&'; num_chars = 1; } - for i in 0 .. num_chars { + for i in 0..num_chars { let c = chars[i as usize]; match self.state { - states::Data | states::RawData(states::Rcdata) - => go!(self: emit c), + states::Data | states::RawData(states::Rcdata) => go!(self: emit c), - states::AttributeValue(_) - => go!(self: push_value c), + states::AttributeValue(_) => go!(self: push_value c), - _ => panic!("state {:?} should not be reachable in process_char_ref", self.state), + _ => panic!( + "state {:?} should not be reachable in process_char_ref", + self.state + ), } } } @@ -1270,7 +1394,7 @@ impl Tokenizer { Some(mut tok) => { tok.end_of_file(self, &mut input); self.process_char_ref(tok.get_result()); - } + }, } // Process all remaining buffered input. 
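The char-ref plumbing above has one subtle fallback: when process_char_ref receives a result with num_chars == 0, the original '&' is emitted as literal text. A standalone sketch of just that fallback, using simplified stand-in types rather than the crate's exact definitions:

    // Simplified stand-in for char_ref::CharRef.
    struct CharRef {
        chars: [char; 2],
        num_chars: u8,
    }

    // Mirrors the fallback in process_char_ref: an unresolved reference
    // (num_chars == 0) degrades to the literal '&' that introduced it.
    fn resolved(mut r: CharRef) -> Vec<char> {
        if r.num_chars == 0 {
            r.chars[0] = '&';
            r.num_chars = 1;
        }
        r.chars[..r.num_chars as usize].to_vec()
    }

    fn main() {
        assert_eq!(resolved(CharRef { chars: ['\0', '\0'], num_chars: 0 }), vec!['&']);
    }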
@@ -1295,11 +1419,14 @@ impl Tokenizer { } fn dump_profile(&self) { - let mut results: Vec<(states::State, u64)> - = self.state_profile.iter().map(|(s, t)| (*s, *t)).collect(); + let mut results: Vec<(states::State, u64)> = + self.state_profile.iter().map(|(s, t)| (*s, *t)).collect(); results.sort_by(|&(_, x), &(_, y)| y.cmp(&x)); - let total: u64 = results.iter().map(|&(_, t)| t).fold(0, ::std::ops::Add::add); + let total: u64 = results + .iter() + .map(|&(_, t)| t) + .fold(0, ::std::ops::Add::add); println!("\nTokenizer profile, in nanoseconds"); println!("\n{:12} total in token sink", self.time_in_sink); println!("\n{:12} total in tokenizer", total); @@ -1313,76 +1440,81 @@ impl Tokenizer { fn eof_step(&mut self) -> ProcessResult { debug!("processing EOF in state {:?}", self.state); match self.state { - states::Data | states::RawData(Rcdata) | states::RawData(Rawtext) - | states::RawData(ScriptData) | states::Plaintext - => go!(self: eof), - - states::TagName | states::RawData(ScriptDataEscaped(_)) - | states::BeforeAttributeName | states::AttributeName - | states::AfterAttributeName | states::BeforeAttributeValue - | states::AttributeValue(_) | states::AfterAttributeValueQuoted - | states::SelfClosingStartTag | states::ScriptDataEscapedDash(_) - | states::ScriptDataEscapedDashDash(_) - => go!(self: error_eof; to Data), - - states::TagOpen - => go!(self: error_eof; emit '<'; to Data), - - states::EndTagOpen - => go!(self: error_eof; emit '<'; emit '/'; to Data), - - states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) - => go!(self: to RawData ScriptDataEscaped DoubleEscaped), + states::Data | + states::RawData(Rcdata) | + states::RawData(Rawtext) | + states::RawData(ScriptData) | + states::Plaintext => go!(self: eof), + + states::TagName | + states::RawData(ScriptDataEscaped(_)) | + states::BeforeAttributeName | + states::AttributeName | + states::AfterAttributeName | + states::BeforeAttributeValue | + states::AttributeValue(_) | + states::AfterAttributeValueQuoted | + states::SelfClosingStartTag | + states::ScriptDataEscapedDash(_) | + states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data), + + states::TagOpen => go!(self: error_eof; emit '<'; to Data), + + states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data), + + states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => { + go!(self: to RawData ScriptDataEscaped DoubleEscaped) + }, - states::RawLessThanSign(kind) - => go!(self: emit '<'; to RawData kind), + states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind), - states::RawEndTagOpen(kind) - => go!(self: emit '<'; emit '/'; to RawData kind), + states::RawEndTagOpen(kind) => go!(self: emit '<'; emit '/'; to RawData kind), - states::RawEndTagName(kind) - => go!(self: emit '<'; emit '/'; emit_temp; to RawData kind), + states::RawEndTagName(kind) => { + go!(self: emit '<'; emit '/'; emit_temp; to RawData kind) + }, - states::ScriptDataEscapeStart(kind) - => go!(self: to RawData ScriptDataEscaped kind), + states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind), - states::ScriptDataEscapeStartDash - => go!(self: to RawData ScriptData), + states::ScriptDataEscapeStartDash => go!(self: to RawData ScriptData), - states::ScriptDataDoubleEscapeEnd - => go!(self: to RawData ScriptDataEscaped DoubleEscaped), + states::ScriptDataDoubleEscapeEnd => { + go!(self: to RawData ScriptDataEscaped DoubleEscaped) + }, - states::CommentStart | states::CommentStartDash - | states::Comment | states::CommentEndDash - | 
states::CommentEnd | states::CommentEndBang - => go!(self: error_eof; emit_comment; to Data), + states::CommentStart | + states::CommentStartDash | + states::Comment | + states::CommentEndDash | + states::CommentEnd | + states::CommentEndBang => go!(self: error_eof; emit_comment; to Data), - states::Doctype | states::BeforeDoctypeName - => go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data), + states::Doctype | states::BeforeDoctypeName => { + go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data) + }, - states::DoctypeName | states::AfterDoctypeName | states::AfterDoctypeKeyword(_) - | states::BeforeDoctypeIdentifier(_) | states::DoctypeIdentifierDoubleQuoted(_) - | states::DoctypeIdentifierSingleQuoted(_) | states::AfterDoctypeIdentifier(_) - | states::BetweenDoctypePublicAndSystemIdentifiers - => go!(self: error_eof; force_quirks; emit_doctype; to Data), + states::DoctypeName | + states::AfterDoctypeName | + states::AfterDoctypeKeyword(_) | + states::BeforeDoctypeIdentifier(_) | + states::DoctypeIdentifierDoubleQuoted(_) | + states::DoctypeIdentifierSingleQuoted(_) | + states::AfterDoctypeIdentifier(_) | + states::BetweenDoctypePublicAndSystemIdentifiers => { + go!(self: error_eof; force_quirks; emit_doctype; to Data) + }, - states::BogusDoctype - => go!(self: emit_doctype; to Data), + states::BogusDoctype => go!(self: emit_doctype; to Data), - states::BogusComment - => go!(self: emit_comment; to Data), + states::BogusComment => go!(self: emit_comment; to Data), - states::MarkupDeclarationOpen - => go!(self: error; to BogusComment), + states::MarkupDeclarationOpen => go!(self: error; to BogusComment), - states::CdataSection - => go!(self: emit_temp; error_eof; to Data), + states::CdataSection => go!(self: emit_temp; error_eof; to Data), - states::CdataSectionBracket - => go!(self: push_temp ']'; to CdataSection), + states::CdataSectionBracket => go!(self: push_temp ']'; to CdataSection), - states::CdataSectionEnd - => go!(self: push_temp ']'; push_temp ']'; to CdataSection), + states::CdataSectionEnd => go!(self: push_temp ']'; push_temp ']'; to CdataSection), } } } @@ -1391,18 +1523,18 @@ impl Tokenizer { #[allow(non_snake_case)] mod test { use super::option_push; // private items - use tendril::{StrTendril, SliceExt}; + use tendril::{SliceExt, StrTendril}; - use super::{TokenSink, Tokenizer, TokenizerOpts, TokenSinkResult}; + use super::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; - use super::interface::{Token, TagToken}; - use super::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError}; - use super::interface::{TagKind, StartTag, EndTag, Tag}; + use super::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; + use super::interface::{EndTag, StartTag, Tag, TagKind}; + use super::interface::{TagToken, Token}; - use markup5ever::buffer_queue::{BufferQueue}; + use markup5ever::buffer_queue::BufferQueue; use std::mem::replace; - use {LocalName}; + use LocalName; // LinesMatch implements the TokenSink trait. It is used for testing to see // if current_line is being updated when process_token is called. 
The lines @@ -1416,9 +1548,9 @@ mod test { impl LinesMatch { fn new() -> LinesMatch { LinesMatch { - tokens: vec!(), + tokens: vec![], current_str: StrTendril::new(), - lines: vec!(), + lines: vec![], } } @@ -1433,27 +1565,28 @@ mod test { self.tokens.push(CharacterTokens(s)); } } - } impl TokenSink for LinesMatch { - type Handle = (); - fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult { - + fn process_token( + &mut self, + token: Token, + line_number: u64, + ) -> TokenSinkResult { match token { CharacterTokens(b) => { self.current_str.push_slice(&b); - } + }, NullCharacterToken => { self.current_str.push_char('\0'); - } + }, ParseError(_) => { panic!("unexpected parse error"); - } + }, TagToken(mut t) => { // The spec seems to indicate that one can emit @@ -1462,12 +1595,12 @@ mod test { match t.kind { EndTag => { t.self_closing = false; - t.attrs = vec!(); - } + t.attrs = vec![]; + }, _ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)), } self.push(TagToken(t), line_number); - } + }, EOFToken => (), @@ -1494,10 +1627,11 @@ mod test { // Create a tag token fn create_tag(token: StrTendril, tagkind: TagKind) -> Token { let name = LocalName::from(&*token); - let token = TagToken(Tag { kind: tagkind, + let token = TagToken(Tag { + kind: tagkind, name: name, self_closing: false, - attrs: vec!(), + attrs: vec![], }); token } @@ -1532,12 +1666,18 @@ mod test { initial_state: None, last_start_tag_name: None, }; - let vector = vec![StrTendril::from("\n"), StrTendril::from("\n"), - StrTendril::from("\n"), StrTendril::from("\n")]; - let expected = vec![(create_tag(StrTendril::from("a"), StartTag), 1), + let vector = vec![ + StrTendril::from("\n"), + StrTendril::from("\n"), + StrTendril::from("\n"), + StrTendril::from("\n"), + ]; + let expected = vec![ + (create_tag(StrTendril::from("a"), StartTag), 1), (create_tag(StrTendril::from("b"), StartTag), 2), (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4)]; + (create_tag(StrTendril::from("a"), EndTag), 4), + ]; let results = tokenize(vector, opts); assert_eq!(results, expected); } @@ -1551,12 +1691,18 @@ mod test { initial_state: None, last_start_tag_name: None, }; - let vector = vec![StrTendril::from("\r\n"), StrTendril::from("\r\n"), - StrTendril::from("\r\n"), StrTendril::from("\r\n")]; - let expected = vec![(create_tag(StrTendril::from("a"), StartTag), 1), + let vector = vec![ + StrTendril::from("\r\n"), + StrTendril::from("\r\n"), + StrTendril::from("\r\n"), + StrTendril::from("\r\n"), + ]; + let expected = vec![ + (create_tag(StrTendril::from("a"), StartTag), 1), (create_tag(StrTendril::from("b"), StartTag), 2), (create_tag(StrTendril::from("b"), EndTag), 3), - (create_tag(StrTendril::from("a"), EndTag), 4)]; + (create_tag(StrTendril::from("a"), EndTag), 4), + ]; let results = tokenize(vector, opts); assert_eq!(results, expected); } diff --git a/html5ever/src/tokenizer/states.rs b/html5ever/src/tokenizer/states.rs index 142b48c7..d455e9a8 100644 --- a/html5ever/src/tokenizer/states.rs +++ b/html5ever/src/tokenizer/states.rs @@ -12,10 +12,10 @@ //! This is public for use by the tokenizer tests. Other library //! users should not have to care about this. 
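The LinesMatch sink above shows the full testing pattern; stripped to its core, a TokenSink only has to name a handle type and answer process_token. A minimal sketch against the same trait (generic parameters are written out here where the diff text elides them):

    use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult};

    // The smallest useful sink: count tag tokens, ignore everything else.
    struct TagCounter {
        tags: u64,
    }

    impl TokenSink for TagCounter {
        type Handle = ();

        fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
            if let Token::TagToken(_) = token {
                self.tags += 1;
            }
            TokenSinkResult::Continue
        }
    }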
-pub use self::ScriptEscapeKind::*; +pub use self::AttrValueKind::*; pub use self::DoctypeIdKind::*; pub use self::RawKind::*; -pub use self::AttrValueKind::*; +pub use self::ScriptEscapeKind::*; pub use self::State::*; #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)] diff --git a/html5ever/src/tree_builder/data.rs b/html5ever/src/tree_builder/data.rs index ae181591..477e3fb2 100644 --- a/html5ever/src/tree_builder/data.rs +++ b/html5ever/src/tree_builder/data.rs @@ -7,7 +7,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; +use interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; use tendril::StrTendril; use tokenizer::Doctype; @@ -75,9 +75,8 @@ static QUIRKY_PUBLIC_MATCHES: &'static [&'static str] = &[ "html", ]; -static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] = &[ - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", -]; +static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] = + &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]; static LIMITED_QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ "-//w3c//dtd xhtml 1.0 frameset//", @@ -110,15 +109,30 @@ pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool let system = opt_tendril_as_slice(&doctype.system_id); let err = match (name, public, system) { - (Some("html"), None, None) - | (Some("html"), None, Some("about:legacy-compat")) - | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) - | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), Some("http://www.w3.org/TR/REC-html40/strict.dtd")) - | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) - | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), Some("http://www.w3.org/TR/html4/strict.dtd")) - | (Some("html"), Some("-//W3C//DTD XHTML 1.0 Strict//EN"), Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")) - | (Some("html"), Some("-//W3C//DTD XHTML 1.1//EN"), Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd")) - => false, + (Some("html"), None, None) | + (Some("html"), None, Some("about:legacy-compat")) | + (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) | + ( + Some("html"), + Some("-//W3C//DTD HTML 4.0//EN"), + Some("http://www.w3.org/TR/REC-html40/strict.dtd"), + ) | + (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) | + ( + Some("html"), + Some("-//W3C//DTD HTML 4.01//EN"), + Some("http://www.w3.org/TR/html4/strict.dtd"), + ) | + ( + Some("html"), + Some("-//W3C//DTD XHTML 1.0 Strict//EN"), + Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"), + ) | + ( + Some("html"), + Some("-//W3C//DTD XHTML 1.1//EN"), + Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"), + ) => false, _ => true, }; diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index ee09e865..e1f664b3 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -11,37 +11,38 @@ //! The HTML5 tree builder. 
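The multi-line tuple arms in doctype_error_and_quirks above are much easier to audit than the old single-line form. Reduced to a self-contained sketch (only three of the accepted triples reproduced):

    // A doctype is error-free only for a fixed allowlist of
    // (name, public id, system id) triples; everything else is an error.
    fn doctype_is_ok(name: Option<&str>, public: Option<&str>, system: Option<&str>) -> bool {
        match (name, public, system) {
            (Some("html"), None, None) |
            (Some("html"), None, Some("about:legacy-compat")) |
            (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) => true,
            _ => false,
        }
    }

    fn main() {
        assert!(doctype_is_ok(Some("html"), None, None));
        assert!(!doctype_is_ok(Some("html"), Some("-//unknown//"), None));
    }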
-pub use interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; -pub use interface::{NodeOrText, AppendNode, AppendText, Attribute}; -pub use interface::{TreeSink, Tracer, NextParserState, create_element, ElementFlags}; +pub use interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink}; +pub use interface::{AppendNode, AppendText, Attribute, NodeOrText}; +pub use interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; use self::types::*; -use {ExpandedName, QualName, LocalName, Namespace}; use tendril::StrTendril; +use {ExpandedName, LocalName, Namespace, QualName}; use tokenizer; -use tokenizer::{Doctype, StartTag, Tag, EndTag, TokenSink, TokenSinkResult}; use tokenizer::states as tok_state; +use tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; use util::str::is_ascii_whitespace; -use std::{slice, fmt}; use std::borrow::Cow::Borrowed; use std::collections::VecDeque; use std::default::Default; -use std::iter::{Rev, Enumerate}; +use std::iter::{Enumerate, Rev}; use std::mem::replace; +use std::{fmt, slice}; +use log::Level; use tokenizer::states::{RawData, RawKind}; -use tree_builder::types::*; use tree_builder::tag_sets::*; +use tree_builder::types::*; use util::str::to_escaped_string; -use log::Level; pub use self::PushFlag::*; -#[macro_use] mod tag_sets; +#[macro_use] +mod tag_sets; mod data; mod types; @@ -124,7 +125,6 @@ pub struct TreeBuilder { /// Form element pointer. form_elem: Option, //§ END - /// Frameset-ok flag. frameset_ok: bool, @@ -139,7 +139,6 @@ pub struct TreeBuilder { /// Track current line current_line: u64, - // WARNING: If you add new fields that contain Handles, you // must add them to trace_handles() below to preserve memory // safety! @@ -148,8 +147,9 @@ pub struct TreeBuilder { } impl TreeBuilder - where Handle: Clone, - Sink: TreeSink, +where + Handle: Clone, + Sink: TreeSink, { /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. /// @@ -161,12 +161,12 @@ impl TreeBuilder sink: sink, mode: Initial, orig_mode: None, - template_modes: vec!(), - pending_table_text: vec!(), + template_modes: vec![], + pending_table_text: vec![], quirks_mode: opts.quirks_mode, doc_handle: doc_handle, - open_elems: vec!(), - active_formatting: vec!(), + open_elems: vec![], + active_formatting: vec![], head_elem: None, form_elem: None, frameset_ok: true, @@ -181,24 +181,29 @@ impl TreeBuilder /// This is for parsing fragments. /// /// The tree builder is also a `TokenSink`. 
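Because the tree builder implements TokenSink, it plugs directly into a Tokenizer. A hedged wiring sketch; MySink and MyHandle are placeholders for a caller-supplied TreeSink implementation and its handle type, not types from the crate:

    use html5ever::tokenizer::{Tokenizer, TokenizerOpts};
    use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};

    // MySink / MyHandle: hypothetical placeholders for a concrete TreeSink
    // implementation and its (Clone) handle type.
    fn wire_up(sink: MySink) -> Tokenizer<TreeBuilder<MyHandle, MySink>> {
        let tb = TreeBuilder::new(sink, TreeBuilderOpts::default());
        Tokenizer::new(tb, TokenizerOpts::default())
    }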
- pub fn new_for_fragment(mut sink: Sink, - context_elem: Handle, - form_elem: Option, - opts: TreeBuilderOpts) -> TreeBuilder { + pub fn new_for_fragment( + mut sink: Sink, + context_elem: Handle, + form_elem: Option, + opts: TreeBuilderOpts, + ) -> TreeBuilder { let doc_handle = sink.get_document(); - let context_is_template = - sink.elem_name(&context_elem) == expanded_name!(html "template"); + let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template"); let mut tb = TreeBuilder { opts: opts, sink: sink, mode: Initial, orig_mode: None, - template_modes: if context_is_template { vec![InTemplate] } else { vec![] }, - pending_table_text: vec!(), + template_modes: if context_is_template { + vec![InTemplate] + } else { + vec![] + }, + pending_table_text: vec![], quirks_mode: opts.quirks_mode, doc_handle: doc_handle, - open_elems: vec!(), - active_formatting: vec!(), + open_elems: vec![], + active_formatting: vec![], head_elem: None, form_elem: form_elem, frameset_ok: true, @@ -212,7 +217,7 @@ impl TreeBuilder // 5. Let root be a new html element with no attributes. // 6. Append the element root to the Document node created above. // 7. Set up the parser's stack of open elements so that it contains just the single element root. - tb.create_root(vec!()); + tb.create_root(vec![]); // 10. Reset the parser's insertion mode appropriately. tb.mode = tb.reset_insertion_mode(); @@ -224,32 +229,40 @@ impl TreeBuilder pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State { let elem = self.context_elem.as_ref().expect("no context element"); let name = match self.sink.elem_name(elem) { - ExpandedName { ns: &ns!(html), local } => local, - _ => return tok_state::Data + ExpandedName { + ns: &ns!(html), + local, + } => local, + _ => return tok_state::Data, }; match *name { local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata), - local_name!("style") | local_name!("xmp") | local_name!("iframe") - | local_name!("noembed") | local_name!("noframes") => tok_state::RawData(tok_state::Rawtext), + local_name!("style") | + local_name!("xmp") | + local_name!("iframe") | + local_name!("noembed") | + local_name!("noframes") => tok_state::RawData(tok_state::Rawtext), local_name!("script") => tok_state::RawData(tok_state::ScriptData), - local_name!("noscript") => if self.opts.scripting_enabled { - tok_state::RawData(tok_state::Rawtext) - } else { - tok_state::Data + local_name!("noscript") => { + if self.opts.scripting_enabled { + tok_state::RawData(tok_state::Rawtext) + } else { + tok_state::Data + } }, local_name!("plaintext") => tok_state::Plaintext, - _ => tok_state::Data + _ => tok_state::Data, } } /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's /// internal state. This is intended to support garbage-collected DOMs. 
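trace_handles exists so that garbage-collected sinks can mark every handle the builder still holds; sinks without a collector can supply a do-nothing tracer. A sketch, with the trait shape inferred from the trace_handle calls below (treat it as an assumption, not the crate's documented signature):

    // NoopTracer: for sinks with no GC, tracing can safely do nothing.
    // MyHandle is a hypothetical handle type.
    struct NoopTracer;

    impl Tracer for NoopTracer {
        type Handle = MyHandle;
        fn trace_handle(&self, _node: &MyHandle) {}
    }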
- pub fn trace_handles(&self, tracer: &Tracer) { + pub fn trace_handles(&self, tracer: &Tracer) { tracer.trace_handle(&self.doc_handle); for e in &self.open_elems { tracer.trace_handle(e); @@ -287,7 +300,7 @@ impl TreeBuilder ns!(html) => print!(" {}", name.local), _ => panic!(), } - } + }, } } println!(""); @@ -295,7 +308,11 @@ impl TreeBuilder fn debug_step(&self, mode: InsertionMode, token: &Token) { if log_enabled!(Level::Debug) { - debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode); + debug!( + "processing {} in insertion mode {:?}", + to_escaped_string(token), + mode + ); } } @@ -305,8 +322,14 @@ impl TreeBuilder let mut more_tokens = VecDeque::new(); loop { - let should_have_acknowledged_self_closing_flag = - matches!(token, TagToken(Tag { self_closing: true, kind: StartTag, .. })); + let should_have_acknowledged_self_closing_flag = matches!( + token, + TagToken(Tag { + self_closing: true, + kind: StartTag, + .. + }) + ); let result = if self.is_foreign(&token) { self.step_foreign(token) } else { @@ -316,20 +339,27 @@ impl TreeBuilder match result { Done => { if should_have_acknowledged_self_closing_flag { - self.sink.parse_error(Borrowed("Unacknowledged self-closing tag")); + self.sink + .parse_error(Borrowed("Unacknowledged self-closing tag")); } - token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue); - } + token = unwrap_or_return!( + more_tokens.pop_front(), + tokenizer::TokenSinkResult::Continue + ); + }, DoneAckSelfClosing => { - token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue); - } + token = unwrap_or_return!( + more_tokens.pop_front(), + tokenizer::TokenSinkResult::Continue + ); + }, Reprocess(m, t) => { self.mode = m; token = t; - } + }, ReprocessForeign(t) => { token = t; - } + }, SplitWhitespace(mut buf) => { let p = buf.pop_front_char_run(is_ascii_whitespace); let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue); @@ -339,19 +369,19 @@ impl TreeBuilder if buf.len32() > 0 { more_tokens.push_back(CharacterTokens(NotSplit, buf)); } - } + }, Script(node) => { assert!(more_tokens.is_empty()); return tokenizer::TokenSinkResult::Script(node); - } + }, ToPlaintext => { assert!(more_tokens.is_empty()); return tokenizer::TokenSinkResult::Plaintext; - } + }, ToRawData(k) => { assert!(more_tokens.is_empty()); return tokenizer::TokenSinkResult::RawData(k); - } + }, } } } @@ -362,9 +392,10 @@ impl TreeBuilder } /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node - fn appropriate_place_for_insertion(&mut self, - override_target: Option) - -> InsertionPoint { + fn appropriate_place_for_insertion( + &mut self, + override_target: Option, + ) -> InsertionPoint { use self::tag_sets::*; declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr"); @@ -401,22 +432,28 @@ impl TreeBuilder match insertion_point { LastChild(parent) => self.sink.append(&parent, child), BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child), - TableFosterParenting { element, prev_element } => self.sink.append_based_on_parent_node( - &element, - &prev_element, - child), + TableFosterParenting { + element, + prev_element, + } => self + .sink + .append_based_on_parent_node(&element, &prev_element, child), } } } -impl TokenSink - for TreeBuilder - where Handle: Clone, - Sink: TreeSink, +impl TokenSink for TreeBuilder +where + Handle: Clone, + Sink: TreeSink, { type Handle = Handle; - fn process_token(&mut self, token: 
tokenizer::Token, line_number: u64) -> TokenSinkResult { + fn process_token( + &mut self, + token: tokenizer::Token, + line_number: u64, + ) -> TokenSinkResult { if line_number != self.current_line { self.sink.set_current_line(line_number); } @@ -427,34 +464,45 @@ impl TokenSink tokenizer::ParseError(e) => { self.sink.parse_error(e); return tokenizer::TokenSinkResult::Continue; - } + }, + + tokenizer::DoctypeToken(dt) => { + if self.mode == Initial { + let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); + if err { + self.sink.parse_error(format_if!( + self.opts.exact_errors, + "Bad DOCTYPE", + "Bad DOCTYPE: {:?}", + dt + )); + } + let Doctype { + name, + public_id, + system_id, + force_quirks: _, + } = dt; + if !self.opts.drop_doctype { + self.sink.append_doctype_to_document( + name.unwrap_or(StrTendril::new()), + public_id.unwrap_or(StrTendril::new()), + system_id.unwrap_or(StrTendril::new()), + ); + } + self.set_quirks_mode(quirk); - tokenizer::DoctypeToken(dt) => if self.mode == Initial { - let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); - if err { + self.mode = BeforeHtml; + return tokenizer::TokenSinkResult::Continue; + } else { self.sink.parse_error(format_if!( self.opts.exact_errors, - "Bad DOCTYPE", - "Bad DOCTYPE: {:?}", dt)); - } - let Doctype { name, public_id, system_id, force_quirks: _ } = dt; - if !self.opts.drop_doctype { - self.sink.append_doctype_to_document( - name.unwrap_or(StrTendril::new()), - public_id.unwrap_or(StrTendril::new()), - system_id.unwrap_or(StrTendril::new()) - ); + "DOCTYPE in body", + "DOCTYPE in insertion mode {:?}", + self.mode + )); + return tokenizer::TokenSinkResult::Continue; } - self.set_quirks_mode(quirk); - - self.mode = BeforeHtml; - return tokenizer::TokenSinkResult::Continue; - } else { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "DOCTYPE in body", - "DOCTYPE in insertion mode {:?}", self.mode)); - return tokenizer::TokenSinkResult::Continue; }, tokenizer::TagToken(x) => TagToken(x), @@ -470,7 +518,7 @@ impl TokenSink return tokenizer::TokenSinkResult::Continue; } CharacterTokens(NotSplit, x) - } + }, }; self.process_to_completion(token) @@ -484,12 +532,12 @@ impl TokenSink fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { !self.open_elems.is_empty() && - self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) + self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) } } pub fn html_elem(open_elems: &[Handle]) -> &Handle { - &open_elems[0] + &open_elems[0] } pub struct ActiveFormattingIter<'a, Handle: 'a> { @@ -530,19 +578,23 @@ macro_rules! 
qualname { ns: ns!($ns), local: local_name!($local), } - } + }; } #[doc(hidden)] impl TreeBuilder - where Handle: Clone, - Sink: TreeSink, +where + Handle: Clone, + Sink: TreeSink, { fn unexpected(&mut self, _thing: &T) -> ProcessResult { self.sink.parse_error(format_if!( self.opts.exact_errors, "Unexpected token", - "Unexpected token {} in insertion mode {:?}", to_escaped_string(_thing), self.mode)); + "Unexpected token {} in insertion mode {:?}", + to_escaped_string(_thing), + self.mode + )); Done } @@ -559,14 +611,10 @@ impl TreeBuilder } fn position_in_active_formatting(&self, element: &Handle) -> Option { - self.active_formatting - .iter() - .position(|n| { - match n { - &Marker => false, - &Element(ref handle, _) => self.sink.same_node(handle, element) - } - }) + self.active_formatting.iter().position(|n| match n { + &Marker => false, + &Element(ref handle, _) => self.sink.same_node(handle, element), + }) } fn set_quirks_mode(&mut self, mode: QuirksMode) { @@ -610,7 +658,8 @@ impl TreeBuilder } fn current_node_in(&self, set: TagSet) -> bool - where TagSet: Fn(ExpandedName) -> bool + where + TagSet: Fn(ExpandedName) -> bool, { set(self.sink.elem_name(self.current_node())) } @@ -624,7 +673,10 @@ impl TreeBuilder fn adoption_agency(&mut self, subject: LocalName) { // 1. if self.current_node_named(subject.clone()) { - if self.position_in_active_formatting(self.current_node()).is_none() { + if self + .position_in_active_formatting(self.current_node()) + .is_none() + { self.pop(); return; } @@ -639,47 +691,49 @@ impl TreeBuilder .filter(|&(_, _, tag)| tag.name == subject) .next() .map(|(i, h, t)| (i, h.clone(), t.clone())), - { self.process_end_tag_in_body(Tag { kind: EndTag, name: subject, self_closing: false, - attrs: vec!(), + attrs: vec![], }); } ); let fmt_elem_stack_index = unwrap_or_return!( - self.open_elems.iter() + self.open_elems + .iter() .rposition(|n| self.sink.same_node(n, &fmt_elem)), - { - self.sink.parse_error(Borrowed("Formatting element not open")); + self.sink + .parse_error(Borrowed("Formatting element not open")); self.active_formatting.remove(fmt_elem_index); } ); // 7. if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) { - self.sink.parse_error(Borrowed("Formatting element not in scope")); + self.sink + .parse_error(Borrowed("Formatting element not in scope")); return; } // 8. if !self.sink.same_node(self.current_node(), &fmt_elem) { - self.sink.parse_error(Borrowed("Formatting element not current node")); + self.sink + .parse_error(Borrowed("Formatting element not current node")); } // 9. let (furthest_block_index, furthest_block) = unwrap_or_return!( - self.open_elems.iter() + self.open_elems + .iter() .enumerate() .skip(fmt_elem_stack_index) .filter(|&(_, open_element)| self.elem_in(open_element, special_tag)) .next() .map(|(i, h)| (i, h.clone())), - // 10. { self.open_elems.truncate(fmt_elem_stack_index); @@ -723,7 +777,6 @@ impl TreeBuilder let node_formatting_index = unwrap_or_else!( self.position_in_active_formatting(&node), - // 13.6. { self.open_elems.remove(node_index); @@ -736,14 +789,16 @@ impl TreeBuilder Element(ref h, ref t) => { assert!(self.sink.same_node(h, &node)); t.clone() - } + }, Marker => panic!("Found marker during adoption agency"), }; // FIXME: Is there a way to avoid cloning the attributes twice here (once on their // own, once as part of t.clone() above)? 
let new_element = create_element( - &mut self.sink, QualName::new(None, ns!(html), tag.name.clone()), - tag.attrs.clone()); + &mut self.sink, + QualName::new(None, ns!(html), tag.name.clone()), + tag.attrs.clone(), + ); self.open_elems[node_index] = new_element.clone(); self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag); node = new_element; @@ -771,41 +826,51 @@ impl TreeBuilder // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, // once as part of t.clone() above)? let new_element = create_element( - &mut self.sink, QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), - fmt_elem_tag.attrs.clone()); + &mut self.sink, + QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), + fmt_elem_tag.attrs.clone(), + ); let new_entry = Element(new_element.clone(), fmt_elem_tag); // 16. self.sink.reparent_children(&furthest_block, &new_element); // 17. - self.sink.append(&furthest_block, AppendNode(new_element.clone())); + self.sink + .append(&furthest_block, AppendNode(new_element.clone())); // 18. // FIXME: We could probably get rid of the position_in_active_formatting() calls here // if we had a more clever Bookmark representation. match bookmark { Bookmark::Replace(to_replace) => { - let index = self.position_in_active_formatting(&to_replace) + let index = self + .position_in_active_formatting(&to_replace) .expect("bookmark not found in active formatting elements"); self.active_formatting[index] = new_entry; - } + }, Bookmark::InsertAfter(previous) => { - let index = self.position_in_active_formatting(&previous) - .expect("bookmark not found in active formatting elements") + 1; + let index = self + .position_in_active_formatting(&previous) + .expect("bookmark not found in active formatting elements") + + 1; self.active_formatting.insert(index, new_entry); - let old_index = self.position_in_active_formatting(&fmt_elem) + let old_index = self + .position_in_active_formatting(&fmt_elem) .expect("formatting element not found in active formatting elements"); self.active_formatting.remove(old_index); - } + }, } // 19. self.remove_from_stack(&fmt_elem); - let new_furthest_block_index = self.open_elems.iter() + let new_furthest_block_index = self + .open_elems + .iter() .position(|n| self.sink.same_node(n, &furthest_block)) .expect("furthest block missing from open element stack"); - self.open_elems.insert(new_furthest_block_index + 1, new_element); + self.open_elems + .insert(new_furthest_block_index + 1, new_element); // 20. 
} @@ -823,7 +888,8 @@ impl TreeBuilder fn remove_from_stack(&mut self, elem: &Handle) { let sink = &mut self.sink; - let position = self.open_elems + let position = self + .open_elems .iter() .rposition(|x| sink.same_node(elem, &x)); if let Some(position) = position { @@ -835,11 +901,11 @@ impl TreeBuilder fn is_marker_or_open(&self, entry: &FormatEntry) -> bool { match *entry { Marker => true, - Element(ref node, _) => { - self.open_elems.iter() - .rev() - .any(|n| self.sink.same_node(&n, &node)) - } + Element(ref node, _) => self + .open_elems + .iter() + .rev() + .any(|n| self.sink.same_node(&n, &node)), } } @@ -848,19 +914,19 @@ impl TreeBuilder { let last = unwrap_or_return!(self.active_formatting.last(), ()); if self.is_marker_or_open(last) { - return + return; } } let mut entry_index = self.active_formatting.len() - 1; loop { if entry_index == 0 { - break + break; } entry_index -= 1; if self.is_marker_or_open(&self.active_formatting[entry_index]) { entry_index += 1; - break + break; } } @@ -872,11 +938,11 @@ impl TreeBuilder // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, // once as part of t.clone() above)? - let new_element = self.insert_element(Push, ns!(html), tag.name.clone(), - tag.attrs.clone()); + let new_element = + self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); self.active_formatting[entry_index] = Element(new_element, tag); if entry_index == self.active_formatting.len() - 1 { - break + break; } entry_index += 1; } @@ -884,7 +950,7 @@ impl TreeBuilder /// Get the first element on the stack, which will be the element. fn html_elem(&self) -> &Handle { - &self.open_elems[0] + &self.open_elems[0] } /// Get the second element on the stack, if it's a HTML body element. @@ -913,11 +979,14 @@ impl TreeBuilder { let name = self.sink.elem_name(elem); if body_end_ok(name) { - continue + continue; } - error = format_if!(self.opts.exact_errors, + error = format_if!( + self.opts.exact_errors, "Unexpected open tag at end of body", - "Unexpected open tag {:?} at end of body", name); + "Unexpected open tag {:?} at end of body", + name + ); } self.sink.parse_error(error); // FIXME: Do we keep checking after finding one bad tag? 
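Aside on the hunks above, which all manipulate the tree builder's list of active formatting elements: the list is a stack of element entries interleaved with markers, and "clearing to the last marker" (clear_active_formatting_to_marker) pops entries up to and including the most recent marker. A minimal standalone sketch of that discipline, using simplified stand-in types rather than html5ever's actual FormatEntry:

enum FormatEntry<Handle> {
    // Inserted when entering contexts such as templates and table cells.
    Marker,
    // A formatting element that may need to be reconstructed later.
    Element(Handle),
}

fn clear_to_marker<H>(list: &mut Vec<FormatEntry<H>>) {
    // Pop entries until (and including) the most recent marker.
    while let Some(entry) = list.pop() {
        if matches!(entry, FormatEntry::Marker) {
            break;
        }
    }
}

fn main() {
    let mut list = vec![
        FormatEntry::Element("b"),
        FormatEntry::Marker,
        FormatEntry::Element("i"),
    ];
    clear_to_marker(&mut list);
    assert_eq!(list.len(), 1); // only the entry below the marker survives
}

The real list additionally stores the Tag that created each entry, which is what lets reconstruct_formatting() above re-open formatting elements that were implicitly closed.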
@@ -926,8 +995,10 @@ impl TreeBuilder } } - fn in_scope(&self, scope: TagSet, pred: Pred) -> bool - where TagSet: Fn(ExpandedName) -> bool, Pred: Fn(Handle) -> bool + fn in_scope(&self, scope: TagSet, pred: Pred) -> bool + where + TagSet: Fn(ExpandedName) -> bool, + Pred: Fn(Handle) -> bool, { for node in self.open_elems.iter().rev() { if pred(node.clone()) { @@ -944,7 +1015,8 @@ impl TreeBuilder } fn elem_in(&self, elem: &Handle, set: TagSet) -> bool - where TagSet: Fn(ExpandedName) -> bool + where + TagSet: Fn(ExpandedName) -> bool, { set(self.sink.elem_name(elem)) } @@ -955,7 +1027,9 @@ impl TreeBuilder } fn in_html_elem_named(&self, name: LocalName) -> bool { - self.open_elems.iter().any(|elem| self.html_elem_named(elem, name.clone())) + self.open_elems + .iter() + .any(|elem| self.html_elem_named(elem, name.clone())) } fn current_node_named(&self, name: LocalName) -> bool { @@ -963,20 +1037,24 @@ impl TreeBuilder } fn in_scope_named(&self, scope: TagSet, name: LocalName) -> bool - where TagSet: Fn(ExpandedName) -> bool + where + TagSet: Fn(ExpandedName) -> bool, { self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone())) } //§ closing-elements-that-have-implied-end-tags fn generate_implied_end(&mut self, set: TagSet) - where TagSet: Fn(ExpandedName) -> bool + where + TagSet: Fn(ExpandedName) -> bool, { loop { { let elem = unwrap_or_return!(self.open_elems.last(), ()); let nsname = self.sink.elem_name(elem); - if !set(nsname) { return; } + if !set(nsname) { + return; + } } self.pop(); } @@ -995,7 +1073,8 @@ impl TreeBuilder // Pop elements until the current element is in the set. fn pop_until_current(&mut self, pred: TagSet) - where TagSet: Fn(ExpandedName) -> bool + where + TagSet: Fn(ExpandedName) -> bool, { loop { if self.current_node_in(|x| pred(x)) { @@ -1008,14 +1087,19 @@ impl TreeBuilder // Pop elements until an element from the set has been popped. Returns the // number of elements popped. fn pop_until
<P>
(&mut self, pred: P) -> usize - where P: Fn(ExpandedName) -> bool + where + P: Fn(ExpandedName) -> bool, { let mut n = 0; loop { n += 1; match self.open_elems.pop() { None => break, - Some(elem) => if pred(self.sink.elem_name(&elem)) { break; }, + Some(elem) => { + if pred(self.sink.elem_name(&elem)) { + break; + } + }, } } n @@ -1029,9 +1113,12 @@ impl TreeBuilder // Signal an error if it was not the first one. fn expect_to_close(&mut self, name: LocalName) { if self.pop_until_named(name.clone()) != 1 { - self.sink.parse_error(format_if!(self.opts.exact_errors, + self.sink.parse_error(format_if!( + self.opts.exact_errors, "Unexpected open element", - "Unexpected open element while closing {:?}", name)); + "Unexpected open element while closing {:?}", + name + )); } } @@ -1049,7 +1136,11 @@ impl TreeBuilder // Check tags for type=hidden fn is_type_hidden(&self, tag: &Tag) -> bool { - match tag.attrs.iter().find(|&at| at.name.expanded() == expanded_name!("", "type")) { + match tag + .attrs + .iter() + .find(|&at| at.name.expanded() == expanded_name!("", "type")) + { None => false, Some(at) => (&*at.value).eq_ignore_ascii_case("hidden"), } @@ -1071,9 +1162,12 @@ impl TreeBuilder self.orig_mode = Some(self.mode); Reprocess(InTableText, token) } else { - self.sink.parse_error(format_if!(self.opts.exact_errors, + self.sink.parse_error(format_if!( + self.opts.exact_errors, "Unexpected characters in table", - "Unexpected characters {} in table", to_escaped_string(&token))); + "Unexpected characters {} in table", + to_escaped_string(&token) + )); self.foster_parent_in_body(token) } } @@ -1086,7 +1180,10 @@ impl TreeBuilder node = ctx; } let name = match self.sink.elem_name(node) { - ExpandedName { ns: &ns!(html), local } => local, + ExpandedName { + ns: &ns!(html), + local, + } => local, _ => continue, }; match *name { @@ -1100,14 +1197,24 @@ impl TreeBuilder } return InSelect; }, - local_name!("td") | local_name!("th") => if !last { return InCell; }, + local_name!("td") | local_name!("th") => { + if !last { + return InCell; + } + }, local_name!("tr") => return InRow, - local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => return InTableBody, + local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => { + return InTableBody; + }, local_name!("caption") => return InCaption, local_name!("colgroup") => return InColumnGroup, local_name!("table") => return InTable, local_name!("template") => return *self.template_modes.last().unwrap(), - local_name!("head") => if !last { return InHead }, + local_name!("head") => { + if !last { + return InHead; + } + }, local_name!("body") => return InBody, local_name!("frameset") => return InFrameset, local_name!("html") => match self.head_elem { @@ -1124,7 +1231,8 @@ impl TreeBuilder fn close_the_cell(&mut self) { self.generate_implied_end(cursory_implied_end); if self.pop_until(td_th) != 1 { - self.sink.parse_error(Borrowed("expected to close or with cell")); + self.sink + .parse_error(Borrowed("expected to close or with cell")); } self.clear_active_formatting_to_marker(); } @@ -1156,16 +1264,23 @@ impl TreeBuilder //§ creating-and-inserting-nodes fn create_root(&mut self, attrs: Vec) { let elem = create_element( - &mut self.sink, QualName::new(None, ns!(html), local_name!("html")), - attrs); + &mut self.sink, + QualName::new(None, ns!(html), local_name!("html")), + attrs, + ); self.push(&elem); self.sink.append(&self.doc_handle, AppendNode(elem)); // FIXME: application cache selection algorithm } // 
https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token - fn insert_element(&mut self, push: PushFlag, ns: Namespace, name: LocalName, attrs: Vec) - -> Handle { + fn insert_element( + &mut self, + push: PushFlag, + ns: Namespace, + name: LocalName, + attrs: Vec, + ) -> Handle { declare_tag_set!(form_associatable = "button" "fieldset" "input" "object" "output" "select" "textarea" "img"); @@ -1178,18 +1293,22 @@ impl TreeBuilder let insertion_point = self.appropriate_place_for_insertion(None); let (node1, node2) = match insertion_point { - LastChild(ref p) | - BeforeSibling(ref p) => (p.clone(), None), - TableFosterParenting { ref element, ref prev_element } => (element.clone(), Some(prev_element.clone())), + LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None), + TableFosterParenting { + ref element, + ref prev_element, + } => (element.clone(), Some(prev_element.clone())), }; // Step 12. if form_associatable(qname.expanded()) && - self.form_elem.is_some() && - !self.in_html_elem_named(local_name!("template")) && - !(listed(qname.expanded()) && - attrs.iter().any(|a| a.name.expanded() == expanded_name!("", "form"))) { - + self.form_elem.is_some() && + !self.in_html_elem_named(local_name!("template")) && + !(listed(qname.expanded()) && + attrs + .iter() + .any(|a| a.name.expanded() == expanded_name!("", "form"))) + { let form = self.form_elem.as_ref().unwrap().clone(); let node2 = match node2 { Some(ref n) => Some(n), @@ -1217,7 +1336,7 @@ impl TreeBuilder } fn insert_phantom(&mut self, name: LocalName) -> Handle { - self.insert_element(Push, ns!(html), name, vec!()) + self.insert_element(Push, ns!(html), name, vec![]) } //§ END @@ -1233,7 +1352,8 @@ impl TreeBuilder } if matches >= 3 { - self.active_formatting.remove(first_match.expect("matches with no index")); + self.active_formatting + .remove(first_match.expect("matches with no index")); } let elem = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); @@ -1260,7 +1380,8 @@ impl TreeBuilder } if self.elem_in(elem, special_tag) { - self.sink.parse_error(Borrowed("Found special tag while closing generic tag")); + self.sink + .parse_error(Borrowed("Found special tag while closing generic tag")); return; } } @@ -1272,7 +1393,7 @@ impl TreeBuilder // element is in special_tag. self.unexpected(&tag); return; - } + }, Some(x) => x, }; @@ -1291,7 +1412,6 @@ impl TreeBuilder .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a"))) .next() .map(|(_, n, _)| n.clone()), - () ); @@ -1320,8 +1440,13 @@ impl TreeBuilder if mathml_text_integration_point(name) { match *token { CharacterTokens(..) | NullCharacterToken => return false, - TagToken(Tag { kind: StartTag, ref name, .. }) - if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => return false, + TagToken(Tag { + kind: StartTag, + ref name, + .. + }) if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => { + return false; + }, _ => (), } } @@ -1336,13 +1461,17 @@ impl TreeBuilder if let expanded_name!(mathml "annotation-xml") = name { match *token { - TagToken(Tag { kind: StartTag, name: local_name!("svg"), .. }) => return false, - CharacterTokens(..) | NullCharacterToken | - TagToken(Tag { kind: StartTag, .. }) => { - return !self.sink.is_mathml_annotation_xml_integration_point( - self.adjusted_current_node()) - } - _ => {} + TagToken(Tag { + kind: StartTag, + name: local_name!("svg"), + .. + }) => return false, + CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. 
}) => { + return !self + .sink + .is_mathml_annotation_xml_integration_point(self.adjusted_current_node()); + }, + _ => {}, }; } @@ -1412,7 +1541,8 @@ impl TreeBuilder } fn adjust_attributes(&mut self, tag: &mut Tag, mut map: F) - where F: FnMut(LocalName) -> Option, + where + F: FnMut(LocalName) -> Option, { for &mut Attribute { ref mut name, .. } in &mut tag.attrs { if let Some(replacement) = map(name.local.clone()) { @@ -1517,7 +1647,7 @@ impl TreeBuilder ns!(svg) => { self.adjust_svg_tag_name(&mut tag); self.adjust_svg_attributes(&mut tag); - } + }, _ => (), } self.adjust_foreign_attributes(&mut tag); @@ -1539,8 +1669,8 @@ impl TreeBuilder self.pop(); while !self.current_node_in(|n| { *n.ns == ns!(html) || - mathml_text_integration_point(n) || - svg_html_integration_point(n) + mathml_text_integration_point(n) || + svg_html_integration_point(n) }) { self.pop(); } @@ -1552,34 +1682,34 @@ impl TreeBuilder #[cfg(test)] #[allow(non_snake_case)] mod test { - use markup5ever::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; - use markup5ever::interface::{NodeOrText, AppendNode, AppendText}; - use markup5ever::interface::{TreeSink, Tracer, ElementFlags}; + use markup5ever::interface::{AppendNode, AppendText, NodeOrText}; + use markup5ever::interface::{ElementFlags, Tracer, TreeSink}; + use markup5ever::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; use super::types::*; + use tendril::stream::{TendrilSink, Utf8LossyDecoder}; + use tendril::StrTendril; use ExpandedName; use QualName; - use tendril::StrTendril; - use tendril::stream::{TendrilSink, Utf8LossyDecoder}; use tokenizer; - use tokenizer::{Tokenizer, TokenizerOpts}; - use tokenizer::{Doctype, StartTag, Tag, TokenSink}; use tokenizer::states as tok_state; + use tokenizer::{Doctype, StartTag, Tag, TokenSink}; + use tokenizer::{Tokenizer, TokenizerOpts}; use util::str::is_ascii_whitespace; - use std::default::Default; - use std::mem::replace; use std::borrow::Cow; use std::borrow::Cow::Borrowed; use std::collections::VecDeque; + use std::default::Default; + use std::mem::replace; + use super::{TreeBuilder, TreeBuilderOpts}; use driver::*; - use super::{TreeBuilderOpts, TreeBuilder}; use markup5ever::Attribute; - use rcdom::{Node, Handle, RcDom, NodeData}; + use rcdom::{Handle, Node, NodeData, RcDom}; pub struct LineCountingDOM { pub line_vec: Vec<(QualName, u64)>, @@ -1590,7 +1720,9 @@ mod test { impl TreeSink for LineCountingDOM { type Output = Self; - fn finish(self) -> Self { self } + fn finish(self) -> Self { + self + } type Handle = Handle; @@ -1618,8 +1750,12 @@ mod test { self.rcdom.elem_name(target) } - fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) - -> Handle { + fn create_element( + &mut self, + name: QualName, + attrs: Vec, + flags: ElementFlags, + ) -> Handle { self.line_vec.push((name.clone(), self.current_line)); self.rcdom.create_element(name, attrs, flags) } @@ -1636,9 +1772,7 @@ mod test { self.rcdom.append(parent, child) } - fn append_before_sibling(&mut self, - sibling: &Handle, - child: NodeOrText) { + fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText) { self.rcdom.append_before_sibling(sibling, child) } @@ -1646,15 +1780,20 @@ mod test { &mut self, element: &Handle, prev_element: &Handle, - child: NodeOrText) { - self.rcdom.append_based_on_parent_node(element, prev_element, child) + child: NodeOrText, + ) { + self.rcdom + .append_based_on_parent_node(element, prev_element, child) } - fn append_doctype_to_document(&mut self, - name: StrTendril, - 
public_id: StrTendril, - system_id: StrTendril) { - self.rcdom.append_doctype_to_document(name, public_id, system_id); + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { + self.rcdom + .append_doctype_to_document(name, public_id, system_id); } fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec) { @@ -1682,10 +1821,10 @@ mod test { fn check_four_lines() { // Input let sink = LineCountingDOM { - line_vec: vec!(), - current_line: 1, - rcdom: RcDom::default(), - }; + line_vec: vec![], + current_line: 1, + rcdom: RcDom::default(), + }; let opts = ParseOpts::default(); let mut resultTok = parse_document(sink, opts); resultTok.process(StrTendril::from("\n")); @@ -1695,11 +1834,13 @@ mod test { // Actual Output let actual = resultTok.finish(); // Expected Output - let expected = vec![(QualName::new(None, ns!(html), local_name!("html")), 1), - (QualName::new(None, ns!(html), local_name!("head")), 1), - (QualName::new(None, ns!(html), local_name!("body")), 1), - (QualName::new(None, ns!(html), local_name!("a")), 1), - (QualName::new(None, ns!(html), local_name!("b")), 3)]; + let expected = vec![ + (QualName::new(None, ns!(html), local_name!("html")), 1), + (QualName::new(None, ns!(html), local_name!("head")), 1), + (QualName::new(None, ns!(html), local_name!("body")), 1), + (QualName::new(None, ns!(html), local_name!("a")), 1), + (QualName::new(None, ns!(html), local_name!("b")), 3), + ]; // Assertion assert_eq!(actual.line_vec, expected); } diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index 5b5b8bc4..fd77de7f 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -9,9 +9,9 @@ // The tree builder rules, as a single, enormous nested match expression. -use tree_builder::types::*; +use tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData}; use tree_builder::tag_sets::*; -use tokenizer::states::{Rcdata, Rawtext, ScriptData, Plaintext}; +use tree_builder::types::*; use std::borrow::ToOwned; @@ -28,8 +28,9 @@ fn current_node(open_elems: &[Handle]) -> &Handle { #[doc(hidden)] impl TreeBuilder - where Handle: Clone, - Sink: TreeSink, +where + Handle: Clone, + Sink: TreeSink, { fn step(&mut self, mode: InsertionMode, token: Token) -> ProcessResult { self.debug_step(mode, &token); diff --git a/html5ever/src/tree_builder/tag_sets.rs b/html5ever/src/tree_builder/tag_sets.rs index f7ce8dc0..3db5a8b9 100644 --- a/html5ever/src/tree_builder/tag_sets.rs +++ b/html5ever/src/tree_builder/tag_sets.rs @@ -43,16 +43,23 @@ macro_rules! 
declare_tag_set ( ); ); -#[inline(always)] pub fn empty_set(_: ExpandedName) -> bool { false } -#[inline(always)] pub fn full_set(_: ExpandedName) -> bool { true } +#[inline(always)] +pub fn empty_set(_: ExpandedName) -> bool { + false +} +#[inline(always)] +pub fn full_set(_: ExpandedName) -> bool { + true +} declare_tag_set!(pub html_default_scope = "applet" "caption" "html" "table" "td" "th" "marquee" "object" "template"); -#[inline(always)] pub fn default_scope(name: ExpandedName) -> bool { +#[inline(always)] +pub fn default_scope(name: ExpandedName) -> bool { html_default_scope(name) || - mathml_text_integration_point(name) || - svg_html_integration_point(name) + mathml_text_integration_point(name) || + svg_html_integration_point(name) } declare_tag_set!(pub list_item_scope = [default_scope] + "ol" "ul"); @@ -84,19 +91,23 @@ declare_tag_set!(pub special_tag = //§ END pub fn mathml_text_integration_point(p: ExpandedName) -> bool { - matches!(p, + matches!( + p, expanded_name!(mathml "mi") | - expanded_name!(mathml "mo") | - expanded_name!(mathml "mn") | - expanded_name!(mathml "ms") | - expanded_name!(mathml "mtext")) + expanded_name!(mathml "mo") | + expanded_name!(mathml "mn") | + expanded_name!(mathml "ms") | + expanded_name!(mathml "mtext") + ) } /// https://html.spec.whatwg.org/multipage/#html-integration-point pub fn svg_html_integration_point(p: ExpandedName) -> bool { // annotation-xml are handle in another place - matches!(p, + matches!( + p, expanded_name!(svg "foreignObject") | - expanded_name!(svg "desc") | - expanded_name!(svg "title")) + expanded_name!(svg "desc") | + expanded_name!(svg "title") + ) } diff --git a/html5ever/src/tree_builder/types.rs b/html5ever/src/tree_builder/types.rs index 1192472c..2b76551b 100644 --- a/html5ever/src/tree_builder/types.rs +++ b/html5ever/src/tree_builder/types.rs @@ -9,17 +9,17 @@ //! Types used within the tree builder code. Not exported to users. -use tokenizer::Tag; use tokenizer::states::RawKind; +use tokenizer::Tag; use tendril::StrTendril; +pub use self::FormatEntry::*; pub use self::InsertionMode::*; +pub use self::InsertionPoint::*; +pub use self::ProcessResult::*; pub use self::SplitStatus::*; pub use self::Token::*; -pub use self::ProcessResult::*; -pub use self::FormatEntry::*; -pub use self::InsertionPoint::*; #[derive(PartialEq, Eq, Copy, Clone, Debug)] pub enum InsertionMode { @@ -88,5 +88,8 @@ pub enum InsertionPoint { /// Insert before this following sibling. BeforeSibling(Handle), /// Insertion point is decided based on existence of element's parent node. - TableFosterParenting { element: Handle, prev_element: Handle }, + TableFosterParenting { + element: Handle, + prev_element: Handle, + }, } diff --git a/html5ever/src/util/str.rs b/html5ever/src/util/str.rs index 36a2e6f8..6876e437 100644 --- a/html5ever/src/util/str.rs +++ b/html5ever/src/util/str.rs @@ -19,9 +19,9 @@ pub fn to_escaped_string(x: &T) -> String { /// letter, otherwise None. pub fn lower_ascii_letter(c: char) -> Option { match c { - 'a' ... 'z' => Some(c), - 'A' ... 
'Z' => Some((c as u8 - b'A' + b'a') as char), - _ => None + 'a'...'z' => Some(c), + 'A'...'Z' => Some((c as u8 - b'A' + b'a') as char), + _ => None, } } @@ -44,7 +44,11 @@ mod test { test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a')); test_eq!(lower_letter_A_is_a, lower_ascii_letter('A'), Some('a')); test_eq!(lower_letter_symbol_is_None, lower_ascii_letter('!'), None); - test_eq!(lower_letter_nonascii_is_None, lower_ascii_letter('\u{a66e}'), None); + test_eq!( + lower_letter_nonascii_is_None, + lower_ascii_letter('\u{a66e}'), + None + ); test_eq!(is_alnum_a, is_ascii_alnum('a'), true); test_eq!(is_alnum_A, is_ascii_alnum('A'), true); diff --git a/html5ever/tests/foreach_html5lib_test/mod.rs b/html5ever/tests/foreach_html5lib_test/mod.rs index 13202d10..6138c98c 100644 --- a/html5ever/tests/foreach_html5lib_test/mod.rs +++ b/html5ever/tests/foreach_html5lib_test/mod.rs @@ -7,17 +7,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::fs; use std::ffi::OsStr; -use std::path::Path; +use std::fs; use std::ops::FnMut; +use std::path::Path; pub fn foreach_html5lib_test( - src_dir: &Path, - subdir: &'static str, - ext: &'static OsStr, - mut mk: Mk) - where Mk: FnMut(&Path, fs::File) + src_dir: &Path, + subdir: &'static str, + ext: &'static OsStr, + mut mk: Mk, +) where + Mk: FnMut(&Path, fs::File), { let mut test_dir_path = src_dir.to_path_buf(); test_dir_path.push("html5lib-tests"); @@ -36,6 +37,6 @@ pub fn foreach_html5lib_test( }, Err(_) => { panic!("Before launching the tests, please run this command:\n\n\tgit submodule update --init\n\nto retrieve an html5lib-tests snapshot."); - } + }, } } diff --git a/html5ever/tests/serializer.rs b/html5ever/tests/serializer.rs index 8c59f3e4..7715788c 100644 --- a/html5ever/tests/serializer.rs +++ b/html5ever/tests/serializer.rs @@ -7,16 +7,17 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; use std::default::Default; -use html5ever::{parse_fragment, parse_document, serialize, QualName}; use html5ever::driver::ParseOpts; use html5ever::rcdom::RcDom; -use html5ever::tendril::{StrTendril, SliceExt, TendrilSink}; -use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, TagKind, Tokenizer}; -use html5ever::serialize::{Serialize, Serializer, TraversalScope, SerializeOpts}; +use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope}; +use html5ever::tendril::{SliceExt, StrTendril, TendrilSink}; +use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer}; +use html5ever::{parse_document, parse_fragment, serialize, QualName}; use std::io; @@ -37,7 +38,8 @@ impl Serialize for Tokens { S: Serializer, { for t in self.0.iter() { - match t { // TODO: check whether this is an IE conditional comment or a spec comment + match t { + // TODO: check whether this is an IE conditional comment or a spec comment &Token::TagToken(ref tag) => { let name = QualName::new( None, @@ -45,27 +47,20 @@ impl Serialize for Tokens { tag.name.as_ref().into(), ); match tag.kind { - TagKind::StartTag => { - serializer.start_elem( - name, - tag.attrs.iter().map( - |at| (&at.name, &at.value[..]), - ), - )? 
- } + TagKind::StartTag => serializer.start_elem( + name, + tag.attrs.iter().map(|at| (&at.name, &at.value[..])), + )?, TagKind::EndTag => serializer.end_elem(name)?, } - } - &Token::DoctypeToken(ref dt) => { - match dt.name { - Some(ref name) => serializer.write_doctype(&name)?, - None => {} - } - } + }, + &Token::DoctypeToken(ref dt) => match dt.name { + Some(ref name) => serializer.write_doctype(&name)?, + None => {}, + }, &Token::CommentToken(ref chars) => serializer.write_comment(&chars)?, &Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?, - &Token::NullCharacterToken | - &Token::EOFToken => {} + &Token::NullCharacterToken | &Token::EOFToken => {}, &Token::ParseError(ref e) => println!("parse error: {:#?}", e), } } @@ -90,15 +85,19 @@ fn tokenize_and_serialize(input: StrTendril) -> StrTendril { create_missing_parent: true, ..Default::default() }, - ).unwrap(); + ) + .unwrap(); StrTendril::try_from_byte_slice(&output.into_inner()).unwrap() } fn parse_and_serialize(input: StrTendril) -> StrTendril { let dom = parse_fragment( - RcDom::default(), ParseOpts::default(), - QualName::new(None, ns!(html), local_name!("body")), vec![], - ).one(input); + RcDom::default(), + ParseOpts::default(), + QualName::new(None, ns!(html), local_name!("body")), + vec![], + ) + .one(input); let inner = &dom.document.children.borrow()[0]; let mut result = vec![]; @@ -132,22 +131,38 @@ macro_rules! test_no_parse { }; } - - test!(empty, r#""#); test!(fuzz, "Hello, World!
"#);
-test!(misnest, r#"<p><i>Hello!</p>, World!</i>"#,
-    r#"<p><i>Hello!</i></p><i>, World!</i>"#);
+test!(
+    misnest,
+    r#"<p><i>Hello!</p>, World!</i>"#,
+    r#"<p><i>Hello!</i></p><i>, World!</i>"#
+);
 test!(attr_literal, r#"<base foo="<'>">"#);
 test!(attr_escape_amp, r#"<base foo="&amp;">"#);
-test!(attr_escape_amp_2, r#"<base foo="&">"#, r#"<base foo="&amp;">"#);
-test!(attr_escape_nbsp, "<base foo=\"\u{a0}\">", r#"<base foo="&nbsp;">"#);
-test!(attr_escape_quot, r#"<base foo='"'>"#, r#"<base foo="&quot;">"#);
-test!(attr_escape_several, r#"<span foo=3 title='test "with" &amp;quot;'>"#,
-    r#"<span foo="3" title="test &quot;with&quot; &amp;quot;"></span>"#);
+test!(
+    attr_escape_amp_2,
+    r#"<base foo="&">"#,
+    r#"<base foo="&amp;">"#
+);
+test!(
+    attr_escape_nbsp,
+    "<base foo=\"\u{a0}\">",
+    r#"<base foo="&nbsp;">"#
+);
+test!(
+    attr_escape_quot,
+    r#"<base foo='"'>"#,
+    r#"<base foo="&quot;">"#
+);
+test!(
+    attr_escape_several,
+    r#"<span foo=3 title='test "with" &amp;quot;'>"#,
+    r#"<span foo="3" title="test &quot;with&quot; &amp;quot;"></span>"#
+);
 test!(text_literal, r#"<p>"'"</p>"#);
 test!(text_escape_amp, r#"<p>&amp;</p>"#);
@@ -157,24 +172,55 @@
 test!(text_escape_lt, r#"<p>&lt;</p>"#);
 test!(text_escape_gt, r#"<p>&gt;</p>"#);
 test!(text_escape_gt2, r#"<p>></p>"#, r#"<p>&gt;</p>"#);
-test!(script_literal, r#"<script>(x & 1) < 2; y > "foo" + 'bar'</script>"#);
-test!(style_literal, r#"<style>(x & 1) < 2; y > "foo" + 'bar'</style>"#);
+test!(
+    script_literal,
+    r#"<script>(x & 1) < 2; y > "foo" + 'bar'</script>"#
+);
+test!(
+    style_literal,
+    r#"<style>(x & 1) < 2; y > "foo" + 'bar'</style>"#
+);
 test!(xmp_literal, r#"<xmp>(x & 1) < 2; y > "foo" + 'bar'</xmp>"#);
-test!(iframe_literal, r#"<iframe>(x & 1) < 2; y > "foo" + 'bar'</iframe>"#);
-test!(noembed_literal, r#"<noembed>(x & 1) < 2; y > "foo" + 'bar'</noembed>"#);
-test!(noframes_literal, r#"<noframes>(x & 1) < 2; y > "foo" + 'bar'</noframes>"#);
+test!(
+    iframe_literal,
+    r#"<iframe>(x & 1) < 2; y > "foo" + 'bar'</iframe>"#
+);
+test!(
+    noembed_literal,
+    r#"<noembed>(x & 1) < 2; y > "foo" + 'bar'</noembed>"#
+);
+test!(
+    noframes_literal,
+    r#"<noframes>(x & 1) < 2; y > "foo" + 'bar'</noframes>"#
+);
 test!(pre_lf_0, "<pre>foo bar</pre>");
 test!(pre_lf_1, "<pre>\nfoo bar</pre>", "<pre>foo bar</pre>");
 test!(pre_lf_2, "<pre>\n\nfoo bar</pre>", "<pre>\nfoo bar</pre>");
 test!(textarea_lf_0, "<textarea>foo bar</textarea>");
-test!(textarea_lf_1, "<textarea>\nfoo bar</textarea>", "<textarea>foo bar</textarea>");
-test!(textarea_lf_2, "<textarea>\n\nfoo bar</textarea>", "<textarea>\nfoo bar</textarea>");
+test!(
+    textarea_lf_1,
+    "<textarea>\nfoo bar</textarea>",
+    "<textarea>foo bar</textarea>"
+);
+test!(
+    textarea_lf_2,
+    "<textarea>\n\nfoo bar</textarea>",
+    "<textarea>\nfoo bar</textarea>"
+);
 test!(listing_lf_0, "<listing>foo bar</listing>");
-test!(listing_lf_1, "<listing>\nfoo bar</listing>", "<listing>foo bar</listing>");
-test!(listing_lf_2, "<listing>\n\nfoo bar</listing>", "<listing>\nfoo bar</listing>");
+test!(
+    listing_lf_1,
+    "<listing>\nfoo bar</listing>",
+    "<listing>foo bar</listing>"
+);
+test!(
+    listing_lf_2,
+    "<listing>\n\nfoo bar</listing>",
+    "<listing>\nfoo bar</listing>"
+);
 test!(comment_1, r#"<p>hi <!--world--></p>"#);
 test!(comment_2, r#"<p>hi <!-- world--></p>"#);
@@ -193,9 +239,8 @@
 test_no_parse!(malformed_tokens, r#"foo</div><div>
"#); #[test] fn doctype() { - let dom = parse_document( - RcDom::default(), ParseOpts::default()).one(""); - dom.document.children.borrow_mut().truncate(1); // Remove + let dom = parse_document(RcDom::default(), ParseOpts::default()).one(""); + dom.document.children.borrow_mut().truncate(1); // Remove let mut result = vec![]; serialize(&mut result, &dom.document, Default::default()).unwrap(); assert_eq!(String::from_utf8(result).unwrap(), ""); diff --git a/html5ever/tests/tokenizer.rs b/html5ever/tests/tokenizer.rs index 7093a181..ca4ecb79 100644 --- a/html5ever/tests/tokenizer.rs +++ b/html5ever/tests/tokenizer.rs @@ -9,53 +9,53 @@ extern crate rustc_serialize; extern crate rustc_test as test; -#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; mod foreach_html5lib_test; use foreach_html5lib_test::foreach_html5lib_test; -use std::{char, env}; +use rustc_serialize::json::Json; +use std::borrow::Cow::Borrowed; +use std::collections::BTreeMap; +use std::default::Default; use std::ffi::OsStr; use std::mem::replace; -use std::default::Default; use std::path::Path; -use test::{TestDesc, TestDescAndFn, DynTestName, DynTestFn}; -use rustc_serialize::json::Json; -use std::collections::BTreeMap; -use std::borrow::Cow::Borrowed; +use std::{char, env}; +use test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn}; -use html5ever::{LocalName, QualName}; -use html5ever::tokenizer::{Doctype, StartTag, EndTag, Tag}; -use html5ever::tokenizer::{Token, DoctypeToken, TagToken, CommentToken}; -use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, EOFToken, ParseError}; -use html5ever::tokenizer::{TokenSink, Tokenizer, TokenizerOpts, TokenSinkResult}; -use html5ever::tokenizer::{BufferQueue}; -use html5ever::tokenizer::states::{Plaintext, RawData, Rcdata, Rawtext}; use html5ever::tendril::*; -use html5ever::{Attribute}; - +use html5ever::tokenizer::states::{Plaintext, RawData, Rawtext, Rcdata}; +use html5ever::tokenizer::BufferQueue; +use html5ever::tokenizer::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; +use html5ever::tokenizer::{CommentToken, DoctypeToken, TagToken, Token}; +use html5ever::tokenizer::{Doctype, EndTag, StartTag, Tag}; +use html5ever::tokenizer::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; +use html5ever::Attribute; +use html5ever::{LocalName, QualName}; // Return all ways of splitting the string into at most n // possibly-empty pieces. fn splits(s: &str, n: usize) -> Vec> { if n == 1 { - return vec!(vec!(s.to_tendril())); + return vec![vec![s.to_tendril()]]; } - let mut points: Vec = s.char_indices().map(|(n,_)| n).collect(); + let mut points: Vec = s.char_indices().map(|(n, _)| n).collect(); points.push(s.len()); // do this with iterators? 
- let mut out = vec!(); + let mut out = vec![]; for p in points.into_iter() { let y = &s[p..]; - for mut x in splits(&s[..p], n-1).into_iter() { + for mut x in splits(&s[..p], n - 1).into_iter() { x.push(y.to_tendril()); out.push(x); } } - out.extend(splits(s, n-1).into_iter()); + out.extend(splits(s, n - 1).into_iter()); out } @@ -68,7 +68,7 @@ struct TokenLogger { impl TokenLogger { fn new(exact_errors: bool) -> TokenLogger { TokenLogger { - tokens: vec!(), + tokens: vec![], current_str: StrTendril::new(), exact_errors: exact_errors, } @@ -100,14 +100,16 @@ impl TokenSink for TokenLogger { match token { CharacterTokens(b) => { self.current_str.push_slice(&b); - } + }, NullCharacterToken => { self.current_str.push_char('\0'); - } + }, - ParseError(_) => if self.exact_errors { - self.push(ParseError(Borrowed(""))); + ParseError(_) => { + if self.exact_errors { + self.push(ParseError(Borrowed(""))); + } }, TagToken(mut t) => { @@ -117,12 +119,12 @@ impl TokenSink for TokenLogger { match t.kind { EndTag => { t.self_closing = false; - t.attrs = vec!(); - } + t.attrs = vec![]; + }, _ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)), } self.push(TagToken(t)); - } + }, EOFToken => (), @@ -220,23 +222,25 @@ fn json_to_token(js: &Json) -> Token { "StartTag" => TagToken(Tag { kind: StartTag, name: LocalName::from(&*args[0].get_str()), - attrs: args[1].get_obj().iter().map(|(k,v)| { - Attribute { + attrs: args[1] + .get_obj() + .iter() + .map(|(k, v)| Attribute { name: QualName::new(None, ns!(), LocalName::from(&**k)), - value: v.get_tendril() - } - }).collect(), + value: v.get_tendril(), + }) + .collect(), self_closing: match args.get(2) { Some(b) => b.get_bool(), None => false, - } + }, }), "EndTag" => TagToken(Tag { kind: EndTag, name: LocalName::from(&*args[0].get_str()), - attrs: vec!(), - self_closing: false + attrs: vec![], + self_closing: false, }), "Comment" => CommentToken(args[0].get_tendril()), @@ -245,7 +249,6 @@ fn json_to_token(js: &Json) -> Token { // We don't need to produce NullCharacterToken because // the TokenLogger will convert them to CharacterTokens. - _ => panic!("don't understand token {:?}", parts), } } @@ -256,11 +259,15 @@ fn json_to_tokens(js: &Json, exact_errors: bool) -> Vec { // by an ignored error. let mut sink = TokenLogger::new(exact_errors); for tok in js.get_list().iter() { - assert_eq!(match *tok { - Json::String(ref s) - if &s[..] == "ParseError" => sink.process_token(ParseError(Borrowed("")), 0), - _ => sink.process_token(json_to_token(tok), 0), - }, TokenSinkResult::Continue); + assert_eq!( + match *tok { + Json::String(ref s) if &s[..] == "ParseError" => { + sink.process_token(ParseError(Borrowed("")), 0) + }, + _ => sink.process_token(json_to_token(tok), 0), + }, + TokenSinkResult::Continue + ); } sink.get_tokens() } @@ -278,8 +285,7 @@ fn unescape(s: &str) -> Option { } drop(it.next()); let hex: String = it.by_ref().take(4).collect(); - match u32::from_str_radix(&hex, 16).ok() - .and_then(char::from_u32) { + match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) { // Some of the tests use lone surrogates, but we have no // way to represent them in the UTF-8 input to our parser. 
// Since these can only come from script, we will catch @@ -287,7 +293,7 @@ fn unescape(s: &str) -> Option { None => return None, Some(c) => out.push(c), } - } + }, Some(c) => out.push(c), } } @@ -301,17 +307,16 @@ fn unescape_json(js: &Json) -> Json { Json::Array(ref xs) => Json::Array(xs.iter().map(unescape_json).collect()), Json::Object(ref obj) => { let mut new_obj = BTreeMap::new(); - for (k,v) in obj.iter() { + for (k, v) in obj.iter() { new_obj.insert(k.clone(), unescape_json(v)); } Json::Object(new_obj) - } + }, _ => js.clone(), } } -fn mk_test(desc: String, input: String, expect: Json, opts: TokenizerOpts) - -> TestDescAndFn { +fn mk_test(desc: String, input: String, expect: Json, opts: TokenizerOpts) -> TestDescAndFn { TestDescAndFn { desc: TestDesc::new(DynTestName(desc)), testfn: DynTestFn(Box::new(move || { @@ -325,8 +330,10 @@ fn mk_test(desc: String, input: String, expect: Json, opts: TokenizerOpts) let output = tokenize(input.clone(), opts.clone()); let expect_toks = json_to_tokens(&expect, opts.exact_errors); if output != expect_toks { - panic!("\ninput: {:?}\ngot: {:?}\nexpected: {:?}", - input, output, expect); + panic!( + "\ninput: {:?}\ngot: {:?}\nexpected: {:?}", + input, output, expect + ); } } })), @@ -337,12 +344,18 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Json) { let obj = js.get_obj(); let mut input = js.find("input").unwrap().get_str(); let mut expect = js.find("output").unwrap().clone(); - let desc = format!("tok: {}: {}", - filename, js.find("description").unwrap().get_str()); + let desc = format!( + "tok: {}: {}", + filename, + js.find("description").unwrap().get_str() + ); // "Double-escaped" tests require additional processing of // the input and output. - if obj.get(&"doubleEscaped".to_string()).map_or(false, |j| j.get_bool()) { + if obj + .get(&"doubleEscaped".to_string()) + .map_or(false, |j| j.get_bool()) + { match unescape(&input) { None => return, Some(i) => input = i, @@ -355,14 +368,18 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Json) { // Some tests want to start in a state other than Data. let state_overrides = match obj.get(&"initialStates".to_string()) { - Some(&Json::Array(ref xs)) => xs.iter().map(|s| - Some(match &s.get_str()[..] { - "PLAINTEXT state" => Plaintext, - "RAWTEXT state" => RawData(Rawtext), - "RCDATA state" => RawData(Rcdata), - s => panic!("don't know state {}", s), - })).collect(), - None => vec!(None), + Some(&Json::Array(ref xs)) => xs + .iter() + .map(|s| { + Some(match &s.get_str()[..] { + "PLAINTEXT state" => Plaintext, + "RAWTEXT state" => RawData(Rawtext), + "RCDATA state" => RawData(Rcdata), + s => panic!("don't know state {}", s), + }) + }) + .collect(), + None => vec![None], _ => panic!("don't understand initialStates value"), }; @@ -372,45 +389,58 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Json) { let mut newdesc = desc.clone(); match state { Some(s) => newdesc = format!("{} (in state {:?})", newdesc, s), - None => (), + None => (), }; if exact_errors { newdesc = format!("{} (exact errors)", newdesc); } - tests.push(mk_test(newdesc, input.clone(), expect.clone(), TokenizerOpts { - exact_errors: exact_errors, - initial_state: state, - last_start_tag_name: start_tag.clone(), - - // Not discarding a BOM is what the test suite expects; see - // https://github.com/html5lib/html5lib-tests/issues/2 - discard_bom: false, - - .. 
Default::default() - })); + tests.push(mk_test( + newdesc, + input.clone(), + expect.clone(), + TokenizerOpts { + exact_errors: exact_errors, + initial_state: state, + last_start_tag_name: start_tag.clone(), + + // Not discarding a BOM is what the test suite expects; see + // https://github.com/html5lib/html5lib-tests/issues/2 + discard_bom: false, + + ..Default::default() + }, + )); } } } fn tests(src_dir: &Path) -> Vec { - let mut tests = vec!(); - - foreach_html5lib_test(src_dir, "tokenizer", - OsStr::new("test"), |path, mut file| { - let js = Json::from_reader(&mut file).ok().expect("json parse error"); + let mut tests = vec![]; + + foreach_html5lib_test( + src_dir, + "tokenizer", + OsStr::new("test"), + |path, mut file| { + let js = Json::from_reader(&mut file).ok().expect("json parse error"); + + match js.get_obj().get(&"tests".to_string()) { + Some(&Json::Array(ref lst)) => { + for test in lst.iter() { + mk_tests( + &mut tests, + path.file_name().unwrap().to_str().unwrap(), + test, + ); + } + }, - match js.get_obj().get(&"tests".to_string()) { - Some(&Json::Array(ref lst)) => { - for test in lst.iter() { - mk_tests(&mut tests, path.file_name().unwrap().to_str().unwrap(), test); - } + // xmlViolation.test doesn't follow this format. + _ => (), } - - // xmlViolation.test doesn't follow this format. - _ => (), - } - }); + }, + ); tests } diff --git a/html5ever/tests/tree_builder.rs b/html5ever/tests/tree_builder.rs index f169557e..4a3694eb 100644 --- a/html5ever/tests/tree_builder.rs +++ b/html5ever/tests/tree_builder.rs @@ -8,29 +8,29 @@ // except according to those terms. extern crate rustc_test as test; -#[macro_use] extern crate html5ever; +#[macro_use] +extern crate html5ever; mod foreach_html5lib_test; use foreach_html5lib_test::foreach_html5lib_test; -use std::{fs, io, env}; -use std::io::BufRead; +use std::collections::{HashMap, HashSet}; +use std::default::Default; use std::ffi::OsStr; +use std::io::BufRead; use std::iter::repeat; use std::mem::replace; -use std::default::Default; use std::path::Path; -use std::collections::{HashSet, HashMap}; -use test::{TestDesc, TestDescAndFn, DynTestName, TestFn}; +use std::{env, fs, io}; +use test::{DynTestName, TestDesc, TestDescAndFn, TestFn}; -use html5ever::{LocalName, QualName}; -use html5ever::{ParseOpts, parse_document, parse_fragment}; -use html5ever::rcdom::{NodeData, Handle, RcDom}; +use html5ever::rcdom::{Handle, NodeData, RcDom}; use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::{parse_document, parse_fragment, ParseOpts}; +use html5ever::{LocalName, QualName}; - -fn parse_tests>(mut lines: It) -> Vec> { - let mut tests = vec!(); +fn parse_tests>(mut lines: It) -> Vec> { + let mut tests = vec![]; let mut test = HashMap::new(); let mut key: Option = None; let mut val = String::new(); @@ -64,7 +64,7 @@ fn parse_tests>(mut lines: It) -> Vec panic!("should not reach Document"), - NodeData::Doctype { ref name, ref public_id, ref system_id } => { + NodeData::Doctype { + ref name, + ref public_id, + ref system_id, + } => { buf.push_str("\n"); - } + }, NodeData::Text { ref contents } => { buf.push_str("\""); buf.push_str(&contents.borrow()); buf.push_str("\"\n"); - } + }, NodeData::Comment { ref contents } => { buf.push_str("\n"); - } + }, - NodeData::Element { ref name, ref attrs, .. } => { + NodeData::Element { + ref name, + ref attrs, + .. 
+ } => { buf.push_str("<"); match name.ns { ns!(svg) => buf.push_str("svg "), @@ -118,42 +126,45 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) { for attr in attrs.into_iter() { buf.push_str("|"); - buf.push_str(&repeat(" ").take(indent+2).collect::()); + buf.push_str(&repeat(" ").take(indent + 2).collect::()); match attr.name.ns { ns!(xlink) => buf.push_str("xlink "), ns!(xml) => buf.push_str("xml "), ns!(xmlns) => buf.push_str("xmlns "), _ => (), } - buf.push_str(&format!("{}=\"{}\"\n", - attr.name.local, attr.value)); + buf.push_str(&format!("{}=\"{}\"\n", attr.name.local, attr.value)); } - } + }, - NodeData::ProcessingInstruction { .. } => unreachable!() + NodeData::ProcessingInstruction { .. } => unreachable!(), } for child in node.children.borrow().iter() { - serialize(buf, indent+2, child.clone()); + serialize(buf, indent + 2, child.clone()); } - if let NodeData::Element { template_contents: Some(ref content), .. } = node.data { + if let NodeData::Element { + template_contents: Some(ref content), + .. + } = node.data + { buf.push_str("|"); - buf.push_str(&repeat(" ").take(indent+2).collect::()); + buf.push_str(&repeat(" ").take(indent + 2).collect::()); buf.push_str("content\n"); for child in content.children.borrow().iter() { - serialize(buf, indent+4, child.clone()); + serialize(buf, indent + 4, child.clone()); } } } fn make_test( - tests: &mut Vec, - ignores: &HashSet, - filename: &str, - idx: usize, - fields: HashMap) { - + tests: &mut Vec, + ignores: &HashSet, + filename: &str, + idx: usize, + fields: HashMap, +) { let scripting_flags = &[false, true]; let scripting_flags = if fields.contains_key("script-off") { &scripting_flags[0..1] @@ -164,18 +175,17 @@ fn make_test( }; let name = format!("tb: {}-{}", filename, idx); for scripting_enabled in scripting_flags { - let test = make_test_desc_with_scripting_flag( - ignores, &name, &fields, *scripting_enabled); + let test = make_test_desc_with_scripting_flag(ignores, &name, &fields, *scripting_enabled); tests.push(test); } } fn make_test_desc_with_scripting_flag( - ignores: &HashSet, - name: &str, - fields: &HashMap, - scripting_enabled: bool) - -> TestDescAndFn { + ignores: &HashSet, + name: &str, + fields: &HashMap, + scripting_enabled: bool, +) -> TestDescAndFn { let get_field = |key| { let field = fields.get(key).expect("missing field"); field.trim_right_matches('\n').to_string() @@ -184,8 +194,9 @@ fn make_test_desc_with_scripting_flag( let mut data = fields.get("data").expect("missing data").to_string(); data.pop(); let expected = get_field("document"); - let context = fields.get("document-fragment") - .map(|field| context_name(field.trim_right_matches('\n'))); + let context = fields + .get("document-fragment") + .map(|field| context_name(field.trim_right_matches('\n'))); let ignore = ignores.contains(name); let mut name = name.to_owned(); if scripting_enabled { @@ -199,7 +210,7 @@ fn make_test_desc_with_scripting_flag( TestDescAndFn { desc: TestDesc { ignore: ignore, - .. TestDesc::new(DynTestName(name)) + ..TestDesc::new(DynTestName(name)) }, testfn: TestFn::dyn_test_fn(move || { // Do this here because Tendril isn't Send. 
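Aside on the serialize() test helper reformatted above: it renders the parsed DOM in the html5lib-tests tree-construction dump format that the .dat expectation files use — one node per "| "-prefixed line, two extra spaces of indent per tree level, attributes on their own lines, and text wrapped in double quotes. For a hypothetical input like <p class="x">hi</p>, the dump would look roughly like:

| <html>
|   <head>
|   <body>
|     <p>
|       class="x"
|       "hi"

This one shared format is why the test below can compare the expected section of the .dat file and the serialized actual tree as plain strings.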
@@ -225,11 +236,13 @@ fn make_test_desc_with_scripting_flag( }, }; let len = result.len(); - result.truncate(len - 1); // drop the trailing newline + result.truncate(len - 1); // drop the trailing newline if result != expected { - panic!("\ninput: {}\ngot:\n{}\nexpected:\n{}\n", - data, result, expected); + panic!( + "\ninput: {}\ngot:\n{}\nexpected:\n{}\n", + data, result, expected + ); } }), } @@ -246,20 +259,28 @@ fn context_name(context: &str) -> QualName { } fn tests(src_dir: &Path, ignores: &HashSet) -> Vec { - let mut tests = vec!(); - - foreach_html5lib_test(src_dir, "tree-construction", - OsStr::new("dat"), |path, file| { - let buf = io::BufReader::new(file); - let lines = buf.lines() - .map(|res| res.ok().expect("couldn't read")); - let data = parse_tests(lines); - - for (i, test) in data.into_iter().enumerate() { - make_test(&mut tests, ignores, path.file_name().unwrap().to_str().unwrap(), - i, test); - } - }); + let mut tests = vec![]; + + foreach_html5lib_test( + src_dir, + "tree-construction", + OsStr::new("dat"), + |path, file| { + let buf = io::BufReader::new(file); + let lines = buf.lines().map(|res| res.ok().expect("couldn't read")); + let data = parse_tests(lines); + + for (i, test) in data.into_iter().enumerate() { + make_test( + &mut tests, + ignores, + path.file_name().unwrap().to_str().unwrap(), + i, + test, + ); + } + }, + ); tests } diff --git a/markup5ever/build.rs b/markup5ever/build.rs index d9c7603c..427e236a 100644 --- a/markup5ever/build.rs +++ b/markup5ever/build.rs @@ -7,8 +7,8 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -extern crate string_cache_codegen; extern crate phf_codegen; +extern crate string_cache_codegen; #[macro_use] extern crate serde_derive; extern crate serde_json; @@ -16,7 +16,7 @@ extern crate serde_json; use std::collections::HashMap; use std::env; use std::fs::File; -use std::io::{Write, BufWriter, BufReader, BufRead}; +use std::io::{BufRead, BufReader, BufWriter, Write}; use std::path::Path; static NAMESPACES: &'static [(&'static str, &'static str)] = &[ @@ -38,7 +38,8 @@ fn main() { named_entities_to_phf( &Path::new(&manifest_dir).join("data").join("entities.json"), - &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")); + &Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs"), + ); // Create a string cache for local names let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt"); @@ -50,7 +51,8 @@ fn main() { } local_names_atom .with_macro_doc("Takes a local name as a string and returns its key in the string cache.") - .write_to(&mut generated).unwrap(); + .write_to(&mut generated) + .unwrap(); // Create a string cache for namespace prefixes string_cache_codegen::AtomType::new("Prefix", "namespace_prefix!") @@ -66,12 +68,21 @@ fn main() { .write_to(&mut generated) .unwrap(); - writeln!(generated, r#" + writeln!( + generated, + r#" /// Maps the input of `namespace_prefix!` to the output of `namespace_url!`. #[macro_export] macro_rules! 
ns {{ - "#).unwrap(); + "# + ) + .unwrap(); for &(prefix, url) in NAMESPACES { - writeln!(generated, "({}) => {{ namespace_url!({:?}) }};", prefix, url).unwrap(); + writeln!( + generated, + "({}) => {{ namespace_url!({:?}) }};", + prefix, url + ) + .unwrap(); } writeln!(generated, "}}").unwrap(); } @@ -84,17 +95,26 @@ fn named_entities_to_phf(from: &Path, to: &Path) { //characters: String, // Present in the file but we don't need it } - let entities: HashMap - = serde_json::from_reader(&mut File::open(from).unwrap()).unwrap(); - let mut entities: HashMap<&str, (u32, u32)> = entities.iter().map(|(name, char_ref)| { - assert!(name.starts_with("&")); - assert!(char_ref.codepoints.len() <= 2); - (&name[1..], (char_ref.codepoints[0], *char_ref.codepoints.get(1).unwrap_or(&0))) - }).collect(); + let entities: HashMap = + serde_json::from_reader(&mut File::open(from).unwrap()).unwrap(); + let mut entities: HashMap<&str, (u32, u32)> = entities + .iter() + .map(|(name, char_ref)| { + assert!(name.starts_with("&")); + assert!(char_ref.codepoints.len() <= 2); + ( + &name[1..], + ( + char_ref.codepoints[0], + *char_ref.codepoints.get(1).unwrap_or(&0), + ), + ) + }) + .collect(); // Add every missing prefix of those keys, mapping to NULL characters. for key in entities.keys().cloned().collect::>() { - for n in 1 .. key.len() { + for n in 1..key.len() { entities.entry(&key[..n]).or_insert((0, 0)); } } @@ -106,7 +126,9 @@ fn named_entities_to_phf(from: &Path, to: &Path) { } let mut file = File::create(to).unwrap(); - writeln!(&mut file, r#" + writeln!( + &mut file, + r#" /// A map of entity names to their codepoints. The second codepoint will /// be 0 if the entity contains a single codepoint. Entities have their preceeding '&' removed. /// @@ -117,8 +139,14 @@ fn named_entities_to_phf(from: &Path, to: &Path) { /// /// assert_eq!(NAMED_ENTITIES.get("gt;").unwrap(), &(62, 0)); /// ``` -"#).unwrap(); - write!(&mut file, "pub static NAMED_ENTITIES: Map<&'static str, (u32, u32)> = ").unwrap(); +"# + ) + .unwrap(); + write!( + &mut file, + "pub static NAMED_ENTITIES: Map<&'static str, (u32, u32)> = " + ) + .unwrap(); phf_map.build(&mut file).unwrap(); write!(&mut file, ";\n").unwrap(); } diff --git a/markup5ever/data/mod.rs b/markup5ever/data/mod.rs index 231b7562..fa839ba8 100644 --- a/markup5ever/data/mod.rs +++ b/markup5ever/data/mod.rs @@ -13,14 +13,38 @@ use phf::Map; /// (U+0080 through U+009F) with these characters, based on Windows 8-bit /// codepages. 
pub static C1_REPLACEMENTS: [Option; 32] = [ - Some('\u{20ac}'), None, Some('\u{201a}'), Some('\u{0192}'), - Some('\u{201e}'), Some('\u{2026}'), Some('\u{2020}'), Some('\u{2021}'), - Some('\u{02c6}'), Some('\u{2030}'), Some('\u{0160}'), Some('\u{2039}'), - Some('\u{0152}'), None, Some('\u{017d}'), None, - None, Some('\u{2018}'), Some('\u{2019}'), Some('\u{201c}'), - Some('\u{201d}'), Some('\u{2022}'), Some('\u{2013}'), Some('\u{2014}'), - Some('\u{02dc}'), Some('\u{2122}'), Some('\u{0161}'), Some('\u{203a}'), - Some('\u{0153}'), None, Some('\u{017e}'), Some('\u{0178}'), + Some('\u{20ac}'), + None, + Some('\u{201a}'), + Some('\u{0192}'), + Some('\u{201e}'), + Some('\u{2026}'), + Some('\u{2020}'), + Some('\u{2021}'), + Some('\u{02c6}'), + Some('\u{2030}'), + Some('\u{0160}'), + Some('\u{2039}'), + Some('\u{0152}'), + None, + Some('\u{017d}'), + None, + None, + Some('\u{2018}'), + Some('\u{2019}'), + Some('\u{201c}'), + Some('\u{201d}'), + Some('\u{2022}'), + Some('\u{2013}'), + Some('\u{2014}'), + Some('\u{02dc}'), + Some('\u{2122}'), + Some('\u{0161}'), + Some('\u{203a}'), + Some('\u{0153}'), + None, + Some('\u{017e}'), + Some('\u{0178}'), ]; include!(concat!(env!("OUT_DIR"), "/named_entities.rs")); diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs index 1de6ab80..685821ef 100644 --- a/markup5ever/interface/mod.rs +++ b/markup5ever/interface/mod.rs @@ -11,10 +11,10 @@ use std::fmt; use tendril::StrTendril; -use super::{LocalName, Prefix, Namespace}; -pub use self::tree_builder::{NodeOrText, AppendNode, AppendText, create_element, ElementFlags}; -pub use self::tree_builder::{QuirksMode, Quirks, LimitedQuirks, NoQuirks}; -pub use self::tree_builder::{TreeSink, Tracer, NextParserState}; +pub use self::tree_builder::{create_element, AppendNode, AppendText, ElementFlags, NodeOrText}; +pub use self::tree_builder::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; +pub use self::tree_builder::{NextParserState, Tracer, TreeSink}; +use super::{LocalName, Namespace, Prefix}; /// An [expanded name], containing the tag and the namespace. /// @@ -72,7 +72,7 @@ macro_rules! 
expanded_name { ns: &ns!($ns), local: &local_name!($local), } - } + }; } pub mod tree_builder; @@ -148,7 +148,7 @@ impl QualName { pub fn expanded(&self) -> ExpandedName { ExpandedName { ns: &self.ns, - local: &self.local + local: &self.local, } } } @@ -167,20 +167,25 @@ pub struct Attribute { pub value: StrTendril, } - #[cfg(test)] mod tests { use super::Namespace; #[test] fn ns_macro() { - assert_eq!(ns!(), Namespace::from("")); + assert_eq!(ns!(), Namespace::from("")); - assert_eq!(ns!(html), Namespace::from("http://www.w3.org/1999/xhtml")); - assert_eq!(ns!(xml), Namespace::from("http://www.w3.org/XML/1998/namespace")); - assert_eq!(ns!(xmlns), Namespace::from("http://www.w3.org/2000/xmlns/")); - assert_eq!(ns!(xlink), Namespace::from("http://www.w3.org/1999/xlink")); - assert_eq!(ns!(svg), Namespace::from("http://www.w3.org/2000/svg")); - assert_eq!(ns!(mathml), Namespace::from("http://www.w3.org/1998/Math/MathML")); + assert_eq!(ns!(html), Namespace::from("http://www.w3.org/1999/xhtml")); + assert_eq!( + ns!(xml), + Namespace::from("http://www.w3.org/XML/1998/namespace") + ); + assert_eq!(ns!(xmlns), Namespace::from("http://www.w3.org/2000/xmlns/")); + assert_eq!(ns!(xlink), Namespace::from("http://www.w3.org/1999/xlink")); + assert_eq!(ns!(svg), Namespace::from("http://www.w3.org/2000/svg")); + assert_eq!( + ns!(mathml), + Namespace::from("http://www.w3.org/1998/Math/MathML") + ); } } diff --git a/markup5ever/interface/tree_builder.rs b/markup5ever/interface/tree_builder.rs index ad8462ff..562a254e 100644 --- a/markup5ever/interface/tree_builder.rs +++ b/markup5ever/interface/tree_builder.rs @@ -11,12 +11,12 @@ //! //! It can be used by a parser to create the DOM graph structure in memory. +use interface::{Attribute, ExpandedName, QualName}; use std::borrow::Cow; use tendril::StrTendril; -use interface::{QualName, ExpandedName, Attribute}; pub use self::NodeOrText::{AppendNode, AppendText}; -pub use self::QuirksMode::{Quirks, LimitedQuirks, NoQuirks}; +pub use self::QuirksMode::{LimitedQuirks, NoQuirks, Quirks}; /// Something which can be inserted into the DOM. /// @@ -72,7 +72,7 @@ pub struct ElementFlags { pub mathml_annotation_xml_integration_point: bool, // Prevent construction from outside module - _private: () + _private: (), } /// A constructor for an element. @@ -106,21 +106,20 @@ pub struct ElementFlags { /// /// ``` pub fn create_element(sink: &mut Sink, name: QualName, attrs: Vec) -> Sink::Handle -where Sink: TreeSink { +where + Sink: TreeSink, +{ let mut flags = ElementFlags::default(); match name.expanded() { - expanded_name!(html "template") => { - flags.template = true - } + expanded_name!(html "template") => flags.template = true, expanded_name!(mathml "annotation-xml") => { flags.mathml_annotation_xml_integration_point = attrs.iter().any(|attr| { - attr.name.expanded() == expanded_name!("", "encoding") && ( - attr.value.eq_ignore_ascii_case("text/html") || - attr.value.eq_ignore_ascii_case("application/xhtml+xml") - ) + attr.name.expanded() == expanded_name!("", "encoding") && + (attr.value.eq_ignore_ascii_case("text/html") || + attr.value.eq_ignore_ascii_case("application/xhtml+xml")) }) - } - _ => {} + }, + _ => {}, } sink.create_element(name, attrs, flags) } @@ -169,8 +168,12 @@ pub trait TreeSink { /// See [the template element in the whatwg spec][whatwg template]. 
/// /// [whatwg template]: https://html.spec.whatwg.org/multipage/#the-template-element - fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) - -> Self::Handle; + fn create_element( + &mut self, + name: QualName, + attrs: Vec, + flags: ElementFlags, + ) -> Self::Handle; /// Create a comment node. fn create_comment(&mut self, text: StrTendril) -> Self::Handle; @@ -188,16 +191,20 @@ pub trait TreeSink { /// When the insertion point is decided by the existence of a parent node of the /// element, we consider both possibilities and send the element which will be used /// if a parent node exists, along with the element to be used if there isn't one. - fn append_based_on_parent_node(&mut self, + fn append_based_on_parent_node( + &mut self, element: &Self::Handle, prev_element: &Self::Handle, - child: NodeOrText); + child: NodeOrText, + ); /// Append a `DOCTYPE` element to the `Document` node. - fn append_doctype_to_document(&mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril); + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ); /// Mark a HTML `