From 30fe940a2181cebd981d45c16e4156d808984440 Mon Sep 17 00:00:00 2001 From: Leo Schwarz Date: Mon, 17 Apr 2017 12:05:19 +0200 Subject: [PATCH 01/21] Implement `std::error::Error` for `parser::Error`. --- src/parser.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index fdc3a6e..9907d9d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -14,6 +14,7 @@ //! let doc = parser::parse(xml).expect("Failed to parse"); //! ``` +use std; use std::ascii::AsciiExt; use std::collections::HashMap; use std::mem::replace; @@ -108,6 +109,69 @@ impl Recoverable for Error { } } +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use std::error::Error; + use self::Error::*; + + match *self { + Expected(s) | + ExpectedClosingQuote(s) | + ExpectedOpeningQuote(s) => { + write!(f, "Parser error: {} {}", self.description(), s) + }, + _ => write!(f, "Parser error: {}", self.description()) + } + } +} + +impl std::error::Error for Error { + fn description(&self) -> &str { + use self::Error::*; + + match *self { + Expected(_) => "expected", + ExpectedAttribute => "expected attribute", + ExpectedAttributeValue => "expected attribute value", + ExpectedCData => "expected CDATA", + ExpectedCharacterData => "expected character data", + ExpectedComment => "expected comment", + ExpectedCommentBody => "expected comment body", + ExpectedElement => "expected element", + ExpectedElementName => "expected element name", + ExpectedElementEnd => "expected element end", + ExpectedElementSelfClosed => "expected element self closed", + ExpectedProcessingInstruction => "expected processing instruction", + ExpectedProcessingInstructionTarget => "expected processing instruction target", + ExpectedProcessingInstructionValue => "expected processing instruction value", + ExpectedVersionNumber => "expected version number", + ExpectedEncoding => "expected encoding", + ExpectedYesNo 
=> "expected yes or no", + ExpectedWhitespace => "expected whitespace", + ExpectedDocumentTypeName => "expected document type name", + ExpectedSystemLiteral => "expected system literal", + ExpectedClosingQuote(_) => "expected closing quote", + ExpectedOpeningQuote(_) => "expected opening quote", + ExpectedDecimalReferenceValue => "expected decimal reference value", + ExpectedHexReferenceValue => "expected hex reference value", + ExpectedNamedReferenceValue => "expected named reference value", + ExpectedDecimalReference => "expected decimal reference", + ExpectedHexReference => "expected hex reference", + ExpectedNamedReference => "expected named reference", + InvalidProcessingInstructionTarget => "invalid processing instruction target", + MismatchedElementEndName => "mismatched element end name", + InvalidDecimalReference => "invalid decimal reference", + InvalidHexReference => "invalid hex reference", + UnknownNamedReference => "unknown named reference", + DuplicateAttribute => "duplicate attribute", + RedefinedNamespace => "redefined namespace", + RedefinedDefaultNamespace => "redefined default namespace", + EmptyNamespace => "empty namespace", + UnknownNamespacePrefix => "unknown namespace prefix", + } + } +} + type XmlMaster<'a> = peresil::ParseMaster, Error>; type XmlProgress<'a, T> = peresil::Progress, T, Error>; From d19a3fa3d197ce0002cece991a5b1f8eca01b19f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 17 Apr 2017 13:14:17 -0400 Subject: [PATCH 02/21] Bump to version 0.2.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8981657..6643699 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.0" +version = "0.2.1" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 41599abea05512c0820be4e298740e6768039ea6 Mon Sep 17 00:00:00 2001 From: Ian Ornelas Date: Mon, 29 May 2017 01:40:51 -0300 Subject: [PATCH 03/21] Properly 
pop scope on empty elements --- src/writer.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/writer.rs b/src/writer.rs index d80c014..95a81e5 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -340,7 +340,9 @@ fn format_element<'d, W: ?Sized>(element: dom::Element<'d>, let mut children = element.children(); if children.is_empty() { - writer.write_str("/>") + try!(writer.write_str("/>")); + mapping.pop_scope(); + Ok(()) } else { try!(writer.write_str(">")); @@ -624,6 +626,27 @@ mod test { assert_eq!(xml, ""); } + #[test] + fn nested_empty_element_with_namespaces() { + let p = Package::new(); + let d = p.as_document(); + + let hello = d.create_element(("outer", "hello")); + hello.set_default_namespace_uri(Some("outer")); + hello.set_preferred_prefix(Some("o")); + + let world = d.create_element("world"); + world.set_default_namespace_uri(Some("inner")); + + let empty = d.create_element("empty"); + world.append_child(empty); + hello.append_child(world); + d.root().append_child(hello); + + let xml = format_xml(&d); + assert_eq!(xml, ""); + } + #[test] fn nested_element_with_namespaces_with_reused_namespaces() { let p = Package::new(); From 4591373d39ae4b7c397e2f72f605200edbe26049 Mon Sep 17 00:00:00 2001 From: Ian Ornelas Date: Mon, 29 May 2017 02:09:40 -0300 Subject: [PATCH 04/21] Ignore default namespace for attribute prefixes --- src/writer.rs | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/writer.rs b/src/writer.rs index d80c014..11a3c07 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -232,8 +232,13 @@ impl<'d> PrefixMapping<'d> { } } - fn namespace_type<'a>(&'a self, preferred_prefix: Option<&'a str>, namespace_uri: &str) -> NamespaceType<'a> { - if Some(namespace_uri) == self.active_default_namespace_uri() { + fn namespace_type<'a>(&'a self, + preferred_prefix: Option<&'a str>, + namespace_uri: &str, + ignore_default: bool) + -> NamespaceType<'a> + { + if 
!ignore_default && Some(namespace_uri) == self.active_default_namespace_uri() { return NamespaceType::Default; } @@ -264,6 +269,7 @@ enum Content<'d> { fn format_qname<'d, W: ?Sized>(q: QName<'d>, mapping: &mut PrefixMapping<'d>, preferred_prefix: Option<&str>, + ignore_default: bool, writer: &mut W) -> io::Result<()> where W: Write @@ -271,7 +277,7 @@ fn format_qname<'d, W: ?Sized>(q: QName<'d>, // Can something without a namespace be prefixed? No, because // defining a prefix requires a non-empty URI if let Some(namespace_uri) = q.namespace_uri { - match mapping.namespace_type(preferred_prefix, namespace_uri) { + match mapping.namespace_type(preferred_prefix, namespace_uri, ignore_default) { NamespaceType::Default => { // No need to do anything }, @@ -316,11 +322,11 @@ fn format_element<'d, W: ?Sized>(element: dom::Element<'d>, mapping.populate_scope(&element, &attrs); try!(writer.write_str("<")); - try!(format_qname(element.name(), mapping, element.preferred_prefix(), writer)); + try!(format_qname(element.name(), mapping, element.preferred_prefix(), false, writer)); for attr in &attrs { try!(writer.write_str(" ")); - try!(format_qname(attr.name(), mapping, attr.preferred_prefix(), writer)); + try!(format_qname(attr.name(), mapping, attr.preferred_prefix(), true, writer)); try!(write!(writer, "='")); try!(format_attribute_value(attr.value(), writer)); try!(write!(writer, "'")); @@ -365,7 +371,7 @@ fn format_element_end<'d, W: ?Sized>(element: dom::Element<'d>, where W: Write { try!(writer.write_str("") } @@ -550,6 +556,20 @@ mod test { assert_eq!(xml, ""); } + #[test] + fn attribute_with_default_namespace_prefix() { + let p = Package::new(); + let d = p.as_document(); + let e = d.create_element(("namespace", "hello")); + e.set_preferred_prefix(Some("p")); + e.set_default_namespace_uri(Some("namespace")); + e.set_attribute_value(("namespace", "a"), "b"); + d.root().append_child(e); + + let xml = format_xml(&d); + assert_eq!(xml, ""); + } + #[test] fn 
attributes_with_conflicting_preferred_namespace_prefixes() { let p = Package::new(); From 1d6c4db257126bfea3751541ce9213e8efec6dba Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 30 May 2017 20:28:22 -0400 Subject: [PATCH 05/21] Bump to version 0.2.2 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6643699..643949f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.1" +version = "0.2.2" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 9657cca5bb5f84dbd7e517073f90e3cf54007b2f Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Thu, 29 Jun 2017 14:30:19 +0200 Subject: [PATCH 06/21] Fix Use After Free The String Pool now uses just a HashSet, that stores the actual Interned Strings. The old code also stored the str slices that we were looking for as keys, but they were never interned properly, so they were super likely to get freed at some point and cause a Use after Free. 
Fixes #47 --- src/string_pool.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/string_pool.rs b/src/string_pool.rs index 65b6f88..9e73921 100644 --- a/src/string_pool.rs +++ b/src/string_pool.rs @@ -6,7 +6,7 @@ use std::borrow::Borrow; use std::cell::{Cell,RefCell}; use std::cmp::max; use std::collections::LinkedList; -use std::collections::hash_map::HashMap; +use std::collections::hash_set::HashSet; use std::default::Default; use std::ops::Deref; use std::slice; @@ -131,7 +131,7 @@ pub struct StringPool { start: Cell<*mut u8>, end: Cell<*const u8>, chunks: RefCell>, - index: RefCell>, + index: RefCell>, } static CAPACITY: usize = 10240; @@ -149,10 +149,13 @@ impl StringPool { pub fn intern<'s>(&'s self, s: &str) -> &'s str { if s == "" { return ""; } - let search_string = InternedString::from_str(s); - let mut index = self.index.borrow_mut(); - let interned_str = *index.entry(search_string).or_insert_with(|| self.do_intern(s)); + if let Some(interned) = index.get(s) { + return unsafe { mem::transmute(interned as &str) }; + } + + let interned_str = self.do_intern(s); + index.insert(interned_str); // The lifetime is really matched to us unsafe { mem::transmute(interned_str) } From 86111a80d21637ba77a42b1779c259336492ab53 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Fri, 7 Jul 2017 10:22:03 -0400 Subject: [PATCH 07/21] Bump to version 0.2.3 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 643949f..c4c083b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.2" +version = "0.2.3" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 88bed66f63b46d2063ba8f30d75973730d6d1552 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 1 Jul 2017 17:12:14 -0700 Subject: [PATCH 08/21] Understand the "xml" namespace prefix when parsing into the DOM This does not yet *do* anything with the attributes; it 
only stops them from preventing a successful parse. Related to #46 --- src/lib.rs | 3 +++ src/parser.rs | 22 ++++++++++++++++++++++ src/raw.rs | 5 +---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ee1217d..51c956a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,6 +72,9 @@ pub mod writer; pub use str::XmlChar; +static XML_NS_PREFIX: &'static str = "xml"; +static XML_NS_URI: &'static str = "http://www.w3.org/XML/1998/namespace"; + /// A prefixed name. This represents what is found in the string form /// of an XML document, and does not apply any namespace mapping. #[derive(Debug,Copy,Clone,PartialEq,Eq,PartialOrd,Ord)] diff --git a/src/parser.rs b/src/parser.rs index 9907d9d..369798a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -798,6 +798,7 @@ struct DomBuilder<'d> { elements: Vec>, element_names: Vec>>, attributes: Vec>, + seen_top_element: bool, } impl<'d> DomBuilder<'d> { @@ -807,6 +808,7 @@ impl<'d> DomBuilder<'d> { elements: vec![], element_names: Vec::new(), attributes: Vec::new(), + seen_top_element: false, } } @@ -879,6 +881,11 @@ impl<'d> DomBuilder<'d> { element.register_prefix(*prefix, ns_uri); } + if !self.seen_top_element { + self.seen_top_element = true; + element.register_prefix(::XML_NS_PREFIX, ::XML_NS_URI); + } + self.append_to_either(element); self.elements.push(element); @@ -1326,6 +1333,21 @@ mod test { assert_eq!(attr.value(), "b"); } + #[test] + fn an_attribute_with_xml_space_preserve() { + let package = quick_parse(" "); + let doc = package.as_document(); + let top = top(&doc); + + assert_eq!(top.attribute((::XML_NS_URI, "space")).unwrap().value(), "preserve"); + + let children = top.children(); + assert_eq!(children.len(), 3); + assert_eq!(children[0].text().unwrap().text(), " "); + assert_qname_eq!(children[1].element().unwrap().name(), "a"); + assert_eq!(children[2].text().unwrap().text(), " "); + } + #[test] fn an_attribute_with_references() { let package = quick_parse(""); diff --git 
a/src/raw.rs b/src/raw.rs index 4517810..47717e2 100644 --- a/src/raw.rs +++ b/src/raw.rs @@ -6,9 +6,6 @@ use string_pool::{StringPool,InternedString}; use std::marker::PhantomData; use std::slice; -static XML_NS_PREFIX: &'static str = "xml"; -static XML_NS_URI: &'static str = "http://www.w3.org/XML/1998/namespace"; - struct InternedQName { namespace_uri: Option, local_part: InternedString, @@ -644,7 +641,7 @@ impl Connections { { let mut namespaces = Vec::new(); - namespaces.push((XML_NS_PREFIX, XML_NS_URI)); + namespaces.push((::XML_NS_PREFIX, ::XML_NS_URI)); let all_namespaces = self.element_parents(element) From d9a5428673a38d9917ed0688fa4028607f147464 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Thu, 10 Aug 2017 09:17:01 -0400 Subject: [PATCH 09/21] Bump to version 0.2.4 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c4c083b..05e0099 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.3" +version = "0.2.4" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 09eb52f1573b0cbc5e1eef1e02341dfed8c0906f Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 25 Nov 2017 10:23:07 -0500 Subject: [PATCH 10/21] Remove unused trait --- src/parser.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 369798a..5fafaee 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,7 +15,6 @@ //! ``` use std; -use std::ascii::AsciiExt; use std::collections::HashMap; use std::mem::replace; use std::ops::Deref; From c4283c0979dac80350d7ad6d823e8a21ae8eab28 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Sat, 25 Nov 2017 10:40:28 -0500 Subject: [PATCH 11/21] Missing close tag causes parser to hang indefinitely We weren't checking that we made progress in the tokenizer or that all the in-progress elements were done when parsing was finished. 
Closes #52 --- src/parser.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 5fafaee..cfc4248 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -82,6 +82,7 @@ pub enum Error { RedefinedDefaultNamespace, EmptyNamespace, UnknownNamespacePrefix, + UnclosedElement, } impl Recoverable for Error { @@ -100,7 +101,8 @@ impl Recoverable for Error { RedefinedNamespace | RedefinedDefaultNamespace | EmptyNamespace | - UnknownNamespacePrefix => { + UnknownNamespacePrefix | + UnclosedElement => { false }, _ => true @@ -167,6 +169,7 @@ impl std::error::Error for Error { RedefinedDefaultNamespace => "redefined default namespace", EmptyNamespace => "empty namespace", UnknownNamespacePrefix => "unknown namespace prefix", + UnclosedElement => "unclosed element", } } } @@ -713,6 +716,10 @@ impl<'a> Iterator for PullParser<'a> { }, }; + if pt == xml { + return None; + } + let next_state = match (self.state, r) { (State::AtBeginning, Token::XmlDeclaration) | (State::AtBeginning, Token::ProcessingInstruction(..)) | @@ -925,6 +932,10 @@ impl<'d> DomBuilder<'d> { e.append_child(t); } + fn has_unclosed_elements(&self) -> bool { + !self.elements.is_empty() + } + fn consume(&mut self, token: Token<'d>) -> DomBuilderResult<()> { use self::Token::*; @@ -1013,6 +1024,10 @@ pub fn parse(xml: &str) -> Result)> { return Err((s.offset, vec![s.value])); } } + + if builder.has_unclosed_elements() { + return Err((xml.len(), vec![Error::UnclosedElement])); + } } Ok(package) @@ -1726,6 +1741,15 @@ mod test { assert_parse_failure!(r, 9, ExpectedWhitespace, ExpectedElementSelfClosed, ExpectedElementEnd); } + #[test] + fn failure_missing_close_tag() { + use super::Error::*; + + let r = full_parse("wow"); + + assert_parse_failure!(r, 7, UnclosedElement); + } + #[test] fn failure_nested_unexpected_space() { use super::Error::*; From 1093f81cc416cc67ab874d4979f4429798f6a147 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 4 
Dec 2017 18:53:55 -0500 Subject: [PATCH 12/21] Bump to version 0.2.5 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 05e0099..c438542 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.4" +version = "0.2.5" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 622fcba8567693daddb95f105a0f702e984c6722 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 4 Dec 2017 22:17:55 -0500 Subject: [PATCH 13/21] Revert "Remove unused trait" This reverts commit 09eb52f1573b0cbc5e1eef1e02341dfed8c0906f. --- src/parser.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.rs b/src/parser.rs index cfc4248..3d5b06b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,7 @@ //! ``` use std; +use std::ascii::AsciiExt; use std::collections::HashMap; use std::mem::replace; use std::ops::Deref; From 7b24769424a72575f60f74104087a00d10ec967d Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 4 Dec 2017 22:20:18 -0500 Subject: [PATCH 14/21] Allow AsciiExt to be unused in newer versions of Rust --- src/parser.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.rs b/src/parser.rs index 3d5b06b..b41cdde 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,7 @@ //! 
``` use std; +#[allow(unused)] // rust-lang/rust#46510 use std::ascii::AsciiExt; use std::collections::HashMap; use std::mem::replace; From 5fc09048585f4a3cab5396f21fd77388503b3b65 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Mon, 4 Dec 2017 22:21:17 -0500 Subject: [PATCH 15/21] Bump to version 0.2.6 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c438542..ac1ab48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sxd-document" -version = "0.2.5" +version = "0.2.6" authors = ["Jake Goulding "] description = "A Rust XML DOM library" From 5bfef623c9d3d181217d362c030b62604ec6dd4b Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Tue, 5 Dec 2017 09:16:04 -0500 Subject: [PATCH 16/21] Update CI configuration to include stable --- .travis.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 823de2c..0dc5a6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,19 +1,13 @@ language: rust sudo: false rust: +- stable - nightly - beta -env: - matrix: - - FEATURES="" - - FEATURES="unstable" - global: - secure: WPH3AcdfXVt8QBD5oNTZtBGDqnV4dt9LA0pxJJ0QjJZgZtjp+HfTG+l6NnX+8C8WPLOubYnphTdGg6tOCq/x5wJpltYMUNubF9ivhavyCkG2UfItyuejdjmf7edeHk3maNqY0cml9L/Gh5ysrf3/ti5+LZXzdnX9QhQvoL1TFZg= matrix: - exclude: - - rust: beta + include: + - rust: nightly env: FEATURES="unstable" script: | cargo build --verbose --features "${FEATURES}" cargo test --verbose --features "${FEATURES}" -after_success: "./deploy_gh_pages.sh" From 943de41c2ff19d956ce8248dffeea08d25d3a7f7 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 28 Feb 2018 19:37:11 -0500 Subject: [PATCH 17/21] Single struct for parsing errors Closes #57 --- src/bin/open.rs | 7 ++-- src/parser.rs | 98 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 38 deletions(-) diff --git a/src/bin/open.rs b/src/bin/open.rs index b90e10e..8fe524c 100644 --- 
a/src/bin/open.rs +++ b/src/bin/open.rs @@ -25,10 +25,9 @@ fn process_input(input: R) panic!("Can't read: {}", x); } - let package = match parser::parse(&data) { - Ok(d) => d, - Err((point, _)) => panic!("Unable to parse: {}", pretty_error(&data, point)), - }; + let package = parser::parse(&data).unwrap_or_else(|e| { + panic!("Unable to parse: {}", pretty_error(&data, e.location())); + }); // let mut out = io::stdout(); let mut out = io::sink(); diff --git a/src/parser.rs b/src/parser.rs index b41cdde..8a6b8d1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -14,13 +14,12 @@ //! let doc = parser::parse(xml).expect("Failed to parse"); //! ``` -use std; #[allow(unused)] // rust-lang/rust#46510 use std::ascii::AsciiExt; -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap}; use std::mem::replace; use std::ops::Deref; -use std::{char,iter}; +use std::{char, error, fmt, iter}; use peresil::{self,StringPoint,ParseMaster,Recoverable}; @@ -112,9 +111,9 @@ impl Recoverable for Error { } } -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use std::error::Error; +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::error::Error; use self::Error::*; match *self { @@ -128,7 +127,7 @@ impl std::fmt::Display for Error { } } -impl std::error::Error for Error { +impl error::Error for Error { fn description(&self) -> &str { use self::Error::*; @@ -1010,9 +1009,51 @@ impl<'d> DomBuilder<'d> { } } +#[derive(Debug, PartialEq, Eq)] +pub struct ParseError { + location: usize, + errors: BTreeSet, +} + +impl ParseError { + fn new(location: usize, error: Error) -> Self { + let mut errors = BTreeSet::new(); + errors.insert(error); + ParseError { location, errors } + } + + pub fn location(&self) -> usize { self.location } +} + +impl From<(usize, Vec)> for ParseError { + fn from(other: (usize, Vec)) -> Self { + let (location, errors) = other; + let errors = 
errors.into_iter().collect(); + ParseError { location, errors } + } +} + +impl From> for ParseError { + fn from(other: Span) -> Self { + Self::new(other.offset, other.value) + } +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "XML parsing error at {}: {:?}", self.location, self.errors) + } +} + +impl error::Error for ParseError { + fn description(&self) -> &str { + "Unable to parse XML" + } +} + /// Parses a string into a DOM. On failure, the location of the /// parsing failure and all possible failures will be returned. -pub fn parse(xml: &str) -> Result)> { +pub fn parse(xml: &str) -> Result { let parser = PullParser::new(xml); let package = super::Package::new(); @@ -1022,13 +1063,11 @@ pub fn parse(xml: &str) -> Result)> { for token in parser { let token = try!(token); - if let Err(s) = builder.consume(token) { - return Err((s.offset, vec![s.value])); - } + try!(builder.consume(token)); } if builder.has_unclosed_elements() { - return Err((xml.len(), vec![Error::UnclosedElement])); + return Err(ParseError::new(xml.len(), Error::UnclosedElement)); } } @@ -1206,15 +1245,14 @@ impl<'a> DeferredAttributes<'a> { #[cfg(test)] mod test { - use super::Error; - use super::super::{Package,QName}; - use super::super::dom; + use super::*; + use ::{dom, Package, QName}; macro_rules! assert_qname_eq( ($l:expr, $r:expr) => (assert_eq!(Into::::into($l), $r.into())); ); - fn full_parse(xml: &str) -> Result)> { + fn full_parse(xml: &str) -> Result { super::parse(xml) } @@ -1636,28 +1674,12 @@ mod test { // commentbody // pinstructionvalue - type ParseResult = Result)>; - - fn sort_parse_result(e: ParseResult) -> ParseResult - where E: ::std::cmp::Ord - { - match e { - Ok(t) => Ok(t), - Err((p, mut e)) => { - e.sort(); - Err((p, e)) - } - } - } - macro_rules! 
assert_parse_failure { ($actual:expr, $pos:expr, $($err:expr),+) => { { let errors = vec![$($err),+]; - let constructed = Err(($pos, errors)); - let expected = sort_parse_result(constructed); - let actual = sort_parse_result($actual); - assert_eq!(actual, expected); + let expected = Err(ParseError::from(($pos, errors))); + assert_eq!($actual, expected); } } } @@ -1904,4 +1926,12 @@ mod test { assert_parse_failure!(r, 1, UnknownNamespacePrefix); } + + #[test] + fn failure_is_an_error() { + fn __assert_well_behaved_error() + where + ParseError: ::std::error::Error + Send + Sync + 'static, + {} + } } From 0d572181a617c2c7643f441d134f00eab6ce1ed9 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 28 Feb 2018 20:04:37 -0500 Subject: [PATCH 18/21] Rename types to make the public error just Error --- src/parser.rs | 196 +++++++++++++++++++++++++------------------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 8a6b8d1..995ba0d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -30,7 +30,7 @@ use super::dom; use super::str::XmlStr; #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum Error { +enum SpecificError { Expected(&'static str), ExpectedAttribute, @@ -86,9 +86,9 @@ pub enum Error { UnclosedElement, } -impl Recoverable for Error { +impl Recoverable for SpecificError { fn recoverable(&self) -> bool { - use self::Error::*; + use self::SpecificError::*; match *self { ExpectedEncoding | @@ -111,10 +111,10 @@ impl Recoverable for Error { } } -impl fmt::Display for Error { +impl fmt::Display for SpecificError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::error::Error; - use self::Error::*; + use self::SpecificError::*; match *self { Expected(s) | @@ -127,9 +127,9 @@ impl fmt::Display for Error { } } -impl error::Error for Error { +impl error::Error for SpecificError { fn description(&self) -> &str { - use self::Error::*; + use self::SpecificError::*; match *self { 
Expected(_) => "expected", @@ -175,8 +175,8 @@ impl error::Error for Error { } } -type XmlMaster<'a> = peresil::ParseMaster, Error>; -type XmlProgress<'a, T> = peresil::Progress, T, Error>; +type XmlMaster<'a> = peresil::ParseMaster, SpecificError>; +type XmlProgress<'a, T> = peresil::Progress, T, SpecificError>; fn success(data: T, point: StringPoint) -> XmlProgress { peresil::Progress { point: point, status: peresil::Status::Success(data) } @@ -270,7 +270,7 @@ trait PrivateXmlParseExt<'a> { impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> { fn consume_attribute_value(&self, quote: &str) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_attribute(quote)).map_err(|_| Error::ExpectedAttributeValue) + self.consume_to(self.s.end_of_attribute(quote)).map_err(|_| SpecificError::ExpectedAttributeValue) } fn consume_name(&self) -> peresil::Progress, &'a str, ()> { @@ -278,31 +278,31 @@ impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> { } fn consume_hex_chars(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_hex_chars()).map_err(|_| Error::ExpectedHexReferenceValue) + self.consume_to(self.s.end_of_hex_chars()).map_err(|_| SpecificError::ExpectedHexReferenceValue) } fn consume_char_data(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_char_data()).map_err(|_| Error::ExpectedCharacterData) + self.consume_to(self.s.end_of_char_data()).map_err(|_| SpecificError::ExpectedCharacterData) } fn consume_cdata(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_cdata()).map_err(|_| Error::ExpectedCData) + self.consume_to(self.s.end_of_cdata()).map_err(|_| SpecificError::ExpectedCData) } fn consume_comment(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_comment()).map_err(|_| Error::ExpectedCommentBody) + self.consume_to(self.s.end_of_comment()).map_err(|_| SpecificError::ExpectedCommentBody) } fn consume_pi_value(&self) -> XmlProgress<'a, &'a str> { - 
self.consume_to(self.s.end_of_pi_value()).map_err(|_| Error::ExpectedProcessingInstructionValue) + self.consume_to(self.s.end_of_pi_value()).map_err(|_| SpecificError::ExpectedProcessingInstructionValue) } fn consume_start_tag(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_start_tag()).map_err(|_| Error::ExpectedElement) + self.consume_to(self.s.end_of_start_tag()).map_err(|_| SpecificError::ExpectedElement) } fn consume_encoding(&self) -> XmlProgress<'a, &'a str> { - self.consume_to(self.s.end_of_encoding()).map_err(|_| Error::ExpectedEncoding) + self.consume_to(self.s.end_of_encoding()).map_err(|_| SpecificError::ExpectedEncoding) } } @@ -313,11 +313,11 @@ trait X<'a> { impl<'a> X<'a> for StringPoint<'a> { fn expect_space(&self) -> XmlProgress<'a, &'a str> { - self.consume_space().map_err(|_| Error::ExpectedWhitespace) + self.consume_space().map_err(|_| SpecificError::ExpectedWhitespace) } fn expect_literal(&self, s: &'static str) -> XmlProgress<'a, &'a str> { - self.consume_literal(s).map_err(|_| Error::Expected(s)) + self.consume_literal(s).map_err(|_| SpecificError::Expected(s)) } } @@ -369,7 +369,7 @@ impl<'a> PullParser<'a> { } fn parse_comment<'a>(xml: StringPoint<'a>) -> XmlProgress<'a, Token> { - let (xml, _) = try_parse!(xml.consume_literal("")); @@ -381,9 +381,9 @@ fn parse_one_quoted_value<'a, T, F>(xml: StringPoint<'a>, quote: &'static str, f where F: FnMut(StringPoint<'a>) -> XmlProgress<'a, T> { let mut f = f; - let (xml, _) = try_parse!(xml.consume_literal(quote).map_err(|_| Error::ExpectedOpeningQuote(quote))); + let (xml, _) = try_parse!(xml.consume_literal(quote).map_err(|_| SpecificError::ExpectedOpeningQuote(quote))); let (xml, value) = try_parse!(f(xml)); - let (xml, _) = try_parse!(xml.consume_literal(quote).map_err(|_| Error::ExpectedClosingQuote(quote))); + let (xml, _) = try_parse!(xml.consume_literal(quote).map_err(|_| SpecificError::ExpectedClosingQuote(quote))); success(value, xml) } @@ -420,7 +420,7 @@ fn 
parse_version_info<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlPr let (xml, _) = try_parse!(xml.expect_literal("version")); let (xml, _) = try_parse!(parse_eq(xml)); let (xml, version) = try_parse!( - parse_quoted_value(pm, xml, |_, xml, _| version_num(xml).map_err(|_| Error::ExpectedVersionNumber)) + parse_quoted_value(pm, xml, |_, xml, _| version_num(xml).map_err(|_| SpecificError::ExpectedVersionNumber)) ); success(version, xml) @@ -451,7 +451,7 @@ fn parse_standalone_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a> .one(|_| xml.expect_literal("yes")) .one(|_| xml.expect_literal("no")) .finish() - .map_err(|_| Error::ExpectedYesNo) + .map_err(|_| SpecificError::ExpectedYesNo) }) ); @@ -481,7 +481,7 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) let (xml, _) = try_parse!(xml.expect_literal("SYSTEM")); let (xml, _) = try_parse!(xml.expect_space()); let (xml, external_id) = try_parse!( - parse_quoted_value(pm, xml, |_, xml, _| xml.consume_name().map_err(|_| Error::ExpectedSystemLiteral)) + parse_quoted_value(pm, xml, |_, xml, _| xml.consume_name().map_err(|_| SpecificError::ExpectedSystemLiteral)) ); success(external_id, xml) @@ -491,7 +491,7 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) fn parse_document_type_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> { let (xml, _) = try_parse!(xml.expect_literal("")); @@ -505,14 +505,14 @@ fn parse_pi_value(xml: StringPoint) -> XmlProgress<&str> { } fn parse_pi<'a>(xml: StringPoint<'a>) -> XmlProgress<'a, Token> { - let (xml, _) = try_parse!(xml.consume_literal("")); if target.eq_ignore_ascii_case("xml") { - return peresil::Progress::failure(target_xml, Error::InvalidProcessingInstructionTarget); + return peresil::Progress::failure(target_xml, SpecificError::InvalidProcessingInstructionTarget); } success(Token::ProcessingInstruction(target, value), xml) @@ -520,7 +520,7 @@ fn parse_pi<'a>(xml: 
StringPoint<'a>) -> XmlProgress<'a, Token> { fn parse_element_start(xml: StringPoint) -> XmlProgress { let (xml, _) = try_parse!(xml.consume_start_tag()); - let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_prefixed_name().map_err(|_| Error::ExpectedElementName))); + let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_prefixed_name().map_err(|_| SpecificError::ExpectedElementName))); success(Token::ElementStart(name), xml) } @@ -530,7 +530,7 @@ fn parse_element_start_close(xml: StringPoint) -> XmlProgress { xml.consume_literal(">") .map(|_| Token::ElementStartClose) - .map_err(|_| Error::ExpectedElementEnd) + .map_err(|_| SpecificError::ExpectedElementEnd) } fn parse_element_self_close(xml: StringPoint) -> XmlProgress { @@ -538,13 +538,13 @@ fn parse_element_self_close(xml: StringPoint) -> XmlProgress { xml.consume_literal("/>") .map(|_| Token::ElementSelfClose) - .map_err(|_| Error::ExpectedElementSelfClosed) + .map_err(|_| SpecificError::ExpectedElementSelfClosed) } fn parse_element_close(xml: StringPoint) -> XmlProgress { let (xml, _) = try_parse!(xml.expect_literal("")); @@ -558,14 +558,14 @@ const APOS: &'static str = r#"'"#; fn parse_attribute_start<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> { let (xml, _) = try_parse!(xml.expect_space()); - let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_prefixed_name().map_err(|_| Error::ExpectedAttribute))); + let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_prefixed_name().map_err(|_| SpecificError::ExpectedAttribute))); let (xml, _) = try_parse!(parse_eq(xml)); let (xml, q) = try_parse!( pm.alternate() - .one(|_| xml.expect_literal(QUOT).map_err(|_| Error::ExpectedOpeningQuote(QUOT))) - .one(|_| xml.expect_literal(APOS).map_err(|_| Error::ExpectedOpeningQuote(APOS))) + .one(|_| xml.expect_literal(QUOT).map_err(|_| SpecificError::ExpectedOpeningQuote(QUOT))) + .one(|_| xml.expect_literal(APOS).map_err(|_| 
SpecificError::ExpectedOpeningQuote(APOS))) .finish()); let q = if q == QUOT { QUOT } else { APOS }; @@ -576,7 +576,7 @@ fn parse_attribute_start<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> Xm fn parse_attribute_end<'a>(xml: StringPoint<'a>, quote: &'static str) -> XmlProgress<'a, Token<'a>> { xml.consume_literal(quote) .map(|_| Token::AttributeEnd) - .map_err(|_| Error::ExpectedClosingQuote(quote)) + .map_err(|_| SpecificError::ExpectedClosingQuote(quote)) } fn parse_attribute_literal<'a>(xml: StringPoint<'a>, quote: &str) -> XmlProgress<'a, Token<'a>> { @@ -586,23 +586,23 @@ fn parse_attribute_literal<'a>(xml: StringPoint<'a>, quote: &str) -> XmlProgress } fn parse_entity_ref(xml: StringPoint) -> XmlProgress { - let (xml, _) = try_parse!(xml.consume_literal("&").map_err(|_| Error::ExpectedNamedReference)); - let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_name().map_err(|_| Error::ExpectedNamedReferenceValue))); + let (xml, _) = try_parse!(xml.consume_literal("&").map_err(|_| SpecificError::ExpectedNamedReference)); + let (xml, name) = try_parse!(Span::parse(xml, |xml| xml.consume_name().map_err(|_| SpecificError::ExpectedNamedReferenceValue))); let (xml, _) = try_parse!(xml.expect_literal(";")); success(EntityReference(name), xml) } fn parse_decimal_char_ref(xml: StringPoint) -> XmlProgress { - let (xml, _) = try_parse!(xml.consume_literal("&#").map_err(|_| Error::ExpectedDecimalReference)); - let (xml, dec) = try_parse!(Span::parse(xml, |xml| xml.consume_decimal_chars().map_err(|_| Error::ExpectedDecimalReferenceValue))); + let (xml, _) = try_parse!(xml.consume_literal("&#").map_err(|_| SpecificError::ExpectedDecimalReference)); + let (xml, dec) = try_parse!(Span::parse(xml, |xml| xml.consume_decimal_chars().map_err(|_| SpecificError::ExpectedDecimalReferenceValue))); let (xml, _) = try_parse!(xml.expect_literal(";")); success(DecimalCharReference(dec), xml) } fn parse_hex_char_ref(xml: StringPoint) -> XmlProgress { - let (xml, _) = 
try_parse!(xml.consume_literal("&#x").map_err(|_| Error::ExpectedHexReference)); + let (xml, _) = try_parse!(xml.consume_literal("&#x").map_err(|_| SpecificError::ExpectedHexReference)); let (xml, hex) = try_parse!(Span::parse(xml, |xml| xml.consume_hex_chars())); let (xml, _) = try_parse!(xml.expect_literal(";")); @@ -642,7 +642,7 @@ fn parse_content_reference<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> } impl<'a> Iterator for PullParser<'a> { - type Item = Result, (usize, Vec)>; + type Item = Result, (usize, Vec)>; fn next(&mut self) -> Option { let pm = &mut self.pm; @@ -848,7 +848,7 @@ impl<'d> DomBuilder<'d> { let value = try!(AttributeValueBuilder::convert(&ns.values)); if value.is_empty() { - return Err(ns.name.map(|_| Error::EmptyNamespace)); + return Err(ns.name.map(|_| SpecificError::EmptyNamespace)); } new_prefix_mappings.insert(ns.name.value.local_part, value); @@ -866,7 +866,7 @@ impl<'d> DomBuilder<'d> { element.set_preferred_prefix(Some(prefix)); element } else { - return Err(deferred_element.map(|_| Error::UnknownNamespacePrefix)); + return Err(deferred_element.map(|_| SpecificError::UnknownNamespacePrefix)); } } else if let Some(ns_uri) = default_namespace { if ns_uri.is_empty() { @@ -912,7 +912,7 @@ impl<'d> DomBuilder<'d> { let attr = element.set_attribute_value((ns_uri, name.local_part), &builder); attr.set_preferred_prefix(Some(prefix)); } else { - return Err(attribute.name.map(|_| Error::UnknownNamespacePrefix)) + return Err(attribute.name.map(|_| SpecificError::UnknownNamespacePrefix)) } } else { element.set_attribute_value(name.local_part, &builder); @@ -965,7 +965,7 @@ impl<'d> DomBuilder<'d> { self.elements.pop(); if n.value != open_name.value { - return Err(n.map(|_| Error::MismatchedElementEndName)); + return Err(n.map(|_| SpecificError::MismatchedElementEndName)); } }, @@ -1010,42 +1010,42 @@ impl<'d> DomBuilder<'d> { } #[derive(Debug, PartialEq, Eq)] -pub struct ParseError { +pub struct Error { location: usize, - errors: 
BTreeSet, + errors: BTreeSet, } -impl ParseError { - fn new(location: usize, error: Error) -> Self { +impl Error { + fn new(location: usize, error: SpecificError) -> Self { let mut errors = BTreeSet::new(); errors.insert(error); - ParseError { location, errors } + Error { location, errors } } pub fn location(&self) -> usize { self.location } } -impl From<(usize, Vec)> for ParseError { - fn from(other: (usize, Vec)) -> Self { +impl From<(usize, Vec)> for Error { + fn from(other: (usize, Vec)) -> Self { let (location, errors) = other; let errors = errors.into_iter().collect(); - ParseError { location, errors } + Error { location, errors } } } -impl From> for ParseError { - fn from(other: Span) -> Self { +impl From> for Error { + fn from(other: Span) -> Self { Self::new(other.offset, other.value) } } -impl fmt::Display for ParseError { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "XML parsing error at {}: {:?}", self.location, self.errors) } } -impl error::Error for ParseError { +impl error::Error for Error { fn description(&self) -> &str { "Unable to parse XML" } @@ -1053,7 +1053,7 @@ impl error::Error for ParseError { /// Parses a string into a DOM. On failure, the location of the /// parsing failure and all possible failures will be returned. 
-pub fn parse(xml: &str) -> Result { +pub fn parse(xml: &str) -> Result { let parser = PullParser::new(xml); let package = super::Package::new(); @@ -1067,14 +1067,14 @@ pub fn parse(xml: &str) -> Result { } if builder.has_unclosed_elements() { - return Err(ParseError::new(xml.len(), Error::UnclosedElement)); + return Err(Error::new(xml.len(), SpecificError::UnclosedElement)); } } Ok(package) } -type DomBuilderResult = Result>; +type DomBuilderResult = Result>; fn decode_reference(ref_data: Reference, cb: F) -> DomBuilderResult<()> where F: FnOnce(&str) @@ -1083,7 +1083,7 @@ fn decode_reference(ref_data: Reference, cb: F) -> DomBuilderResult<()> DecimalCharReference(span) => { u32::from_str_radix(span.value, 10).ok() .and_then(char::from_u32) - .ok_or(span.map(|_| Error::InvalidDecimalReference)) + .ok_or(span.map(|_| SpecificError::InvalidDecimalReference)) .and_then(|c| { let s: String = iter::repeat(c).take(1).collect(); cb(&s); @@ -1093,7 +1093,7 @@ fn decode_reference(ref_data: Reference, cb: F) -> DomBuilderResult<()> HexCharReference(span) => { u32::from_str_radix(span.value, 16).ok() .and_then(char::from_u32) - .ok_or(span.map(|_| Error::InvalidHexReference)) + .ok_or(span.map(|_| SpecificError::InvalidHexReference)) .and_then(|c| { let s: String = iter::repeat(c).take(1).collect(); cb(&s); @@ -1107,7 +1107,7 @@ fn decode_reference(ref_data: Reference, cb: F) -> DomBuilderResult<()> "gt" => ">", "apos" => "'", "quot" => "\"", - _ => return Err(span.map(|_| Error::UnknownNamedReference)), + _ => return Err(span.map(|_| SpecificError::UnknownNamedReference)), }; cb(s); Ok(()) @@ -1206,13 +1206,13 @@ impl<'a> DeferredAttributes<'a> { fn check_duplicates(&self) -> DomBuilderResult<()> { for w in self.attributes.windows(2) { if w[0].name.value == w[1].name.value { - return Err(w[1].name.map(|_| Error::DuplicateAttribute)); + return Err(w[1].name.map(|_| SpecificError::DuplicateAttribute)); } } for w in self.namespaces.windows(2) { if w[0].name.value == 
w[1].name.value { - return Err(w[1].name.map(|_| Error::RedefinedNamespace)); + return Err(w[1].name.map(|_| SpecificError::RedefinedNamespace)); } } @@ -1237,7 +1237,7 @@ impl<'a> DeferredAttributes<'a> { }, _ => { let last_namespace = self.default_namespaces.last().unwrap(); - Err(last_namespace.name.map(|_| Error::RedefinedDefaultNamespace)) + Err(last_namespace.name.map(|_| SpecificError::RedefinedDefaultNamespace)) }, } } @@ -1252,7 +1252,7 @@ mod test { ($l:expr, $r:expr) => (assert_eq!(Into::::into($l), $r.into())); ); - fn full_parse(xml: &str) -> Result { + fn full_parse(xml: &str) -> Result { super::parse(xml) } @@ -1678,7 +1678,7 @@ mod test { ($actual:expr, $pos:expr, $($err:expr),+) => { { let errors = vec![$($err),+]; - let expected = Err(ParseError::from(($pos, errors))); + let expected = Err(Error::from(($pos, errors))); assert_eq!($actual, expected); } } @@ -1686,7 +1686,7 @@ mod test { #[test] fn failure_invalid_encoding() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1695,7 +1695,7 @@ mod test { #[test] fn failure_invalid_standalone() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1704,7 +1704,7 @@ mod test { #[test] fn failure_no_open_brace() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse("hi />"); @@ -1713,7 +1713,7 @@ mod test { #[test] fn failure_unclosed_tag() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1731,7 +1731,7 @@ mod test { #[test] fn failure_attribute_without_open_quote() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1740,7 +1740,7 @@ mod test { #[test] fn failure_attribute_without_close_quote() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1767,7 +1767,7 @@ mod test { #[test] fn failure_missing_close_tag() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse("wow"); @@ -1776,7 +1776,7 @@ mod 
test { #[test] fn failure_nested_unexpected_space() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1785,7 +1785,7 @@ mod test { #[test] fn failure_malformed_entity_reference() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse("Entity: &;"); @@ -1794,7 +1794,7 @@ mod test { #[test] fn failure_nested_malformed_entity_reference() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse("Entity: &;"); @@ -1803,7 +1803,7 @@ mod test { #[test] fn failure_nested_attribute_without_open_quote() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1812,7 +1812,7 @@ mod test { #[test] fn failure_nested_attribute_without_close_quote() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1830,7 +1830,7 @@ mod test { #[test] fn failure_pi_target_as_xml() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1839,7 +1839,7 @@ mod test { #[test] fn failure_end_tag_does_not_match() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1848,7 +1848,7 @@ mod test { #[test] fn failure_invalid_decimal_reference() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1857,7 +1857,7 @@ mod test { #[test] fn failure_invalid_hex_reference() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1866,7 +1866,7 @@ mod test { #[test] fn failure_unknown_named_reference() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse("&fake;"); @@ -1875,7 +1875,7 @@ mod test { #[test] fn failure_duplicate_attribute() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1884,7 +1884,7 @@ mod test { #[test] fn failure_redefined_namespace() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1893,7 +1893,7 @@ mod test { #[test] fn failure_redefined_default_namespace() 
{ - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1902,7 +1902,7 @@ mod test { #[test] fn failure_empty_namespace() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1911,7 +1911,7 @@ mod test { #[test] fn failure_unknown_attribute_namespace_prefix() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1920,7 +1920,7 @@ mod test { #[test] fn failure_unknown_element_namespace_prefix() { - use super::Error::*; + use super::SpecificError::*; let r = full_parse(""); @@ -1931,7 +1931,7 @@ mod test { fn failure_is_an_error() { fn __assert_well_behaved_error() where - ParseError: ::std::error::Error + Send + Sync + 'static, + Error: ::std::error::Error + Send + Sync + 'static, {} } } From 6a78cbb9d9be8b2a1c0fcd0bea285fd1b55894ee Mon Sep 17 00:00:00 2001 From: Owen Nelson Date: Sat, 10 Mar 2018 14:24:26 -0800 Subject: [PATCH 19/21] Fixes DTD parser issue resulting in `ExpectedClosingQuote` error. Certain characters (such as `/`) in the document type declaration (DTD) could cause the parser to fail. This updates `parse_external_id()` to use a slightly more permissive function to consume the attribute string all the way to the closing quote. 
--- src/parser.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 995ba0d..634c10c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -481,8 +481,10 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) let (xml, _) = try_parse!(xml.expect_literal("SYSTEM")); let (xml, _) = try_parse!(xml.expect_space()); let (xml, external_id) = try_parse!( - parse_quoted_value(pm, xml, |_, xml, _| xml.consume_name().map_err(|_| SpecificError::ExpectedSystemLiteral)) - ); + parse_quoted_value(pm, xml, |_, xml, quote| + xml.consume_attribute_value(quote).map_err(|_| SpecificError::ExpectedSystemLiteral) + ) + ); success(external_id, xml) } @@ -1311,7 +1313,9 @@ mod test { #[test] fn a_prolog_with_a_document_type_declaration() { - let package = quick_parse(""); + let package = quick_parse(r#" + + "#); let doc = package.as_document(); let top = top(&doc); From d12be50f0c2d77ac320912abe8ddc1b3405a6fa2 Mon Sep 17 00:00:00 2001 From: Owen Nelson Date: Sat, 10 Mar 2018 23:18:18 -0800 Subject: [PATCH 20/21] Extend DTD parsing to handle more internal DTDs refs #50 --- src/parser.rs | 129 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/str.rs | 9 ++++ 2 files changed, 134 insertions(+), 4 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 634c10c..81375a2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -58,6 +58,7 @@ enum SpecificError { ExpectedWhitespace, ExpectedDocumentTypeName, + ExpectedIntSubset, ExpectedSystemLiteral, ExpectedClosingQuote(&'static str), @@ -151,6 +152,7 @@ impl error::Error for SpecificError { ExpectedYesNo => "expected yes or no", ExpectedWhitespace => "expected whitespace", ExpectedDocumentTypeName => "expected document type name", + ExpectedIntSubset => "expected int subset", ExpectedSystemLiteral => "expected system literal", ExpectedClosingQuote(_) => "expected closing quote", ExpectedOpeningQuote(_) => "expected opening quote", @@ -262,6 +264,7 @@ trait 
PrivateXmlParseExt<'a> { fn consume_hex_chars(&self) -> XmlProgress<'a, &'a str>; fn consume_char_data(&self) -> XmlProgress<'a, &'a str>; fn consume_cdata(&self) -> XmlProgress<'a, &'a str>; + fn consume_int_subset(&self) -> XmlProgress<'a, &'a str>; fn consume_comment(&self) -> XmlProgress<'a, &'a str>; fn consume_pi_value(&self) -> XmlProgress<'a, &'a str>; fn consume_start_tag(&self) -> XmlProgress<'a, &'a str>; @@ -289,6 +292,10 @@ impl<'a> PrivateXmlParseExt<'a> for StringPoint<'a> { self.consume_to(self.s.end_of_cdata()).map_err(|_| SpecificError::ExpectedCData) } + fn consume_int_subset(&self) -> XmlProgress<'a, &'a str> { + self.consume_to(self.s.end_of_int_subset()).map_err(|_| SpecificError::ExpectedIntSubset) + } + fn consume_comment(&self) -> XmlProgress<'a, &'a str> { self.consume_to(self.s.end_of_comment()).map_err(|_| SpecificError::ExpectedCommentBody) } @@ -489,13 +496,30 @@ fn parse_external_id<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) success(external_id, xml) } -/* without the optional intSubset */ +fn parse_int_subset<'a>(_pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) + -> XmlProgress<'a, &'a str> +{ + let (xml, _) = try_parse!(xml.expect_literal("[")); + let (xml, _) = xml.consume_space().optional(xml); + let (xml, elements) = try_parse!( + xml.consume_int_subset().map_err(|_| SpecificError::ExpectedIntSubset) + ); + let (xml, _) = xml.consume_space().optional(xml); + let (xml, _) = try_parse!(xml.expect_literal("]")); + let (xml, _) = xml.consume_space().optional(xml); + + success(elements, xml) +} + fn parse_document_type_declaration<'a>(pm: &mut XmlMaster<'a>, xml: StringPoint<'a>) -> XmlProgress<'a, Token<'a>> { let (xml, _) = try_parse!(xml.expect_literal("")); success(Token::DocumentTypeDeclaration, xml) @@ -1312,7 +1336,7 @@ mod test { } #[test] - fn a_prolog_with_a_document_type_declaration() { + fn a_prolog_with_a_doc_type_declaration_external_id() { let package = quick_parse(r#" "#); @@ -1322,6 +1346,103 @@ mod test { 
assert_qname_eq!(top.name(), "hello"); } + #[test] + fn a_prolog_with_a_doc_type_declaration_int_subset() { + let package = quick_parse(r#" + + + + + + ]> + + Tove + Jani + Reminder + Don't forget me this weekend + + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "note"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_int_subset_trailing_ws() { + let package = quick_parse(r#" + + + + + + ] + + > + + Tove + Jani + Reminder + Don't forget me this weekend + + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "note"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_zero_def() { + let package = quick_parse(" + + "); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_zero_def_trailing_ws() { + let package = quick_parse(" + + "); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id() { + let package = quick_parse(r#" + + ]> + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + + #[test] + fn a_prolog_with_a_doc_type_declaration_both_int_subset_and_external_id_trailing_ws() { + let package = quick_parse(r#" + + ] > + "#); + let doc = package.as_document(); + let top = top(&doc); + + assert_qname_eq!(top.name(), "hello"); + } + #[test] fn a_document_with_a_single_element() { let package = quick_parse(""); diff --git a/src/str.rs b/src/str.rs index cd6fa71..a62031a 100644 --- a/src/str.rs +++ b/src/str.rs @@ -49,6 +49,8 @@ pub trait XmlStr { /// Find the end of the starting tag fn end_of_start_tag(&self) -> Option; fn end_of_encoding(&self) -> Option; + /// Find the end of the internal doc type declaration, not including the ] + fn end_of_int_subset(&self) -> 
Option; } impl<'a> XmlStr for &'a str { @@ -143,6 +145,8 @@ impl<'a> XmlStr for &'a str { fn end_of_encoding(&self) -> Option { self.end_of_start_rest(|c| c.is_encoding_start_char(), |c| c.is_encoding_rest_char()) } + + fn end_of_int_subset(&self) -> Option { self.find("]") } } /// Predicates used when parsing an characters in an XML document. @@ -297,4 +301,9 @@ mod test { fn end_of_char_data_includes_multiple_right_squares() { assert_eq!("hello]]world".end_of_char_data(), Some("hello]]world".len())); } + + #[test] + fn end_of_int_subset_excludes_right_square() { + assert_eq!("hello]>world".end_of_int_subset(), Some("hello".len())) + } } From 1987153cccc01dc635557e4c65ff4ea116a4c681 Mon Sep 17 00:00:00 2001 From: Jake Goulding Date: Wed, 4 Apr 2018 20:21:18 -0400 Subject: [PATCH 21/21] Remove very old calls predating `Result::unwrap` --- src/bin/open.rs | 2 +- src/writer.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/open.rs b/src/bin/open.rs index 8fe524c..307448e 100644 --- a/src/bin/open.rs +++ b/src/bin/open.rs @@ -33,7 +33,7 @@ fn process_input(input: R) let mut out = io::sink(); let d = package.as_document(); - sxd_document::writer::format_document(&d, &mut out).ok().expect("I can't output"); + sxd_document::writer::format_document(&d, &mut out).expect("I can't output"); // Remove when we move back to stdout_raw + buffer or when stdout flushed at program exit out.flush().unwrap(); } diff --git a/src/writer.rs b/src/writer.rs index c81176a..8212a53 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -13,7 +13,7 @@ //! doc.root().append_child(hello); //! //! let mut output = Vec::new(); -//! format_document(&doc, &mut output).ok().expect("unable to output XML"); +//! format_document(&doc, &mut output).expect("unable to output XML"); //! ``` //! //! ### Potential options to support