{
/// (_, Event::Start(_)) => unreachable!(),
///
/// (_, Event::Text(e)) => {
- /// txt.push(e.unescape().unwrap().into_owned())
+ /// txt.push(e.decode().unwrap().into_owned())
/// }
/// (_, Event::Eof) => break,
/// _ => (),
@@ -664,7 +664,7 @@ impl<'i> NsReader<&'i [u8]> {
/// }
/// }
/// Event::Text(e) => {
- /// txt.push(e.unescape().unwrap().into_owned())
+ /// txt.push(e.decode().unwrap().into_owned())
/// }
/// Event::Eof => break,
/// _ => (),
@@ -726,7 +726,7 @@ impl<'i> NsReader<&'i [u8]> {
/// (_, Event::Start(_)) => unreachable!(),
///
/// (_, Event::Text(e)) => {
- /// txt.push(e.unescape().unwrap().into_owned())
+ /// txt.push(e.decode().unwrap().into_owned())
/// }
/// (_, Event::Eof) => break,
/// _ => (),
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
index 08287592..311edf6a 100644
--- a/src/reader/slice_reader.rs
+++ b/src/reader/slice_reader.rs
@@ -14,7 +14,7 @@ use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::parser::Parser;
-use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
+use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource};
use crate::utils::is_whitespace;
/// This is an implementation for reading from a `&[u8]` as underlying byte stream.
@@ -62,7 +62,7 @@ impl<'a> Reader<&'a [u8]> {
/// loop {
/// match reader.read_event().unwrap() {
/// Event::Start(e) => count += 1,
- /// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()),
+ /// Event::Text(e) => txt.push(e.decode().unwrap().into_owned()),
/// Event::Eof => break,
/// _ => (),
/// }
@@ -263,27 +263,79 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
#[inline]
fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> {
- match memchr::memchr(b'<', self) {
- Some(0) => {
- *position += 1;
+ // Search for start of markup or an entity or character reference
+ match memchr::memchr2(b'<', b'&', self) {
+ Some(0) if self[0] == b'<' => {
*self = &self[1..];
+ *position += 1;
ReadTextResult::Markup(())
}
- Some(i) => {
- *position += i as u64 + 1;
+ // Do not consume `&` because it may be lone and we would need to
+ // return it as part of Text event
+ Some(0) => ReadTextResult::Ref(()),
+ Some(i) if self[i] == b'<' => {
let bytes = &self[..i];
*self = &self[i + 1..];
+ *position += i as u64 + 1;
ReadTextResult::UpToMarkup(bytes)
}
+ Some(i) => {
+ let (bytes, rest) = self.split_at(i);
+ *self = rest;
+ *position += i as u64;
+ ReadTextResult::UpToRef(bytes)
+ }
None => {
- *position += self.len() as u64;
let bytes = &self[..];
*self = &[];
+ *position += bytes.len() as u64;
ReadTextResult::UpToEof(bytes)
}
}
}
+ #[inline]
+ fn read_ref(&mut self, _buf: (), position: &mut u64) -> ReadRefResult<'a> {
+ debug_assert_eq!(
+ self.first(),
+ Some(&b'&'),
+ "`read_ref` must be called at `&`"
+ );
+ // Search for the end of reference or a start of another reference or a markup
+ match memchr::memchr3(b';', b'&', b'<', &self[1..]) {
+ // Do not consume `&` because it may be lone and we would need to
+ // return it as part of Text event
+ Some(i) if self[i + 1] == b'&' => {
+ let (bytes, rest) = self.split_at(i + 1);
+ *self = rest;
+ *position += i as u64 + 1;
+
+ ReadRefResult::UpToRef(bytes)
+ }
+ Some(i) => {
+ let end = i + 1;
+ let is_end = self[end] == b';';
+ let bytes = &self[..end];
+ // +1 -- skip the end `;` or `<`
+ *self = &self[end + 1..];
+ *position += end as u64 + 1;
+
+ if is_end {
+ ReadRefResult::Ref(bytes)
+ } else {
+ ReadRefResult::UpToMarkup(bytes)
+ }
+ }
+ None => {
+ let bytes = &self[..];
+ *self = &[];
+ *position += bytes.len() as u64;
+
+ ReadRefResult::UpToEof(bytes)
+ }
+ }
+ }
+
#[inline]
fn read_with(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]>
where
diff --git a/src/writer.rs b/src/writer.rs
index 19d120bf..f0a6a97d 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -221,6 +221,7 @@ impl Writer {
Event::Decl(e) => self.write_wrapped(b"", &e, b"?>"),
Event::PI(e) => self.write_wrapped(b"", &e, b"?>"),
Event::DocType(e) => self.write_wrapped(b""),
+ Event::GeneralRef(e) => self.write_wrapped(b"&", &e, b";"),
Event::Eof => Ok(()),
};
if let Some(i) = self.indent.as_mut() {
diff --git a/src/writer/async_tokio.rs b/src/writer/async_tokio.rs
index dab4c5b2..4f1c79a6 100644
--- a/src/writer/async_tokio.rs
+++ b/src/writer/async_tokio.rs
@@ -40,6 +40,7 @@ impl Writer {
Event::Decl(e) => self.write_wrapped_async(b"", &e, b"?>").await,
Event::PI(e) => self.write_wrapped_async(b"", &e, b"?>").await,
Event::DocType(e) => self.write_wrapped_async(b"").await,
+ Event::GeneralRef(e) => self.write_wrapped_async(b"&", &e, b";").await,
Event::Eof => Ok(()),
};
if let Some(i) = self.indent.as_mut() {
diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs
index 25ec86bc..94003c0c 100644
--- a/tests/async-tokio.rs
+++ b/tests/async-tokio.rs
@@ -29,18 +29,19 @@ async fn test_sample() {
loop {
reads += 1;
assert!(
- reads <= 5245,
+ reads <= 10000,
"too many events, possible infinity loop: {reads}"
);
- match reader.read_event_into_async(&mut buf).await.unwrap() {
- Start(_) => count += 1,
- Decl(e) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()),
- Eof => break,
- _ => (),
+ match reader.read_event_into_async(&mut buf).await {
+ Ok(Start(_)) => count += 1,
+ Ok(Decl(e)) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()),
+ Ok(Eof) => break,
+ Ok(_) => (),
+ Err(e) => panic!("{} at {}", e, reader.error_position()),
}
buf.clear();
}
- assert_eq!((count, reads), (1247, 5245));
+ assert_eq!((count, reads), (1247, 5457));
}
/// This tests checks that read_to_end() correctly returns span even when
diff --git a/tests/documents/html5.txt b/tests/documents/html5.txt
index 05f200d4..de0a5b43 100644
--- a/tests/documents/html5.txt
+++ b/tests/documents/html5.txt
@@ -5,6 +5,8 @@ StartElement(a, attr-error: position 7: attribute value must be enclosed in `"`
Characters(Hey)
EndElement(a)
Characters(
-
+)
+Reference(nbsp)
+Characters(
)
EndDocument
diff --git a/tests/encodings.rs b/tests/encodings.rs
index 5f5676fa..7b64e167 100644
--- a/tests/encodings.rs
+++ b/tests/encodings.rs
@@ -37,7 +37,7 @@ fn test_koi8_r_encoding() {
loop {
match r.read_event_into(&mut buf) {
Ok(Text(e)) => {
- e.unescape().unwrap();
+ e.decode().unwrap();
}
Ok(Eof) => break,
_ => (),
diff --git a/tests/fuzzing.rs b/tests/fuzzing.rs
index 2740763c..25cf6989 100644
--- a/tests/fuzzing.rs
+++ b/tests/fuzzing.rs
@@ -38,7 +38,7 @@ fn fuzz_101() {
}
}
Ok(Event::Text(e)) => {
- if e.unescape().is_err() {
+ if e.decode().is_err() {
break;
}
}
diff --git a/tests/html.rs b/tests/html.rs
index 19688064..b93c788a 100644
--- a/tests/html.rs
+++ b/tests/html.rs
@@ -21,7 +21,12 @@ fn escaped_characters_html() {
r#"╔╗╔╗╔╗"#,
r#"
|StartElement(e [attr="ℏÈℓ𝕝⨀"])
- |Characters(╔╗╔╗╔╗)
+ |Reference(boxDR)
+ |Reference(boxDL)
+ |Reference(#x02554)
+ |Reference(#x02557)
+ |Reference(#9556)
+ |Reference(#9559)
|EndElement(e)
|EndDocument
"#,
@@ -86,6 +91,10 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
Ok(c) => format!("Characters({})", &c),
Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
},
+ Ok((_, Event::GeneralRef(e))) => match unescape(&decoder.decode(&e).unwrap()) {
+ Ok(c) => format!("Reference({})", &c),
+ Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
+ },
Ok((_, Event::Eof)) => "EndDocument".to_string(),
Err(e) => format!("Error: {}", e),
};
diff --git a/tests/reader-config.rs b/tests/reader-config.rs
index 8796075e..09f820a3 100644
--- a/tests/reader-config.rs
+++ b/tests/reader-config.rs
@@ -6,9 +6,75 @@
//! Please keep tests sorted (exceptions are allowed if options are tightly related).
use quick_xml::errors::{Error, IllFormedError};
-use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesStart, BytesText, Event};
+use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesRef, BytesStart, BytesText, Event};
use quick_xml::reader::Reader;
+mod allow_dangling_amp {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn false_() {
+ let mut reader = Reader::from_str("&&<&");
+ reader.config_mut().allow_dangling_amp = false;
+
+ match reader.read_event() {
+ Err(Error::IllFormed(cause)) => {
+ assert_eq!(cause, IllFormedError::UnclosedReference);
+ }
+ x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+ }
+ assert_eq!(reader.error_position()..reader.buffer_position(), 0..1);
+
+ match reader.read_event() {
+ Err(Error::IllFormed(cause)) => {
+ assert_eq!(cause, IllFormedError::UnclosedReference);
+ }
+ x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+ }
+ assert_eq!(reader.error_position()..reader.buffer_position(), 1..2);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Event::GeneralRef(BytesRef::new("lt"))
+ );
+ match reader.read_event() {
+ Err(Error::IllFormed(cause)) => {
+ assert_eq!(cause, IllFormedError::UnclosedReference);
+ }
+ x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+ }
+ assert_eq!(reader.error_position()..reader.buffer_position(), 6..7);
+
+ assert_eq!(reader.read_event().unwrap(), Event::Eof);
+ assert_eq!(reader.error_position()..reader.buffer_position(), 6..7);
+ }
+
+ #[test]
+ fn true_() {
+ let mut reader = Reader::from_str("&&<&");
+ reader.config_mut().allow_dangling_amp = true;
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Event::Text(BytesText::from_escaped("&"))
+ );
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Event::Text(BytesText::from_escaped("&"))
+ );
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Event::GeneralRef(BytesRef::new("lt"))
+ );
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Event::Text(BytesText::from_escaped("&"))
+ );
+ assert_eq!(reader.read_event().unwrap(), Event::Eof);
+ }
+}
+
mod allow_unmatched_ends {
use super::*;
use pretty_assertions::assert_eq;
diff --git a/tests/reader-errors.rs b/tests/reader-errors.rs
index 8f9c578e..0eecca7d 100644
--- a/tests/reader-errors.rs
+++ b/tests/reader-errors.rs
@@ -896,4 +896,29 @@ mod ill_formed {
// ^= 5
err!(double_hyphen_in_comment4("") => 5: IllFormedError::DoubleHyphenInComment);
// ^= 5
+
+ mod reference {
+ use super::*;
+ use quick_xml::events::BytesRef;
+
+ err2!(unclosed1(".&") => 1: IllFormedError::UnclosedReference);
+ err2!(unclosed2(".&x") => 1: IllFormedError::UnclosedReference);
+ err2!(unclosed_num(".") => 1: IllFormedError::UnclosedReference);
+ err2!(unclosed_dec(".") => 1: IllFormedError::UnclosedReference);
+ err2!(unclosed_hex1(".") => 1: IllFormedError::UnclosedReference);
+ err2!(unclosed_hex2(".") => 1: IllFormedError::UnclosedReference);
+
+ // We do not check correctness of references during parsing
+ ok!(empty("&;") => 2: Event::GeneralRef(BytesRef::new("")));
+ ok!(normal1("&x;") => 3: Event::GeneralRef(BytesRef::new("x")));
+ ok!(normal2("&x;rest") => 3: Event::GeneralRef(BytesRef::new("x")));
+ ok!(num("") => 3: Event::GeneralRef(BytesRef::new("#")));
+ ok!(dec("") => 4: Event::GeneralRef(BytesRef::new("#2")));
+ ok!(hex1("") => 4: Event::GeneralRef(BytesRef::new("#x")));
+ ok!(hex2("") => 5: Event::GeneralRef(BytesRef::new("#xF")));
+
+ // XML specification explicitly allows any number of leading zeroes
+ ok!(long_dec(" ") => 44: Event::GeneralRef(BytesRef::new("#00000000000000000000000000000000000000032")));
+ ok!(long_hex(" ") => 45: Event::GeneralRef(BytesRef::new("#x00000000000000000000000000000000000000020")));
+ }
}
diff --git a/tests/reader-references.rs b/tests/reader-references.rs
new file mode 100644
index 00000000..b0f3456e
--- /dev/null
+++ b/tests/reader-references.rs
@@ -0,0 +1,546 @@
+use quick_xml::events::{
+ BytesCData, BytesDecl, BytesEnd, BytesPI, BytesRef, BytesStart, BytesText, Event::*,
+};
+use quick_xml::reader::Reader;
+
+use pretty_assertions::assert_eq;
+
+mod character_reference {
+ use super::*;
+
+ mod dec {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn decl() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Decl(BytesDecl::new(&format!("&{i};"), None, None)),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn pi() {
+ for i in 0..=0x10FFFF {
+ let input = format!("&{i};?>");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ PI(BytesPI::new(&format!("&{i};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn doctype() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ DocType(BytesText::from_escaped(&format!("&{i};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn comment() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Comment(BytesText::from_escaped(&format!("&{i};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn cdata() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ CData(BytesCData::new(format!("&{i};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn text() {
+ for i in 0..=0x10FFFF {
+ let input = format!("&{i};");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ GeneralRef(BytesRef::new(format!("{i}"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn empty() {
+ for i in 0u32..=0x10FFFF {
+ let input = format!("<&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};/>");
+ let mut reader = Reader::from_str(&input);
+
+ let name_len = format!("&{i};").len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Empty(BytesStart::from_content(
+ format!("&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};"),
+ name_len
+ )),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn start() {
+ for i in 0..=0x10FFFF {
+ let input = format!("<&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};>");
+ let mut reader = Reader::from_str(&input);
+
+ let name_len = format!("&{i};").len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Start(BytesStart::from_content(
+ format!("&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};"),
+ name_len
+ )),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn end() {
+ for i in 0..=0x10FFFF {
+ let input = format!("<>&{i};>");
+ let mut reader = Reader::from_str(&input);
+ reader.config_mut().check_end_names = false;
+
+ // Skip <>
+ reader.read_event().unwrap();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ End(BytesEnd::new(format!("&{i};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+ }
+
+ mod hex {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn decl() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Decl(BytesDecl::new(&format!("{i:x};"), None, None)),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn pi() {
+ for i in 0..=0x10FFFF {
+ let input = format!("{i:x};?>");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ PI(BytesPI::new(&format!("{i:x};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn doctype() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ DocType(BytesText::from_escaped(&format!("{i:x};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn comment() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Comment(BytesText::from_escaped(&format!("{i:x};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn cdata() {
+ for i in 0..=0x10FFFF {
+ let input = format!("");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ CData(BytesCData::new(format!("{i:x};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn text() {
+ for i in 0..=0x10FFFF {
+ let input = format!("{i:x};");
+ let mut reader = Reader::from_str(&input);
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ GeneralRef(BytesRef::new(format!("#{i:x}"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn empty() {
+ for i in 0u32..=0x10FFFF {
+ let input = format!(
+ "<{i:x}; {i:x};='{i:x};' {i:x};=\"{i:x};\" {i:x};={i:x};/>"
+ );
+ let mut reader = Reader::from_str(&input);
+
+ let name_len = format!("{i:x};").len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Empty(BytesStart::from_content(
+ format!(
+ "{i:x}; {i:x};='{i:x};' {i:x};=\"{i:x};\" {i:x};={i:x};"
+ ),
+ name_len
+ )),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn start() {
+ for i in 0..=0x10FFFF {
+ let input = format!(
+ "<{i:x}; {i:x};='{i:x};' {i:x};=\"{i:x};\" {i:x};={i:x};>"
+ );
+ let mut reader = Reader::from_str(&input);
+
+ let name_len = format!("{i:x};").len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Start(BytesStart::from_content(
+ format!(
+ "{i:x}; {i:x};='{i:x};' {i:x};=\"{i:x};\" {i:x};={i:x};"
+ ),
+ name_len
+ )),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+
+ #[test]
+ fn end() {
+ for i in 0..=0x10FFFF {
+ let input = format!("<>{i:x};>");
+ let mut reader = Reader::from_str(&input);
+ reader.config_mut().check_end_names = false;
+
+ // Skip <>
+ reader.read_event().unwrap();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ End(BytesEnd::new(format!("{i:x};"))),
+ "Character reference {i}=0x{i:x}: {input}"
+ );
+ }
+ }
+ }
+}
+
+mod general_entity_reference {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn decl() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Decl(BytesDecl::new("&entity;", None, None)),
+ );
+ }
+
+ #[test]
+ fn pi() {
+ let mut reader = Reader::from_str("&entity;?>");
+
+ assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("&entity;")));
+ }
+
+ #[test]
+ fn doctype() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ DocType(BytesText::from_escaped("&entity;")),
+ );
+ }
+
+ #[test]
+ fn comment() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Comment(BytesText::from_escaped("&entity;")),
+ );
+ }
+
+ #[test]
+ fn cdata() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ CData(BytesCData::new("&entity;")),
+ );
+ }
+
+ #[test]
+ fn text() {
+ let mut reader = Reader::from_str("&entity;");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ GeneralRef(BytesRef::new("entity")),
+ );
+ }
+
+ #[test]
+ fn empty() {
+ let mut reader = Reader::from_str(
+ "<&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;/>",
+ );
+
+ let name_len = "&entity;".len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Empty(BytesStart::from_content(
+ "&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;",
+ name_len
+ )),
+ );
+ }
+
+ #[test]
+ fn start() {
+ let mut reader = Reader::from_str(
+ "<&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;>",
+ );
+
+ let name_len = "&entity;".len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Start(BytesStart::from_content(
+ "&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;",
+ name_len
+ )),
+ );
+ }
+
+ #[test]
+ fn end() {
+ let mut reader = Reader::from_str("<>&entity;>");
+ reader.config_mut().check_end_names = false;
+
+ // Skip <>
+ reader.read_event().unwrap();
+ assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("&entity;")));
+ }
+}
+
+/// _Parameter entity references_ are references to entities recognized within DTD.
+/// Those references are recognized [only] inside DTD (`<!DOCTYPE>` declaration) and have a
+/// form `%name;` (percent sign, name, semicolon).
+///
+/// Parameter entities are so-called _parsed entities_, i.e. the content of this
+/// reference is a part of DTD and MUST follow DTD grammar after all substitutions.
+/// That also means that DTD could be self-modified.
+///
+/// In those tests, however, parameter entity references are not recognized.
+///
+/// [only]: https://www.w3.org/TR/xml11/#indtd
+mod parameter_entity_reference {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn decl() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Decl(BytesDecl::new("%param;", None, None)),
+ );
+ }
+
+ #[test]
+ fn pi() {
+ let mut reader = Reader::from_str("%param;?>");
+
+ assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("%param;")));
+ }
+
+ /// Because we do not parse DTD, we do not recognize parameter reference here yet.
+ /// TODO: Recognize parameter entity references when DTD parsing is implemented
+ #[test]
+ fn doctype() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ DocType(BytesText::from_escaped("%param;")),
+ );
+ }
+
+ /// Comments can be part of DTD, but parameter entity references are not recognized within them.
+ ///
+ /// See:
+ #[test]
+ fn comment() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Comment(BytesText::from_escaped("%param;")),
+ );
+ }
+
+ #[test]
+ fn cdata() {
+ let mut reader = Reader::from_str("");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ CData(BytesCData::new("%param;")),
+ );
+ }
+
+ #[test]
+ fn text() {
+ let mut reader = Reader::from_str("%param;");
+
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Text(BytesText::from_escaped("%param;")),
+ );
+ }
+
+ #[test]
+ fn empty() {
+ let mut reader =
+ Reader::from_str("<%param; %param;='%param;' %param;=\"%param;\" %param;=%param;/>");
+
+ let name_len = "%param;".len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Empty(BytesStart::from_content(
+ "%param; %param;='%param;' %param;=\"%param;\" %param;=%param;",
+ name_len
+ )),
+ );
+ }
+
+ #[test]
+ fn start() {
+ let mut reader =
+ Reader::from_str("<%param; %param;='%param;' %param;=\"%param;\" %param;=%param;>");
+
+ let name_len = "%param;".len();
+ assert_eq!(
+ reader.read_event().unwrap(),
+ Start(BytesStart::from_content(
+ "%param; %param;='%param;' %param;=\"%param;\" %param;=%param;",
+ name_len
+ )),
+ );
+ }
+
+ #[test]
+ fn end() {
+ let mut reader = Reader::from_str("<>%param;>");
+ reader.config_mut().check_end_names = false;
+
+ // Skip <>
+ reader.read_event().unwrap();
+ assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("%param;")));
+ }
+}
+
+#[test]
+fn mixed_text() {
+ let input = "text with <&' ' or ' '";
+ let mut r = Reader::from_str(input);
+
+ assert_eq!(
+ r.read_event().unwrap(),
+ Text(BytesText::from_escaped("text with "))
+ );
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("lt")));
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("amp")));
+ assert_eq!(r.read_event().unwrap(), Text(BytesText::from_escaped("'")));
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("#32")));
+ assert_eq!(
+ r.read_event().unwrap(),
+ Text(BytesText::from_escaped("' or '"))
+ );
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("#x20")));
+ assert_eq!(r.read_event().unwrap(), Text(BytesText::from_escaped("'")));
+ assert_eq!(r.read_event().unwrap(), Eof);
+}
diff --git a/tests/reader.rs b/tests/reader.rs
index 2bc27e57..fecdeabc 100644
--- a/tests/reader.rs
+++ b/tests/reader.rs
@@ -1,6 +1,6 @@
use std::str::from_utf8;
-use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event::*};
+use quick_xml::events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event::*};
use quick_xml::name::QName;
use quick_xml::reader::Reader;
@@ -163,16 +163,17 @@ fn test_escaped_content() {
let mut r = Reader::from_str("<test>");
assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a")));
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("lt")));
match r.read_event() {
Ok(Text(e)) => {
assert_eq!(
&*e,
- b"<test>",
- "content unexpected: expecting '<test>', got '{:?}'",
+ b"test",
+ "content unexpected: expecting 'test', got '{:?}'",
from_utf8(&e)
);
- match e.unescape() {
- Ok(c) => assert_eq!(c, ""),
+ match e.decode() {
+ Ok(c) => assert_eq!(c, "test"),
Err(e) => panic!(
"cannot escape content at position {}: {:?}",
r.error_position(),
@@ -187,6 +188,7 @@ fn test_escaped_content() {
e
),
}
+ assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("gt")));
assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a")));
}
diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs
index 68726195..4fb9ec53 100644
--- a/tests/roundtrip.rs
+++ b/tests/roundtrip.rs
@@ -236,7 +236,7 @@ fn reescape_text() {
match reader.read_event().unwrap() {
Eof => break,
Text(e) => {
- let t = e.unescape().unwrap();
+ let t = e.decode().unwrap();
assert!(writer.write_event(Text(BytesText::new(&t))).is_ok());
}
e => assert!(writer.write_event(e).is_ok()),