Merge pull request #767 from Mingun/move-to-integration

Convert some unit tests to integration tests
tafia · Jun 23, 2024 · 649f3d8 · 649f3d8
2 parents 2659775 + a24ed89
commit 649f3d8
Show file tree

Hide file tree

Showing 11 changed files with 812 additions and 827 deletions.
diff --git a/src/de/mod.rs b/src/de/mod.rs
@@ -2165,8 +2165,9 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
  replace(&mut self.lookahead, self.reader.next())
  }
 
+ /// Returns `true` when next event is not a text event in any form.
  #[inline(always)]
- const fn need_trim_end(&self) -> bool {
+ const fn current_event_is_last_text(&self) -> bool {
  // If next event is a text or CDATA, we should not trim trailing spaces
  !matches!(
  self.lookahead,
@@ -2182,43 +2183,27 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
  /// [`CData`]: PayloadEvent::CData
  fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
  loop {
- match self.lookahead {
- Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
- let text = self.next_text()?;
-
- let mut s = result.into_owned();
- s += &text;
- result = Cow::Owned(s);
- }
- _ => break,
+ if self.current_event_is_last_text() {
+ break;
  }
- }
- Ok(DeEvent::Text(Text { text: result }))
- }
 
- /// Read one text event, panics if current event is not a text event
- ///
- /// |Event |XML |Handling
- /// |-----------------------|---------------------------|----------------------------------------
- /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
- /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
- /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
- /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
- /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
- #[inline(always)]
- fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
- match self.next_impl()? {
- PayloadEvent::Text(mut e) => {
- if self.need_trim_end() {
- e.inplace_trim_end();
+ match self.next_impl()? {
+ PayloadEvent::Text(mut e) => {
+ if self.current_event_is_last_text() {
+ // FIXME: Actually, we should trim after decoding text, but now we trim before
+ e.inplace_trim_end();
+ }
+ result
+ .to_mut()
+ .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
  }
- Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
- }
- PayloadEvent::CData(e) => Ok(e.decode()?),
+ PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
 
- // SAFETY: this method is called only when we peeked Text or CData
- _ => unreachable!("Only `Text` and `CData` events can come here"),
+ // SAFETY: we get here only when `current_event_is_last_text` returned `false`, i.e. the lookahead is Text or CData
+ _ => unreachable!("Only `Text` and `CData` events can come here"),
+ }
  }
+ Ok(DeEvent::Text(Text { text: result }))
  }
 
  /// Return an input-borrowing event.
@@ -2228,7 +2213,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
  PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
  PayloadEvent::End(e) => Ok(DeEvent::End(e)),
  PayloadEvent::Text(mut e) => {
- if self.need_trim_end() && e.inplace_trim_end() {
+ if self.current_event_is_last_text() && e.inplace_trim_end() {
+ // FIXME: Actually, we should trim after decoding text, but now we trim before
  continue;
  }
  self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)

diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs
@@ -359,7 +359,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
 #[cfg(test)]
 mod test {
  use super::TokioAdapter;
- use crate::reader::test::{check, small_buffers};
+ use crate::reader::test::check;
 
  check!(
  #[tokio::test]
@@ -370,12 +370,6 @@ mod test {
  async, await
  );
 
- small_buffers!(
- #[tokio::test]
- read_event_into_async: tokio::io::BufReader<_>,
- async, await
- );
-
  #[test]
  fn test_future_is_send() {
  // This test should just compile, no actual runtime checks are performed here.

diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs
@@ -445,7 +445,7 @@ impl Reader<BufReader<File>> {
 
 #[cfg(test)]
 mod test {
- use crate::reader::test::{check, small_buffers};
+ use crate::reader::test::check;
  use crate::reader::XmlSource;
 
  /// Default buffer constructor just passes the byte array from the test
@@ -460,59 +460,4 @@ mod test {
  identity,
  &mut Vec::new()
  );
-
- small_buffers!(
- #[test]
- read_event_into: std::io::BufReader<_>
- );
-
- #[cfg(feature = "encoding")]
- mod encoding {
- use crate::events::Event;
- use crate::reader::Reader;
- use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
- use pretty_assertions::assert_eq;
-
- /// Checks that encoding is detected by BOM and changed after XML declaration
- /// BOM indicates UTF-16LE, but XML - windows-1251
- #[test]
- fn bom_detected() {
- let mut reader =
- Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
- let mut buf = Vec::new();
-
- assert_eq!(reader.decoder().encoding(), UTF_8);
- assert!(matches!(
- reader.read_event_into(&mut buf).unwrap(),
- Event::Decl(_)
- ));
- assert_eq!(reader.decoder().encoding(), WINDOWS_1251);
-
- assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
- }
-
- /// Checks that encoding is changed by XML declaration, but only once
- #[test]
- fn xml_declaration() {
- let mut reader = Reader::from_reader(
- b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>".as_ref(),
- );
- let mut buf = Vec::new();
-
- assert_eq!(reader.decoder().encoding(), UTF_8);
- assert!(matches!(
- reader.read_event_into(&mut buf).unwrap(),
- Event::Decl(_)
- ));
- assert_eq!(reader.decoder().encoding(), UTF_16LE);
-
- assert!(matches!(
- reader.read_event_into(&mut buf).unwrap(),
- Event::Decl(_)
- ));
- assert_eq!(reader.decoder().encoding(), UTF_16LE);
-
- assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
- }
- }
 }
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
@@ -1826,157 +1826,8 @@ mod test {
  };
  }
 
- /// Tests for https://github.com/tafia/quick-xml/issues/469
- macro_rules! small_buffers {
- (
- #[$test:meta]
- $read_event:ident: $BufReader:ty
- $(, $async:ident, $await:ident)?
- ) => {
- mod small_buffers {
- use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
- use crate::reader::Reader;
- use pretty_assertions::assert_eq;
-
- #[$test]
- $($async)? fn decl() {
- let xml = "<?xml ?>";
- // ^^^^^^^ data that fit into buffer
- let size = xml.match_indices("?>").next().unwrap().0 + 1;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn pi() {
- let xml = "<?pi?>";
- // ^^^^^ data that fit into buffer
- let size = xml.match_indices("?>").next().unwrap().0 + 1;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::PI(BytesPI::new("pi"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn empty() {
- let xml = "<empty/>";
- // ^^^^^^^ data that fit into buffer
- let size = xml.match_indices("/>").next().unwrap().0 + 1;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Empty(BytesStart::new("empty"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn cdata1() {
- let xml = "<![CDATA[cdata]]>";
- // ^^^^^^^^^^^^^^^ data that fit into buffer
- let size = xml.match_indices("]]>").next().unwrap().0 + 1;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::CData(BytesCData::new("cdata"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn cdata2() {
- let xml = "<![CDATA[cdata]]>";
- // ^^^^^^^^^^^^^^^^ data that fit into buffer
- let size = xml.match_indices("]]>").next().unwrap().0 + 2;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::CData(BytesCData::new("cdata"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn comment1() {
- let xml = "<!--comment-->";
- // ^^^^^^^^^^^^ data that fit into buffer
- let size = xml.match_indices("-->").next().unwrap().0 + 1;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Comment(BytesText::new("comment"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
-
- #[$test]
- $($async)? fn comment2() {
- let xml = "<!--comment-->";
- // ^^^^^^^^^^^^^ data that fit into buffer
- let size = xml.match_indices("-->").next().unwrap().0 + 2;
- let br = <$BufReader>::with_capacity(size, xml.as_bytes());
- let mut reader = Reader::from_reader(br);
- let mut buf = Vec::new();
-
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Comment(BytesText::new("comment"))
- );
- assert_eq!(
- reader.$read_event(&mut buf) $(.$await)? .unwrap(),
- Event::Eof
- );
- }
- }
- };
- }
-
  // Export macros for the child modules:
  // - buffered_reader
  // - slice_reader
  pub(super) use check;
- pub(super) use small_buffers;
 }
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
@@ -376,25 +376,4 @@ mod test {
  identity,
  ()
  );
-
- #[cfg(feature = "encoding")]
- mod encoding {
- use crate::events::Event;
- use crate::reader::Reader;
- use encoding_rs::UTF_8;
- use pretty_assertions::assert_eq;
-
- /// Checks that XML declaration cannot change the encoding from UTF-8 if
- /// a `Reader` was created using `from_str` method
- #[test]
- fn str_always_has_utf8() {
- let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");
-
- assert_eq!(reader.decoder().encoding(), UTF_8);
- reader.read_event().unwrap();
- assert_eq!(reader.decoder().encoding(), UTF_8);
-
- assert_eq!(reader.read_event().unwrap(), Event::Eof);
- }
- }
 }
diff --git a/src/reader/state.rs b/src/reader/state.rs
@@ -71,9 +71,20 @@ impl ReaderState {
  BytesText::wrap(content, self.decoder())
  }
 
- /// reads `BytesElement` starting with a `!`,
- /// return `Comment`, `CData` or `DocType` event
+ /// Returns `Comment`, `CData` or `DocType` event.
+ ///
+ /// `buf` contains data between `<` and `>`:
+ /// - CDATA: `![CDATA[...]]`
+ /// - Comment: `!--...--`
+ /// - Doctype (uppercase): `!D...`
+ /// - Doctype (lowercase): `!d...`
  pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
+ debug_assert_eq!(
+ buf.first(),
+ Some(&b'!'),
+ "CDATA, comment or DOCTYPE should start from '!'"
+ );
+
  let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
  string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
  };
@@ -153,7 +164,15 @@ impl ReaderState {
 
  /// Wraps content of `buf` into the [`Event::End`] event. Does the check that
  /// end name matches the last opened start name if `self.config.check_end_names` is set.
+ ///
+ /// `buf` contains data between `<` and `>`, for example `/tag`.
  pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
+ debug_assert_eq!(
+ buf.first(),
+ Some(&b'/'),
+ "closing tag should start from '/'"
+ );
+
  // Strip the `/` character. `content` contains data between `</` and `>`
  let content = &buf[1..];
  // XML standard permits whitespaces after the markup name in closing tags.