diff --git a/benches/microbenches.rs b/benches/microbenches.rs index aa5c8b70..be0df142 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -30,7 +30,7 @@ fn read_event(c: &mut Criterion) { group.bench_function("trim_text = false", |b| { b.iter(|| { let mut r = Reader::from_str(SAMPLE); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -49,9 +49,7 @@ fn read_event(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { let mut r = Reader::from_str(SAMPLE); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); + r.trim_text(true).check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -76,7 +74,7 @@ fn read_resolved_event_into(c: &mut Criterion) { group.bench_function("trim_text = false", |b| { b.iter(|| { let mut r = NsReader::from_str(SAMPLE); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_resolved_event() { @@ -95,9 +93,7 @@ fn read_resolved_event_into(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { let mut r = NsReader::from_str(SAMPLE); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); + r.trim_text(true).check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_resolved_event() { @@ -124,9 +120,7 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); + r.trim_text(true).check_end_names(false); match r.read_event() { Ok(Event::Start(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), @@ -141,9 +135,7 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); + r.trim_text(true).check_end_names(false); match r.read_event() { Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(), something_else => panic!("Did not expect {:?}", something_else), @@ -158,9 +150,7 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.check_end_names(false) - .check_comments(false) - .trim_text(true); + r.trim_text(true).check_end_names(false); match r.read_event() { Ok(Event::CData(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), @@ -178,7 +168,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("with_checks = true", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -199,7 +189,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("with_checks = false", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -220,7 +210,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("try_get_attribute", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); loop { match r.read_event() { diff --git a/compare/benches/bench.rs b/compare/benches/bench.rs index c8a2e37b..8b2a736f 100644 --- a/compare/benches/bench.rs +++ b/compare/benches/bench.rs @@ -59,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) { |b, input| { b.iter(|| { let mut r = Reader::from_reader(input.as_bytes()); - r.check_end_names(false).check_comments(false); + r.check_end_names(false); let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { diff --git a/src/de/mod.rs b/src/de/mod.rs index b97b19db..c3068b9e 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -3055,6 +3055,11 @@ mod tests { use super::*; use pretty_assertions::assert_eq; + fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> { + dbg!(source); + Deserializer::from_str(source) + } + #[cfg(feature = "overlapped-lists")] mod skip { use super::*; @@ -3065,7 +3070,7 @@ mod tests { /// Checks that `peek()` and `read()` behaves correctly after `skip()` #[test] fn read_and_peek() { - let mut de = Deserializer::from_str( + let mut de = make_de( r#" @@ -3196,7 +3201,7 @@ mod tests { /// Checks that `read_to_end()` behaves correctly after `skip()` #[test] fn read_to_end() { - let mut de = Deserializer::from_str( + let mut de = make_de( r#" @@ -3289,7 +3294,7 @@ mod tests { /// Test for https://github.com/tafia/quick-xml/issues/435 #[test] fn partial_replay() { - let mut de = Deserializer::from_str( + let mut de = make_de( r#" @@ -3495,7 +3500,7 @@ mod tests { item: Vec<()>, } - let mut de = Deserializer::from_str( + let mut de = make_de( r#" @@ -3521,7 +3526,7 @@ mod tests { fn invalid_xml() { use crate::de::DeEvent::*; - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); // Cache all events let checkpoint = de.skip_checkpoint(); @@ -3538,7 +3543,7 @@ mod tests { #[test] fn complex() { - let mut de = Deserializer::from_str( + let mut de = make_de( r#" textcontent @@ -3572,7 +3577,7 @@ mod tests { #[test] fn invalid_xml1() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); @@ -3586,7 +3591,7 @@ mod tests { #[test] fn invalid_xml2() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Text("".into())); @@ -3716,43 +3721,43 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str("text"); + let mut de = make_de("text"); assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); } #[test] fn cdata() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into())); } #[test] fn text_and_cdata() { - let mut de = Deserializer::from_str("text and "); + let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into())); } #[test] fn text_and_empty_cdata() { - let mut de = Deserializer::from_str("text and "); + let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into())); } #[test] fn cdata_and_text() { - let mut de = Deserializer::from_str(" and text"); + let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into())); } #[test] fn empty_cdata_and_text() { - let mut de = Deserializer::from_str(" and text"); + let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into())); } #[test] fn cdata_and_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ cdata]]>\ @@ -3767,7 +3772,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3779,7 +3784,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3791,7 +3796,7 @@ mod tests { #[test] fn text_and_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3803,7 +3808,7 @@ mod tests { #[test] fn text_and_empty_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3815,7 +3820,7 @@ mod tests { #[test] fn cdata_and_text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3827,7 +3832,7 @@ mod tests { #[test] fn empty_cdata_and_text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3839,7 +3844,7 @@ mod tests { #[test] fn cdata_and_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3856,7 +3861,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3868,7 +3873,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3880,7 +3885,7 @@ mod tests { #[test] fn text_and_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3892,7 +3897,7 @@ mod tests { #[test] fn text_and_empty_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ text \ \ @@ -3904,7 +3909,7 @@ mod tests { #[test] fn cdata_and_text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3916,7 +3921,7 @@ mod tests { #[test] fn empty_cdata_and_text() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3928,7 +3933,7 @@ mod tests { #[test] fn cdata_and_cdata() { - let mut de = Deserializer::from_str( + let mut de = make_de( "\ \ \ @@ -3958,7 +3963,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3"))); @@ -3968,7 +3973,7 @@ mod tests { /// Not matching end tag will result to error #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2"))); @@ -3977,7 +3982,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); // Text is trimmed from both sides @@ -3987,7 +3992,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); @@ -3996,7 +4001,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4011,7 +4016,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); @@ -4020,7 +4025,7 @@ mod tests { #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); match de.next() { @@ -4035,7 +4040,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); // Text is trimmed from both sides @@ -4045,7 +4050,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); @@ -4054,7 +4059,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4069,7 +4074,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); @@ -4079,7 +4084,7 @@ mod tests { #[test] fn end() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); @@ -4091,7 +4096,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); @@ -4100,7 +4105,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); @@ -4116,7 +4121,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); @@ -4125,7 +4130,7 @@ mod tests { #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); @@ -4134,7 +4139,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); @@ -4143,8 +4148,7 @@ mod tests { #[test] fn cdata() { - let mut de = - Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4152,7 +4156,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4164,7 +4168,7 @@ mod tests { /// Start from End event will always generate an error #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); match de.next() { Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => { assert_eq!(expected, ""); @@ -4185,7 +4189,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); @@ -4196,7 +4200,7 @@ mod tests { /// Not matching end tag will result in error #[test] fn end() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); @@ -4206,7 +4210,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text text2 "); + let mut de = make_de(" text text2 "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); @@ -4217,7 +4221,7 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); @@ -4228,7 +4232,7 @@ mod tests { #[test] fn eof() { // Text is trimmed from both sides - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4239,7 +4243,7 @@ mod tests { /// End event without corresponding start event will always generate an error #[test] fn end() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); match de.next() { @@ -4260,7 +4264,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); @@ -4269,7 +4273,7 @@ mod tests { #[test] fn end() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); match de.next() { @@ -4284,7 +4288,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text text2 "); + let mut de = make_de(" text text2 "); // Text is trimmed from the start and from the end assert_eq!( de.next().unwrap(), @@ -4295,8 +4299,7 @@ mod tests { #[test] fn cdata() { - let mut de = - Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!( de.next().unwrap(), @@ -4307,7 +4310,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4326,7 +4329,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); @@ -4336,7 +4339,7 @@ mod tests { /// Not matching end tag will result in error #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); @@ -4345,7 +4348,7 @@ mod tests { #[test] fn text() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides @@ -4355,8 +4358,7 @@ mod tests { #[test] fn cdata() { - let mut de = - Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into())); @@ -4365,7 +4367,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4376,7 +4378,7 @@ mod tests { /// End event without corresponding start event will always generate an error #[test] fn end() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); match de.next() { Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => { @@ -4394,7 +4396,7 @@ mod tests { #[test] fn start() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); @@ -4403,7 +4405,7 @@ mod tests { #[test] fn end() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); match de.next() { @@ -4420,8 +4422,7 @@ mod tests { #[test] fn cdata() { - let mut de = - Deserializer::from_str(" text "); + let mut de = make_de(" text "); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata text cdata2 ".into()) @@ -4431,7 +4432,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4445,8 +4446,7 @@ mod tests { #[test] fn start() { - let mut de = - Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); @@ -4454,8 +4454,7 @@ mod tests { #[test] fn end() { - let mut de = - Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); match de.next() { Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => { @@ -4469,8 +4468,7 @@ mod tests { #[test] fn text() { - let mut de = - Deserializer::from_str(" text "); + let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!( de.next().unwrap(), @@ -4481,9 +4479,8 @@ mod tests { #[test] fn cdata() { - let mut de = Deserializer::from_str( - "", - ); + let mut de = + make_de(""); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata cdata2 cdata3 ".into()) @@ -4493,7 +4490,7 @@ mod tests { #[test] fn eof() { - let mut de = Deserializer::from_str(""); + let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index c3cec060..a91eb185 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -274,6 +274,7 @@ impl Reader { /// # Examples /// /// ``` + /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 95deaf06..d024fe18 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,7 +7,7 @@ use std::ops::Range; use crate::encoding::Decoder; use crate::errors::{Error, Result}; use crate::events::Event; -use crate::reader::parser::Parser; +use crate::reader::state::ReaderState; use memchr; @@ -31,7 +31,7 @@ macro_rules! configure_methods { /// [`End`]: Event::End /// [`check_end_names`]: Self::check_end_names pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.expand_empty_elements = val; + self $(.$holder)? .state.expand_empty_elements = val; self } @@ -58,8 +58,8 @@ macro_rules! configure_methods { /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub fn trim_text(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.trim_text_start = val; - self $(.$holder)? .parser.trim_text_end = val; + self $(.$holder)? .state.trim_text_start = val; + self $(.$holder)? .state.trim_text_end = val; self } @@ -83,7 +83,7 @@ macro_rules! configure_methods { /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub fn trim_text_end(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.trim_text_end = val; + self $(.$holder)? .state.trim_text_end = val; self } @@ -99,7 +99,7 @@ macro_rules! configure_methods { /// /// [`End`]: Event::End pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.trim_markup_names_in_closing_tags = val; + self $(.$holder)? .state.trim_markup_names_in_closing_tags = val; self } @@ -137,7 +137,7 @@ macro_rules! configure_methods { /// [`End`]: Event::End /// [`expand_empty_elements`]: Self::expand_empty_elements pub fn check_end_names(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.check_end_names = val; + self $(.$holder)? .state.check_end_names = val; self } @@ -152,7 +152,7 @@ macro_rules! configure_methods { /// /// [`Comment`]: Event::Comment pub fn check_comments(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .parser.check_comments = val; + self $(.$holder)? .state.check_comments = val; self } }; @@ -167,7 +167,7 @@ macro_rules! read_event_impl { $(, $await:ident)? ) => {{ let event = loop { - match $self.parser.state { + match $self.state.state { ParseState::Init => { // Go to OpenedTag state // If encoding set explicitly, we not need to detect it. For example, // explicit UTF-8 set automatically if Reader was created using `from_str`. @@ -175,8 +175,8 @@ macro_rules! read_event_impl { // feature enabled path #[cfg(feature = "encoding")] if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? { - if $self.parser.encoding.can_be_refined() { - $self.parser.encoding = crate::reader::EncodingRef::BomDetected(encoding); + if $self.state.encoding.can_be_refined() { + $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding); } } @@ -200,12 +200,12 @@ macro_rules! read_event_impl { }, // Go to ClosedTag state in next two arms ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?, - ParseState::Empty => break $self.parser.close_expanded_empty(), + ParseState::Empty => break $self.state.close_expanded_empty(), ParseState::Exit => break Ok(Event::Eof), }; }; match event { - Err(_) | Ok(Event::Eof) => $self.parser.state = ParseState::Exit, + Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Exit, _ => {} } event @@ -228,24 +228,24 @@ macro_rules! read_until_open { $read_event:ident $(, $await:ident)? ) => {{ - $self.parser.state = ParseState::OpenedTag; + $self.state.state = ParseState::OpenedTag; - if $self.parser.trim_text_start { - $reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; + if $self.state.trim_text_start { + $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; } // If we already at the `<` symbol, do not try to return an empty Text event - if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { + if $reader.skip_one(b'<', &mut $self.state.offset) $(.$await)? ? { // Pass $buf to the next next iteration of parsing loop return Ok(Err($buf)); } match $reader - .read_bytes_until(b'<', $buf, &mut $self.parser.offset) + .read_bytes_until(b'<', $buf, &mut $self.state.offset) $(.$await)? { // Return Text event with `bytes` content - Ok(Some(bytes)) => $self.parser.emit_text(bytes).map(Ok), + Ok(Some(bytes)) => $self.state.emit_text(bytes).map(Ok), Ok(None) => Ok(Ok(Event::Eof)), Err(e) => Err(e), } @@ -278,43 +278,43 @@ macro_rules! read_until_close { $reader:expr $(, $await:ident)? ) => {{ - $self.parser.state = ParseState::ClosedTag; + $self.state.state = ParseState::ClosedTag; match $reader.peek_one() $(.$await)? { // ` match $reader - .read_bang_element($buf, &mut $self.parser.offset) + .read_bang_element($buf, &mut $self.state.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), - Ok(Some((bang_type, bytes))) => $self.parser.emit_bang(bang_type, bytes), + Ok(Some((bang_type, bytes))) => $self.state.emit_bang(bang_type, bytes), Err(e) => Err(e), }, // ` match $reader - .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + .read_bytes_until(b'>', $buf, &mut $self.state.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.emit_end(bytes), + Ok(Some(bytes)) => $self.state.emit_end(bytes), Err(e) => Err(e), }, // ` match $reader - .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + .read_bytes_until(b'>', $buf, &mut $self.state.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.emit_question_mark(bytes), + Ok(Some(bytes)) => $self.state.emit_question_mark(bytes), Err(e) => Err(e), }, // `<...` - opening or self-closed tag Ok(Some(_)) => match $reader - .read_element($buf, &mut $self.parser.offset) + .read_element($buf, &mut $self.state.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.parser.emit_start(bytes), + Ok(Some(bytes)) => $self.state.emit_start(bytes), Err(e) => Err(e), }, Ok(None) => Ok(Event::Eof), @@ -361,8 +361,8 @@ macro_rules! read_to_end { mod async_tokio; mod buffered_reader; mod ns_reader; -mod parser; mod slice_reader; +mod state; pub use ns_reader::NsReader; @@ -524,7 +524,7 @@ pub struct Reader { /// Source of data for parse reader: R, /// Configuration and current parse state - parser: Parser, + state: ReaderState, } /// Builder methods @@ -533,7 +533,7 @@ impl Reader { pub fn from_reader(reader: R) -> Self { Self { reader, - parser: Parser::default(), + state: ReaderState::default(), } } @@ -615,10 +615,10 @@ impl Reader { pub fn buffer_position(&self) -> usize { // when internal state is OpenedTag, we have actually read until '<', // which we don't want to show - if let ParseState::OpenedTag = self.parser.state { - self.parser.offset - 1 + if let ParseState::OpenedTag = self.state.state { + self.state.offset - 1 } else { - self.parser.offset + self.state.offset } } @@ -633,7 +633,7 @@ impl Reader { /// [`encoding`]: ../index.html#encoding #[inline] pub fn decoder(&self) -> Decoder { - self.parser.decoder() + self.state.decoder() } } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 3f5c48a8..aff8dcbb 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -27,7 +27,7 @@ impl<'a> Reader<&'a [u8]> { #[cfg(feature = "encoding")] { let mut reader = Self::from_reader(s.as_bytes()); - reader.parser.encoding = EncodingRef::Explicit(UTF_8); + reader.state.encoding = EncodingRef::Explicit(UTF_8); reader } diff --git a/src/reader/parser.rs b/src/reader/state.rs similarity index 97% rename from src/reader/parser.rs rename to src/reader/state.rs index b03bbf03..6108a51b 100644 --- a/src/reader/parser.rs +++ b/src/reader/state.rs @@ -10,12 +10,12 @@ use crate::reader::{is_whitespace, BangType, ParseState}; use memchr; -/// A struct that holds a current parse state and a parser configuration. +/// A struct that holds a current reader state and a parser configuration. /// It is independent on a way of reading data: the reader feed data into it and /// get back produced [`Event`]s. #[derive(Clone)] -pub(super) struct Parser { - /// Number of bytes read from the source of data since the parser was created +pub(super) struct ReaderState { + /// Number of bytes read from the source of data since the reader was created pub offset: usize, /// Defines how to process next byte pub state: ParseState, @@ -58,7 +58,7 @@ pub(super) struct Parser { pub encoding: EncodingRef, } -impl Parser { +impl ReaderState { /// Trims whitespaces from `bytes`, if required, and returns a [`Text`] event. /// /// # Parameters @@ -135,7 +135,7 @@ impl Parser { // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. let name = if self.trim_markup_names_in_closing_tags { - if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) { + if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !is_whitespace(b)) { let (name, _) = buf[1..].split_at(pos_end_name + 1); name } else { @@ -265,7 +265,7 @@ impl Parser { } } -impl Default for Parser { +impl Default for ReaderState { fn default() -> Self { Self { offset: 0, diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs index c378181e..12a21f95 100644 --- a/tests/async-tokio.rs +++ b/tests/async-tokio.rs @@ -1,3 +1,4 @@ +use pretty_assertions::assert_eq; use quick_xml::events::Event::*; use quick_xml::reader::Reader; @@ -7,14 +8,21 @@ async fn test_sample() { let mut reader = Reader::from_reader(src.as_bytes()); let mut buf = Vec::new(); let mut count = 0; + // Expected number of iterations, to prevent infinity loops if refactoring breaks test + let mut reads = 0; loop { + reads += 1; + assert!( + reads <= 5245, + "too many events, possible infinity loop: {reads}" + ); match reader.read_event_into_async(&mut buf).await.unwrap() { Start(_) => count += 1, - Decl(e) => println!("{:?}", e.version()), + Decl(e) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()), Eof => break, _ => (), } buf.clear(); } - println!("{}", count); + assert_eq!((count, reads), (1247, 5245)); } diff --git a/tests/encodings.rs b/tests/encodings.rs index fa721e93..a8b57e13 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -31,7 +31,7 @@ fn test_koi8_r_encoding() { let src = include_bytes!("documents/opennews_all.rss").as_ref(); let mut buf = vec![]; let mut r = Reader::from_reader(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); loop { match r.read_event_into(&mut buf) { Ok(Event::Text(e)) => { diff --git a/tests/namespaces.rs b/tests/namespaces.rs index 58f8c67d..0f68ad53 100644 --- a/tests/namespaces.rs +++ b/tests/namespaces.rs @@ -134,7 +134,7 @@ fn attributes_empty_ns() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); let e = match r.read_resolved_event() { Ok((Unbound, Empty(e))) => e, @@ -215,7 +215,7 @@ fn default_ns_shadowing_empty() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); // { diff --git a/tests/test.rs b/tests/test.rs index 55da32fa..b068079f 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -27,7 +27,7 @@ fn test_sample() { fn test_attributes_empty() { let src = ""; let mut r = Reader::from_str(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -55,7 +55,7 @@ fn test_attributes_empty() { fn test_attribute_equal() { let src = ""; let mut r = Reader::from_str(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -76,7 +76,7 @@ fn test_attribute_equal() { fn test_comment_starting_with_gt() { let src = "-->"; let mut r = Reader::from_str(src); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); loop { match r.read_event() { Ok(Comment(e)) => { diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 502cd502..e0438c9b 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -84,7 +84,7 @@ fn test_start_end_attr() { #[test] fn test_empty() { let mut r = Reader::from_str(""); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); next_eq!(r, Empty, b"a"); } @@ -98,14 +98,14 @@ fn test_empty_can_be_expanded() { #[test] fn test_empty_attr() { let mut r = Reader::from_str(""); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); next_eq!(r, Empty, b"a"); } #[test] fn test_start_end_comment() { let mut r = Reader::from_str(" "); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); next_eq!(r, Start, b"b", Empty, b"a", Empty, b"a", Comment, b"t", End, b"b"); } @@ -165,7 +165,6 @@ fn test_trim_test() { next_eq!(r, Start, b"a", Start, b"b", End, b"b", End, b"a"); let mut r = Reader::from_str(txt); - r.trim_text(false); next_eq!(r, Start, b"a", Start, b"b", Text, b" ", End, b"b", End, b"a"); } @@ -193,7 +192,7 @@ fn test_start_attr() { #[test] fn test_nested() { let mut r = Reader::from_str("test"); - r.trim_text(true).expand_empty_elements(false); + r.trim_text(true); next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); } @@ -276,7 +275,6 @@ fn test_write_empty_element_attrs() -> Result<()> { let str_from = r#""#; let expected = r#""#; let mut reader = Reader::from_str(str_from); - reader.expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event()? { @@ -403,7 +401,7 @@ fn test_new_xml_decl_empty() { #[test] fn test_offset_err_end_element() { let mut r = Reader::from_str(""); - r.trim_text(true).check_end_names(true); + r.trim_text(true); match r.read_event() { Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag @@ -419,7 +417,7 @@ fn test_offset_err_end_element() { #[test] fn test_offset_err_comment() { let mut r = Reader::from_str("