From 31ca532b384b786d1317343574923edbdac48a5a Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 17 Jun 2024 01:48:04 +0500 Subject: [PATCH 1/3] Remove unused import in doctest --- src/reader/ns_reader.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index cfeb73de..07220815 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -337,7 +337,6 @@ impl NsReader { /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; - /// use quick_xml::events::attributes::Attribute; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// From 2553b62e2e1f98eb0a1ef7e930826e7d5c7630b1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 17 Jun 2024 01:13:49 +0500 Subject: [PATCH 2/3] Accept `Decoder` instead of `Reader` when decoding attributes. This will allow decode them even if Reader does not exists, for example, when you only write events. --- Changelog.md | 5 +++++ benches/macrobenches.rs | 8 ++++---- examples/custom_entities.rs | 2 +- examples/read_nodes.rs | 6 +++--- src/events/attributes.rs | 15 ++++++++------- tests/fuzzing.rs | 6 +++--- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Changelog.md b/Changelog.md index e022b44f..92f37f68 100644 --- a/Changelog.md +++ b/Changelog.md @@ -19,6 +19,11 @@ ### Misc Changes +- [#760]: `Attribute::decode_and_unescape_value` and `Attribute::decode_and_unescape_value_with` now + accepts `Decoder` instead of `Reader`. Use `Reader::decoder()` to get it. + +[#760]: https://github.com/tafia/quick-xml/pull/760 + ## 0.33.0 -- 2024-06-21 diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs index a8cbbd53..2b882b12 100644 --- a/benches/macrobenches.rs +++ b/benches/macrobenches.rs @@ -50,7 +50,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> { match criterion::black_box(r.read_event()?) { Event::Start(e) | Event::Empty(e) => { for attr in e.attributes() { - criterion::black_box(attr?.decode_and_unescape_value(&r)?); + criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); } } Event::Text(e) => { @@ -75,7 +75,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> { match criterion::black_box(r.read_event_into(&mut buf)?) { Event::Start(e) | Event::Empty(e) => { for attr in e.attributes() { - criterion::black_box(attr?.decode_and_unescape_value(&r)?); + criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); } } Event::Text(e) => { @@ -101,7 +101,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> { (resolved_ns, Event::Start(e) | Event::Empty(e)) => { criterion::black_box(resolved_ns); for attr in e.attributes() { - criterion::black_box(attr?.decode_and_unescape_value(&r)?); + criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); } } (resolved_ns, Event::Text(e)) => { @@ -129,7 +129,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> { (resolved_ns, Event::Start(e) | Event::Empty(e)) => { criterion::black_box(resolved_ns); for attr in e.attributes() { - criterion::black_box(attr?.decode_and_unescape_value(&r)?); + criterion::black_box(attr?.decode_and_unescape_value(r.decoder())?); } } (resolved_ns, Event::Text(e)) => { diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index 5be53454..e68a6824 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -47,7 +47,7 @@ fn main() -> Result<(), Box> { .attributes() .map(|a| { a.unwrap() - .decode_and_unescape_value_with(&reader, |ent| { + .decode_and_unescape_value_with(reader.decoder(), |ent| { custom_entities.get(ent).map(|s| s.as_str()) }) .unwrap() diff --git a/examples/read_nodes.rs b/examples/read_nodes.rs index 776f71d5..6c32f486 100644 --- a/examples/read_nodes.rs +++ b/examples/read_nodes.rs @@ -70,8 +70,8 @@ impl Translation { for attr_result in element.attributes() { let a = attr_result?; match a.key.as_ref() { - b"Language" => lang = a.decode_and_unescape_value(reader)?, - b"Tag" => tag = a.decode_and_unescape_value(reader)?, + b"Language" => lang = a.decode_and_unescape_value(reader.decoder())?, + b"Tag" => tag = a.decode_and_unescape_value(reader.decoder())?, _ => (), } } @@ -138,7 +138,7 @@ fn main() -> Result<(), AppError> { Ok::, Infallible>(std::borrow::Cow::from("")) }) .unwrap().to_string(); - let value = a.decode_and_unescape_value(&reader).or_else(|err| { + let value = a.decode_and_unescape_value(reader.decoder()).or_else(|err| { dbg!("unable to read key in DefaultSettings attribute {:?}, utf8 error {:?}", &a, err); Ok::, Infallible>(std::borrow::Cow::from("")) }).unwrap().to_string(); diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 2d1abfde..5f56c045 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -2,11 +2,12 @@ //! //! Provides an iterator over attributes key/value pairs +use crate::encoding::Decoder; use crate::errors::Result as XmlResult; use crate::escape::{escape, resolve_predefined_entity, unescape_with}; use crate::name::QName; -use crate::reader::Reader; use crate::utils::{is_whitespace, write_byte_string, write_cow_string, Bytes}; + use std::fmt::{self, Debug, Display, Formatter}; use std::iter::FusedIterator; use std::{borrow::Cow, ops::Range}; @@ -84,23 +85,23 @@ impl<'a> Attribute<'a> { /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. - pub fn decode_and_unescape_value(&self, reader: &Reader) -> XmlResult> { - self.decode_and_unescape_value_with(reader, resolve_predefined_entity) + pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult> { + self.decode_and_unescape_value_with(decoder, resolve_predefined_entity) } /// Decodes then unescapes the value with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. - pub fn decode_and_unescape_value_with<'entity, B>( + pub fn decode_and_unescape_value_with<'entity>( &self, - reader: &Reader, + decoder: Decoder, resolve_entity: impl FnMut(&str) -> Option<&'entity str>, ) -> XmlResult> { let decoded = match &self.value { - Cow::Borrowed(bytes) => reader.decoder().decode(bytes)?, + Cow::Borrowed(bytes) => decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime - Cow::Owned(bytes) => reader.decoder().decode(bytes)?.into_owned().into(), + Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(), }; match unescape_with(&decoded, resolve_entity)? { diff --git a/tests/fuzzing.rs b/tests/fuzzing.rs index 43242537..deca382a 100644 --- a/tests/fuzzing.rs +++ b/tests/fuzzing.rs @@ -30,9 +30,9 @@ fn fuzz_101() { match reader.read_event_into(&mut buf) { Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { for a in e.attributes() { - if a.ok() - .map_or(true, |a| a.decode_and_unescape_value(&reader).is_err()) - { + if a.ok().map_or(true, |a| { + a.decode_and_unescape_value(reader.decoder()).is_err() + }) { break; } } From 55f7aa14dcbbdea9f2ecf147da4d9cf198697a82 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 10 Jul 2022 01:21:23 +0500 Subject: [PATCH 3/3] Consume event when write. Use `Event::borrow()` if you want to keep ownership This change will allow to feed to the writer different structs which, otherwise, would be impossible if are not holds an `Event` inside your struct. In the future the Event may be split into `reader::Event` and `writer::Event` and the reader variant may evolve to be borrow-only. --- Changelog.md | 1 + fuzz/fuzz_targets/fuzz_target_1.rs | 2 +- fuzz/fuzz_targets/structured_roundtrip.rs | 2 +- src/writer.rs | 32 +++++++++++------------ src/writer/async_tokio.rs | 30 ++++++++++----------- tests/roundtrip.rs | 2 +- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/Changelog.md b/Changelog.md index 92f37f68..255f2b37 100644 --- a/Changelog.md +++ b/Changelog.md @@ -21,6 +21,7 @@ - [#760]: `Attribute::decode_and_unescape_value` and `Attribute::decode_and_unescape_value_with` now accepts `Decoder` instead of `Reader`. Use `Reader::decoder()` to get it. +- [#760]: `Writer::write_event` now consumes event. Use `Event::borrow()` if you want to keep ownership. [#760]: https://github.com/tafia/quick-xml/pull/760 diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 9809526f..08d983e9 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -26,7 +26,7 @@ where let _event = black_box(event.borrow()); let _event = black_box(event.as_ref()); debug_format!(event); - debug_format!(writer.write_event(event)); + debug_format!(writer.write_event(event.borrow())); } match event_result { Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { diff --git a/fuzz/fuzz_targets/structured_roundtrip.rs b/fuzz/fuzz_targets/structured_roundtrip.rs index 2eb6962e..6432cb95 100644 --- a/fuzz/fuzz_targets/structured_roundtrip.rs +++ b/fuzz/fuzz_targets/structured_roundtrip.rs @@ -51,7 +51,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { // TODO: Handle error cases. use WriterFunc::*; match writer_func { - WriteEvent(event) => writer.write_event(event)?, + WriteEvent(event) => writer.write_event(event.borrow())?, WriteBom => writer.write_bom()?, WriteIndent => writer.write_indent()?, CreateElement { diff --git a/src/writer.rs b/src/writer.rs index 7dec0ccf..81b7df34 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -51,7 +51,7 @@ use {crate::de::DeError, serde::Serialize}; /// }, /// Ok(Event::Eof) => break, /// // we can either move or borrow the event to write, depending on your use-case -/// Ok(e) => assert!(writer.write_event(e).is_ok()), +/// Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// } /// } @@ -192,37 +192,37 @@ impl Writer { } /// Writes the given event to the underlying writer. - pub fn write_event<'a, E: AsRef>>(&mut self, event: E) -> Result<()> { + pub fn write_event<'a, E: Into>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; - let result = match *event.as_ref() { - Event::Start(ref e) => { - let result = self.write_wrapped(b"<", e, b">"); + let result = match event.into() { + Event::Start(e) => { + let result = self.write_wrapped(b"<", &e, b">"); if let Some(i) = self.indent.as_mut() { i.grow(); } result } - Event::End(ref e) => { + Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } - self.write_wrapped(b"") + self.write_wrapped(b"") } - Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"), - Event::Text(ref e) => { + Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"), + Event::Text(e) => { next_should_line_break = false; - self.write(e) + self.write(&e) } - Event::Comment(ref e) => self.write_wrapped(b""), - Event::CData(ref e) => { + Event::Comment(e) => self.write_wrapped(b""), + Event::CData(e) => { next_should_line_break = false; self.write(b"") } - Event::Decl(ref e) => self.write_wrapped(b""), - Event::PI(ref e) => self.write_wrapped(b""), - Event::DocType(ref e) => self.write_wrapped(b""), + Event::Decl(e) => self.write_wrapped(b""), + Event::PI(e) => self.write_wrapped(b""), + Event::DocType(e) => self.write_wrapped(b""), Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { diff --git a/src/writer/async_tokio.rs b/src/writer/async_tokio.rs index dfecbe4a..dab4c5b2 100644 --- a/src/writer/async_tokio.rs +++ b/src/writer/async_tokio.rs @@ -9,37 +9,37 @@ use crate::{ElementWriter, Writer}; impl Writer { /// Writes the given event to the underlying writer. Async version of [`Writer::write_event`]. - pub async fn write_event_async<'a, E: AsRef>>(&mut self, event: E) -> Result<()> { + pub async fn write_event_async<'a, E: Into>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; - let result = match *event.as_ref() { - Event::Start(ref e) => { - let result = self.write_wrapped_async(b"<", e, b">").await; + let result = match event.into() { + Event::Start(e) => { + let result = self.write_wrapped_async(b"<", &e, b">").await; if let Some(i) = self.indent.as_mut() { i.grow(); } result } - Event::End(ref e) => { + Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } - self.write_wrapped_async(b"").await + self.write_wrapped_async(b"").await } - Event::Empty(ref e) => self.write_wrapped_async(b"<", e, b"/>").await, - Event::Text(ref e) => { + Event::Empty(e) => self.write_wrapped_async(b"<", &e, b"/>").await, + Event::Text(e) => { next_should_line_break = false; - self.write_async(e).await + self.write_async(&e).await } - Event::Comment(ref e) => self.write_wrapped_async(b"").await, - Event::CData(ref e) => { + Event::Comment(e) => self.write_wrapped_async(b"").await, + Event::CData(e) => { next_should_line_break = false; self.write_async(b"").await } - Event::Decl(ref e) => self.write_wrapped_async(b"").await, - Event::PI(ref e) => self.write_wrapped_async(b"").await, - Event::DocType(ref e) => self.write_wrapped_async(b"").await, + Event::Decl(e) => self.write_wrapped_async(b"").await, + Event::PI(e) => self.write_wrapped_async(b"").await, + Event::DocType(e) => self.write_wrapped_async(b"").await, Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs index 0f0ed0f9..68726195 100644 --- a/tests/roundtrip.rs +++ b/tests/roundtrip.rs @@ -180,7 +180,7 @@ fn with_trim_ref() { loop { match reader.read_event().unwrap() { Eof => break, - e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` + e => assert!(writer.write_event(e.borrow()).is_ok()), // either `e` or `&e` } }