Skip to content

Commit

Permalink
Merge pull request #660 from Mingun/fix-580
Browse files Browse the repository at this point in the history
Fix `UnexpectedEof` when deserialize `xs:list`s and newtypes
  • Loading branch information
Mingun committed Oct 6, 2023
2 parents ae8db96 + 1077d00 commit dd34cab
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 99 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.

### Bug Fixes

- [#660]: Fixed incorrect deserialization of `xs:list`s from empty tags (`<tag/>`
or `<tag></tag>`). Previously an `DeError::UnexpectedEof")` was returned in that case
- [#580]: Fixed incorrect deserialization of vectors of newtypes from sequences of tags.

### Misc Changes

- [#643]: Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases.
Expand All @@ -32,11 +36,13 @@ MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
(and newly added `ElementWriter::write_inner_content_async` of course).

[#545]: https://github.com/tafia/quick-xml/pull/545
[#580]: https://github.com/tafia/quick-xml/issues/580
[#619]: https://github.com/tafia/quick-xml/issues/619
[#635]: https://github.com/tafia/quick-xml/pull/635
[#643]: https://github.com/tafia/quick-xml/pull/643
[#649]: https://github.com/tafia/quick-xml/pull/646
[#651]: https://github.com/tafia/quick-xml/pull/651
[#660]: https://github.com/tafia/quick-xml/pull/660


## 0.30.0 -- 2023-07-23
Expand Down
159 changes: 129 additions & 30 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,52 @@ macro_rules! forward {
/// A deserializer for a value of map or struct. That deserializer slightly
/// differently processes events for a primitive types and sequences than
/// a [`Deserializer`].
///
/// This deserializer can see two kind of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represents two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer are very similar to a [`SeqItemDeserializer`]. The only difference
/// in the `deserialize_seq` method. This deserializer will act as an iterator
/// over tags / text within it's parent tag, whereas the [`SeqItemDeserializer`]
/// will represent sequences as an `xs:list`.
///
/// This deserializer processes items as following:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
/// from a text content, or unwrapped from a one level of a tag. So, `123` and
/// `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
/// - everything else are deserialized as `Some` using the same deserializer,
/// including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consumes the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of inner type
/// with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
/// parent tag and deserializing each tag or text content using [`SeqItemDeserializer`];
/// - structs and maps are deserialized using new instance of [`MapAccess`];
/// - enums:
/// - in case of [`DeEvent::Text`] event the text content is deserialized as
/// a `$text` variant. Enum content is deserialized from the text using
/// [`SimpleTypeDeserializer`];
/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
/// an enum tag, and the content inside are deserialized as an enum content.
/// Depending on a variant kind deserialization is performed as:
/// - unit variants: consuming text content or a subtree;
/// - newtype variants: forward deserialization to the inner type using
/// this deserializer;
/// - tuple variants: call [`deserialize_tuple`] of this deserializer;
/// - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'a, 'm, R, E>
where
R: XmlRead<'de>,
Expand Down Expand Up @@ -485,7 +531,6 @@ where

forward!(deserialize_unit);

forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
fields: &'static [&'static str]
Expand All @@ -497,7 +542,6 @@ where
));

forward!(deserialize_any);
forward!(deserialize_ignored_any);

fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
where
Expand All @@ -506,6 +550,19 @@ where
deserialize_option!(self.map.de, self, visitor)
}

/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
/// with the same deserializer.
fn deserialize_newtype_struct<V>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
visitor.visit_newtype_struct(self)
}

/// Deserializes each `<tag>` in
/// ```xml
/// <any-tag>
Expand Down Expand Up @@ -716,7 +773,59 @@ where

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A deserializer for a single item of a sequence.
/// A deserializer for a single item of a mixed sequence of tags and text.
///
/// This deserializer can see two kind of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represents two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer are very similar to a [`MapValueDeserializer`]. The only difference
/// in the `deserialize_seq` method. This deserializer will perform deserialization
/// from the textual content (the text itself in case of [`DeEvent::Text`] event
/// and the text between tags in case of [`DeEvent::Start`] event), whereas
/// the [`MapValueDeserializer`] will iterate over tags / text within it's parent tag.
///
/// This deserializer processes items as following:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
/// from a text content, or unwrapped from a one level of a tag. So, `123` and
/// `<int>123</int>` both can be deserialized into an `u32`;
/// - `Option`:
/// - empty text of [`DeEvent::Text`] is deserialized as `None`;
/// - everything else are deserialized as `Some` using the same deserializer,
/// including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consumes the whole text or element subtree;
/// - newtype structs are deserialized as tuple structs with one element;
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
/// (this is the difference):
/// - in case of [`DeEvent::Text`] event text content passed to the deserializer directly;
/// - in case of [`DeEvent::Start`] event the start and end tags are stripped,
/// and text between them is passed to [`SimpleTypeDeserializer`]. If the tag
/// contains something else other than text, an error is returned, but if it
/// contains a text and something else (for example, `<item>text<tag/></item>`),
/// then the trail is just ignored;
/// - structs and maps are deserialized using new [`MapAccess`];
/// - enums:
/// - in case of [`DeEvent::Text`] event the text content is deserialized as
/// a `$text` variant. Enum content is deserialized from the text using
/// [`SimpleTypeDeserializer`];
/// - in case of [`DeEvent::Start`] event the tag name is deserialized as
/// an enum tag, and the content inside are deserialized as an enum content.
/// Depending on a variant kind deserialization is performed as:
/// - unit variants: consuming text content or a subtree;
/// - newtype variants: forward deserialization to the inner type using
/// this deserializer;
/// - tuple variants: deserialize it as an `xs:list`;
/// - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct SeqItemDeserializer<'de, 'a, 'm, R, E>
where
R: XmlRead<'de>,
Expand Down Expand Up @@ -754,7 +863,6 @@ where

forward!(deserialize_unit);

forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
fields: &'static [&'static str]
Expand All @@ -766,7 +874,6 @@ where
));

forward!(deserialize_any);
forward!(deserialize_ignored_any);

fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
where
Expand All @@ -775,6 +882,20 @@ where
deserialize_option!(self.map.de, self, visitor)
}

/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
/// with the [`SimpleTypeDeserializer`].
fn deserialize_newtype_struct<V>(
mut self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
let text = self.read_string()?;
visitor.visit_newtype_struct(SimpleTypeDeserializer::from_text(text))
}

/// This method deserializes a sequence inside of element that itself is a
/// sequence element:
///
Expand All @@ -787,34 +908,12 @@ where
/// ...
/// </>
/// ```
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
match self.map.de.next()? {
DeEvent::Text(e) => {
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
}
// This is a sequence element. We cannot treat it as another flatten
// sequence if type will require `deserialize_seq` We instead forward
// it to `xs:simpleType` implementation
DeEvent::Start(e) => {
let value = match self.map.de.next()? {
DeEvent::Text(e) => {
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
}
e => Err(DeError::Unsupported(
format!("unsupported event {:?}", e).into(),
)),
};
// TODO: May be assert that here we expect only matching closing tag?
self.map.de.read_to_end(e.name())?;
value
}
// SAFETY: we use that deserializer only when Start(element) or Text
// event was peeked already
_ => unreachable!(),
}
let text = self.read_string()?;
SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
}

#[inline]
Expand Down
81 changes: 34 additions & 47 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1904,18 +1904,6 @@ macro_rules! deserialize_primitives {
self.deserialize_unit(visitor)
}

/// Representation of the newtypes the same as one-element [tuple](#method.deserialize_tuple).
fn deserialize_newtype_struct<V>(
self,
_name: &'static str,
visitor: V,
) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_tuple(1, visitor)
}

/// Representation of tuples the same as [sequences](#method.deserialize_seq).
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
where
Expand All @@ -1937,13 +1925,32 @@ macro_rules! deserialize_primitives {
self.deserialize_tuple(len, visitor)
}

/// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
/// with empty name and fields.
#[inline]
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_struct("", &[], visitor)
}

/// Identifiers represented as [strings](#method.deserialize_str).
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}

/// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
#[inline]
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_unit(visitor)
}
};
}

Expand Down Expand Up @@ -2820,30 +2827,36 @@ where
}
}

fn deserialize_enum<V>(
/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
/// with the same deserializer.
fn deserialize_newtype_struct<V>(
self,
_name: &'static str,
_variants: &'static [&'static str],
visitor: V,
) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
visitor.visit_enum(var::EnumAccess::new(self))
visitor.visit_newtype_struct(self)
}

fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
fn deserialize_enum<V>(
self,
_name: &'static str,
_variants: &'static [&'static str],
visitor: V,
) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
visitor.visit_seq(self)
visitor.visit_enum(var::EnumAccess::new(self))
}

fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
self.deserialize_struct("", &[], visitor)
visitor.visit_seq(self)
}

fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
Expand All @@ -2853,39 +2866,13 @@ where
deserialize_option!(self, self, visitor)
}

/// Always call `visitor.visit_unit()` because returned value ignored in any case.
///
/// This method consumes any single [event][DeEvent] except the [`Start`]
/// event, in which case all events up to and including corresponding [`End`]
/// event will be consumed.
///
/// This method returns error if current event is [`End`] or [`Eof`].
///
/// [`Start`]: DeEvent::Start
/// [`End`]: DeEvent::End
/// [`Eof`]: DeEvent::Eof
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
match self.next()? {
DeEvent::Start(e) => self.read_to_end(e.name())?,
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
DeEvent::Eof => return Err(DeError::UnexpectedEof),
_ => (),
}
visitor.visit_unit()
}

fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
match self.peek()? {
DeEvent::Start(_) => self.deserialize_map(visitor),
// Redirect to deserialize_unit in order to consume an event and return an appropriate error
DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
_ => self.deserialize_string(visitor),
DeEvent::Text(_) => self.deserialize_str(visitor),
_ => self.deserialize_map(visitor),
}
}
}
Expand Down
Loading

0 comments on commit dd34cab

Please sign in to comment.