From a6d486ea5011acb8d8abd04bb0bd2fc0fcca1b82 Mon Sep 17 00:00:00 2001
From: Mingun <Alexander_Sergey@mail.ru>
Date: Sun, 7 Jul 2024 20:40:58 +0500
Subject: [PATCH 1/6] Add tests for `XmlSource::read_text`

---
 src/reader/mod.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index a05e5bc5..8e8e1342 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -913,13 +913,14 @@ impl<R> Reader<R> {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
-/// Result of an attempt to read XML textual data from the reader.
+/// Result of an attempt to read XML textual data from the source.
+#[derive(Debug)]
 enum ReadTextResult<'r, B> {
-    /// Start of markup (`<` character) was found in the first byte.
+    /// Start of markup (`<` character) was found in the first byte. `<` was consumed.
     /// Contains buffer that should be returned back to the next iteration cycle
     /// to satisfy borrow checker requirements.
     Markup(B),
-    /// Contains text block up to start of markup (`<` character).
+    /// Contains text block up to start of markup (`<` character). `<` was consumed.
     UpToMarkup(&'r [u8]),
     /// Contains text block up to EOF, start of markup (`<` character) was not found.
     UpToEof(&'r [u8]),
@@ -1518,6 +1519,69 @@ mod test {
                 }
             }
 
+            mod read_text {
+                use super::*;
+                use crate::reader::ReadTextResult;
+                use crate::utils::Bytes;
+                use pretty_assertions::assert_eq;
+
+                #[$test]
+                $($async)? fn empty() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"".as_ref();
+                    //                ^= 1
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
+                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 1);
+                }
+
+                #[$test]
+                $($async)? fn markup() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"<".as_ref();
+                    //                 ^= 2
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
+                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 2);
+                }
+
+                #[$test]
+                $($async)? fn up_to_markup() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"a<".as_ref();
+                    //                1 ^= 3
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
+                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 3);
+                }
+
+                #[$test]
+                $($async)? fn up_to_eof() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"a".as_ref();
+                    //                 ^= 2
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
+                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 2);
+                }
+            }
+
             mod read_element {
                 use super::*;
                 use crate::errors::{Error, SyntaxError};

From dfea1106696a3d74412eae712876576f24c3da66 Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Wed, 12 Jun 2024 00:45:52 +0500
Subject: [PATCH 2/6] Introduce a new `Event::GeneralRef` that is emitted on
 each `&...;` construction in a text

failures (16):
  serde-de (9):
    borrow::escaped::element
    borrow::escaped::top_level
    resolve::resolve_custom_entity
    trivial::text::byte_buf
    trivial::text::bytes
    trivial::text::string::field
    trivial::text::string::naked
    trivial::text::string::text
    xml_schema_lists::element::text::string
  serde-migrated (1):
    test_parse_string
  serde-se (5):
    with_root::char_amp
    with_root::char_gt
    with_root::char_lt
    with_root::str_escaped
    with_root::tuple
  --doc (1):
    src\de\resolver.rs - de::resolver::EntityResolver (line 13)
---
 Changelog.md                       |  13 +
 fuzz/fuzz_targets/fuzz_target_1.rs |   5 +
 src/errors.rs                      |   6 +
 src/escape.rs                      |   2 +-
 src/events/mod.rs                  | 157 ++++++++-
 src/reader/async_tokio.rs          |   8 +-
 src/reader/buffered_reader.rs      | 101 +++++-
 src/reader/mod.rs                  | 215 +++++++++++-
 src/reader/slice_reader.rs         |  66 +++-
 src/writer.rs                      |   1 +
 src/writer/async_tokio.rs          |   1 +
 tests/async-tokio.rs               |  15 +-
 tests/documents/html5.txt          |   4 +-
 tests/html.rs                      |  11 +-
 tests/reader-errors.rs             |  25 ++
 tests/reader-references.rs         | 546 +++++++++++++++++++++++++++++
 tests/reader.rs                    |  10 +-
 17 files changed, 1151 insertions(+), 35 deletions(-)
 create mode 100644 tests/reader-references.rs

diff --git a/Changelog.md b/Changelog.md
index e096aab2..7b6efd9f 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -13,12 +13,25 @@
 
 ## Unreleased
 
+### Significant changes
+
+References to entities (both predefined, such as `&lt;`, and user-defined) are now reported as a new
+`Event::GeneralRef`.
+The caller can parse the content of the entity and stream events from it, as required by the
+XML specification.
+
 ### New Features
 
+- [#766]: Allow parsing resolved entities as XML fragments and streaming events from them.
+- [#766]: Added new event `Event::GeneralRef` with content of [general entity].
+
 ### Bug Fixes
 
 ### Misc Changes
 
+[#766]: https://github.com/tafia/quick-xml/pull/766
+[general entity]: https://www.w3.org/TR/xml11/#gen-entity
+
 
 ## 0.37.0 -- 2024-10-27
 
diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs
index 08d983e9..d13e6081 100644
--- a/fuzz/fuzz_targets/fuzz_target_1.rs
+++ b/fuzz/fuzz_targets/fuzz_target_1.rs
@@ -55,6 +55,11 @@ where
                     break;
                 }
             }
+            Ok(Event::GeneralRef(ref e)) => {
+                debug_format!(e);
+                debug_format!(e.is_char_ref());
+                debug_format!(e.resolve_char_ref());
+            }
             Ok(Event::PI(ref e)) => {
                 debug_format!(e);
             }
diff --git a/src/errors.rs b/src/errors.rs
index 5a15a5ad..f7c7a8c6 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -114,6 +114,9 @@ pub enum IllFormedError {
     /// [specification]: https://www.w3.org/TR/xml11/#sec-comments
     /// [configuration]: crate::reader::Config::check_comments
     DoubleHyphenInComment,
+    /// The parser started to parse entity or character reference (`&...;`) in text,
+    /// but the input ended before the closing `;` character was found.
+    UnclosedReference,
 }
 
 impl fmt::Display for IllFormedError {
@@ -144,6 +147,9 @@ impl fmt::Display for IllFormedError {
             Self::DoubleHyphenInComment => {
                 f.write_str("forbidden string `--` was found in a comment")
             }
+            Self::UnclosedReference => f.write_str(
+                "entity or character reference not closed: `;` not found before end of input",
+            ),
         }
     }
 }
diff --git a/src/escape.rs b/src/escape.rs
index 7175ed88..dd0f5f47 100644
--- a/src/escape.rs
+++ b/src/escape.rs
@@ -1820,7 +1820,7 @@ pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> {
     Some(s)
 }
 
-fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
+pub(crate) fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
     let code = if let Some(hex) = num.strip_prefix('x') {
         from_str_radix(hex, 16)?
     } else {
diff --git a/src/events/mod.rs b/src/events/mod.rs
index 704c4ef6..c274085a 100644
--- a/src/events/mod.rs
+++ b/src/events/mod.rs
@@ -48,7 +48,8 @@ use std::str::from_utf8;
 use crate::encoding::{Decoder, EncodingError};
 use crate::errors::{Error, IllFormedError};
 use crate::escape::{
-    escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
+    escape, minimal_escape, parse_number, partial_escape, resolve_predefined_entity, unescape_with,
+    EscapeError,
 };
 use crate::name::{LocalName, QName};
 #[cfg(feature = "serialize")]
@@ -1291,6 +1292,154 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
+/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
+///
+/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
+/// returns the content of this event between `&` and `;`:
+///
+/// ```
+/// # use quick_xml::events::{BytesRef, Event};
+/// # use quick_xml::reader::Reader;
+/// # use pretty_assertions::assert_eq;
+/// let mut reader = Reader::from_str(r#"&entity;"#);
+/// let content = "entity";
+/// let event = BytesRef::new(content);
+///
+/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
+/// // deref coercion of &BytesRef to &[u8]
+/// assert_eq!(&event as &[u8], content.as_bytes());
+/// // AsRef<[u8]> for &T + deref coercion
+/// assert_eq!(event.as_ref(), content.as_bytes());
+/// ```
+#[derive(Clone, Eq, PartialEq)]
+pub struct BytesRef<'a> {
+    content: Cow<'a, [u8]>,
+    /// Encoding in which the `content` is stored inside the event.
+    decoder: Decoder,
+}
+
+impl<'a> BytesRef<'a> {
+    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
+    #[inline]
+    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
+        Self {
+            content: Cow::Borrowed(content),
+            decoder,
+        }
+    }
+
+    /// Creates a new `BytesRef` borrowing a slice.
+    ///
+    /// # Warning
+    ///
+    /// `name` must be a valid name.
+    #[inline]
+    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
+        Self {
+            content: str_cow_to_bytes(name),
+            decoder: Decoder::utf8(),
+        }
+    }
+
+    /// Converts the event into an owned event.
+    pub fn into_owned(self) -> BytesRef<'static> {
+        BytesRef {
+            content: Cow::Owned(self.content.into_owned()),
+            decoder: self.decoder,
+        }
+    }
+
+    /// Extracts the inner `Cow` from the `BytesRef` event container.
+    #[inline]
+    pub fn into_inner(self) -> Cow<'a, [u8]> {
+        self.content
+    }
+
+    /// Converts the event into a borrowed event.
+    #[inline]
+    pub fn borrow(&self) -> BytesRef {
+        BytesRef {
+            content: Cow::Borrowed(&self.content),
+            decoder: self.decoder,
+        }
+    }
+
+    /// Decodes the content of the event.
+    ///
+    /// This will allocate if the content is stored in
+    /// non-UTF-8 encoding.
+    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
+        self.decoder.decode_cow(&self.content)
+    }
+
+    /// Returns `true` if the specified reference represents the character reference
+    /// (`&#<number>;`).
+    ///
+    /// ```
+    /// # use quick_xml::events::BytesRef;
+    /// # use pretty_assertions::assert_eq;
+    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
+    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
+    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
+    /// ```
+    pub fn is_char_ref(&self) -> bool {
+        matches!(self.content.first(), Some(b'#'))
+    }
+
+    /// If this reference represents character reference, then resolves it and
+    /// returns the character, otherwise returns `None`.
+    ///
+    /// This method does not check if character is allowed for XML, in other words,
+    /// well-formedness constraint [WFC: Legal Char] is not enforced.
+    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
+    ///
+    /// ```
+    /// # use quick_xml::events::BytesRef;
+    /// # use pretty_assertions::assert_eq;
+    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
+    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
+    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
+    /// ```
+    ///
+    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
+    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
+        if let Some(num) = self.decode()?.strip_prefix('#') {
+            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
+            return Ok(Some(ch));
+        }
+        Ok(None)
+    }
+}
+
+impl<'a> Debug for BytesRef<'a> {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "BytesRef {{ content: ")?;
+        write_cow_string(f, &self.content)?;
+        write!(f, " }}")
+    }
+}
+
+impl<'a> Deref for BytesRef<'a> {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        &self.content
+    }
+}
+
+#[cfg(feature = "arbitrary")]
+impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
+    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
+        Ok(Self::new(<&str>::arbitrary(u)?))
+    }
+
+    fn size_hint(depth: usize) -> (usize, Option<usize>) {
+        <&str as arbitrary::Arbitrary>::size_hint(depth)
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
 /// Event emitted by [`Reader::read_event_into`].
 ///
 /// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
@@ -1315,6 +1464,9 @@ pub enum Event<'a> {
     PI(BytesPI<'a>),
     /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
     DocType(BytesText<'a>),
+    /// General reference `&entity;` in the textual data. Can be either an entity
+    /// reference, or a character reference.
+    GeneralRef(BytesRef<'a>),
     /// End of XML document.
     Eof,
 }
@@ -1333,6 +1485,7 @@ impl<'a> Event<'a> {
             Event::Decl(e) => Event::Decl(e.into_owned()),
             Event::PI(e) => Event::PI(e.into_owned()),
             Event::DocType(e) => Event::DocType(e.into_owned()),
+            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
             Event::Eof => Event::Eof,
         }
     }
@@ -1350,6 +1503,7 @@ impl<'a> Event<'a> {
             Event::Decl(e) => Event::Decl(e.borrow()),
             Event::PI(e) => Event::PI(e.borrow()),
             Event::DocType(e) => Event::DocType(e.borrow()),
+            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
             Event::Eof => Event::Eof,
         }
     }
@@ -1368,6 +1522,7 @@ impl<'a> Deref for Event<'a> {
             Event::CData(ref e) => e,
             Event::Comment(ref e) => e,
             Event::DocType(ref e) => e,
+            Event::GeneralRef(ref e) => e,
             Event::Eof => &[],
         }
     }
diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs
index ac74e232..a9237de0 100644
--- a/src/reader/async_tokio.rs
+++ b/src/reader/async_tokio.rs
@@ -7,12 +7,14 @@ use std::task::{Context, Poll};
 
 use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf};
 
-use crate::errors::{Error, Result, SyntaxError};
-use crate::events::Event;
+use crate::errors::{Error, IllFormedError, Result, SyntaxError};
+use crate::events::{BytesRef, Event};
 use crate::name::{QName, ResolveResult};
 use crate::parser::{ElementParser, Parser, PiParser};
 use crate::reader::buffered_reader::impl_buffered_source;
-use crate::reader::{BangType, BinaryStream, NsReader, ParseState, ReadTextResult, Reader, Span};
+use crate::reader::{
+    BangType, BinaryStream, NsReader, ParseState, ReadRefResult, ReadTextResult, Reader, Span,
+};
 use crate::utils::is_whitespace;
 
 /// A struct for read XML asynchronously from an [`AsyncBufRead`].
diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs
index 0136a55e..f268448c 100644
--- a/src/reader/buffered_reader.rs
+++ b/src/reader/buffered_reader.rs
@@ -9,7 +9,7 @@ use crate::errors::{Error, Result};
 use crate::events::Event;
 use crate::name::QName;
 use crate::parser::Parser;
-use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
+use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource};
 use crate::utils::is_whitespace;
 
 macro_rules! impl_buffered_source {
@@ -69,17 +69,22 @@ macro_rules! impl_buffered_source {
                     }
                 };
 
-                match memchr::memchr(b'<', available) {
+                // Search for start of markup or an entity or character reference
+                match memchr::memchr2(b'<', b'&', available) {
                     // Special handling is needed only on the first iteration.
                     // On next iterations we already read something and should emit Text event
-                    Some(0) if read == 0 => {
+                    Some(0) if read == 0 && available[0] == b'<' => {
                         self $(.$reader)? .consume(1);
                         *position += 1;
                         return ReadTextResult::Markup(buf);
                     }
-                    Some(i) => {
+                    // Do not consume `&` because it may be lone and we would need to
+                    // return it as part of Text event
+                    Some(0) if read == 0 => return ReadTextResult::Ref(buf),
+                    Some(i) if available[i] == b'<' => {
                         buf.extend_from_slice(&available[..i]);
 
+                        // +1 to skip `<`
                         let used = i + 1;
                         self $(.$reader)? .consume(used);
                         read += used as u64;
@@ -87,6 +92,15 @@ macro_rules! impl_buffered_source {
                         *position += read;
                         return ReadTextResult::UpToMarkup(&buf[start..]);
                     }
+                    Some(i) => {
+                        buf.extend_from_slice(&available[..i]);
+
+                        self $(.$reader)? .consume(i);
+                        read += i as u64;
+
+                        *position += read;
+                        return ReadTextResult::UpToRef(&buf[start..]);
+                    }
                     None => {
                         buf.extend_from_slice(available);
 
@@ -101,6 +115,85 @@ macro_rules! impl_buffered_source {
             ReadTextResult::UpToEof(&buf[start..])
         }
 
+        #[inline]
+        $($async)? fn read_ref $(<$lf>)? (
+            &mut self,
+            buf: &'b mut Vec<u8>,
+            position: &mut u64,
+        ) -> ReadRefResult<'b> {
+            let mut read = 0;
+            let start = buf.len();
+            loop {
+                let available = match self $(.$reader)? .fill_buf() $(.$await)? {
+                    Ok(n) if n.is_empty() => break,
+                    Ok(n) => n,
+                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                    Err(e) => {
+                        *position += read;
+                        return ReadRefResult::Err(e);
+                    }
+                };
+                // `read_ref` called when the first character is `&`, so we
+                // should explicitly skip it at first iteration lest we confuse
+                // it with the end
+                if read == 0 {
+                    debug_assert_eq!(
+                        available.first(),
+                        Some(&b'&'),
+                        "`read_ref` must be called at `&`"
+                    );
+                    // If that ampersand is lone, then it will be part of text
+                    // and we should keep it
+                    buf.push(b'&');
+                    self $(.$reader)? .consume(1);
+                    read += 1;
+                    continue;
+                }
+
+                match memchr::memchr3(b';', b'&', b'<', available) {
+                    // Do not consume `&` because it may be lone and we would need to
+                    // return it as part of Text event
+                    Some(i) if available[i] == b'&' => {
+                        buf.extend_from_slice(&available[..i]);
+
+                        self $(.$reader)? .consume(i);
+                        read += i as u64;
+
+                        *position += read;
+
+                        return ReadRefResult::UpToRef;
+                    }
+                    Some(i) => {
+                        let is_end = available[i] == b';';
+                        buf.extend_from_slice(&available[..i]);
+
+                        // +1 -- skip the end `;` or `<`
+                        let used = i + 1;
+                        self $(.$reader)? .consume(used);
+                        read += used as u64;
+
+                        *position += read;
+
+                        return if is_end {
+                            ReadRefResult::Ref(&buf[start..])
+                        } else {
+                            ReadRefResult::UpToMarkup
+                        };
+                    }
+                    None => {
+                        buf.extend_from_slice(available);
+
+                        let used = available.len();
+                        self $(.$reader)? .consume(used);
+                        read += used as u64;
+                    }
+                }
+            }
+
+            *position += read;
+            ReadRefResult::UpToEof
+        }
+
         #[inline]
         $($async)? fn read_with<$($lf,)? P: Parser>(
             &mut self,
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 8e8e1342..cf806e3e 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -6,8 +6,8 @@ use std::io;
 use std::ops::Range;
 
 use crate::encoding::Decoder;
-use crate::errors::{Error, SyntaxError};
-use crate::events::Event;
+use crate::errors::{Error, IllFormedError, SyntaxError};
+use crate::events::{BytesRef, Event};
 use crate::parser::{ElementParser, Parser, PiParser};
 use crate::reader::state::ReaderState;
 
@@ -232,7 +232,7 @@ macro_rules! read_event_impl {
     ) => {{
         let event = loop {
             break match $self.state.state {
-                ParseState::Init => { // Go to InsideMarkup state
+                ParseState::Init => { // Go to InsideText state
                     // If encoding set explicitly, we not need to detect it. For example,
                     // explicit UTF-8 set automatically if Reader was created using `from_str`.
                     // But we still need to remove BOM for consistency with no encoding
@@ -251,6 +251,35 @@ macro_rules! read_event_impl {
                     $self.state.state = ParseState::InsideText;
                     continue;
                 },
+                ParseState::InsideRef => { // Go to InsideText
+                    let start = $self.state.offset;
+                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
+                        // Emit reference, go to InsideText state
+                        ReadRefResult::Ref(bytes) => {
+                            $self.state.state = ParseState::InsideText;
+                            // +1 to skip start `&`
+                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
+                        }
+                        // Go to Done state
+                        ReadRefResult::UpToEof => {
+                            $self.state.state = ParseState::Done;
+                            $self.state.last_error_offset = start;
+                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
+                        }
+                        // Do not change state, stay in InsideRef
+                        ReadRefResult::UpToRef => {
+                            $self.state.last_error_offset = start;
+                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
+                        }
+                        // Go to InsideMarkup state
+                        ReadRefResult::UpToMarkup => {
+                            $self.state.state = ParseState::InsideMarkup;
+                            $self.state.last_error_offset = start;
+                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
+                        }
+                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
+                    }
+                }
                 ParseState::InsideText => { // Go to InsideMarkup or Done state
                     if $self.state.config.trim_text_start {
                         $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
@@ -263,6 +292,12 @@ macro_rules! read_event_impl {
                             $buf = buf;
                             continue;
                         }
+                        ReadTextResult::Ref(buf) => {
+                            $self.state.state = ParseState::InsideRef;
+                            // Pass `buf` to the next iteration of parsing loop
+                            $buf = buf;
+                            continue;
+                        }
                         ReadTextResult::UpToMarkup(bytes) => {
                             $self.state.state = ParseState::InsideMarkup;
                             // FIXME: Can produce an empty event if:
@@ -271,6 +306,11 @@ macro_rules! read_event_impl {
                             // - trim_text_end = true
                             Ok(Event::Text($self.state.emit_text(bytes)))
                         }
+                        ReadTextResult::UpToRef(bytes) => {
+                            $self.state.state = ParseState::InsideRef;
+                            // Return Text event with `bytes` content; the reference is read on the next call
+                            Ok(Event::Text($self.state.emit_text(bytes)))
+                        }
                         ReadTextResult::UpToEof(bytes) => {
                             $self.state.state = ParseState::Done;
                             // Trim bytes from end if required
@@ -484,6 +524,7 @@ pub type Span = Range<u64>;
 ///     Init         -- "(no event)"\n                                       --> InsideMarkup
 ///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
 ///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
+///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
 ///   end
 ///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
 ///   InsideEmpty    -- End                   --> InsideText
@@ -497,6 +538,11 @@ enum ParseState {
     /// event emitted during transition to `InsideMarkup` is a `StartEvent` if the
     /// first symbol not `<`, otherwise no event are emitted.
     Init,
+    /// State after seeing the `&` symbol in textual content. The reader tries to read
+    /// a general reference up to the closing `;` and to emit a `GeneralRef` event.
+    ///
+    /// After emitting the `GeneralRef` event the reader moves to the `InsideText` state.
+    InsideRef,
     /// State after seeing the `<` symbol. Depending on the next symbol all other
     /// events could be generated.
     ///
@@ -788,7 +834,12 @@ impl<R> Reader<R> {
         &mut self.reader
     }
 
-    /// Gets the current byte position in the input data.
+    /// Gets the byte position in the input data just after the last emitted event
+    /// (i.e. this is position where data of last event ends).
+    ///
+    /// Note that for text events which originally ended with whitespace characters
+    /// (` `, `\t`, `\r`, and `\n`), if [`Config::trim_text_end`] is set, this is the position
+    /// before trimming, not the position of the last byte of the [`Event::Text`] content.
     pub const fn buffer_position(&self) -> u64 {
         // when internal state is InsideMarkup, we have actually read until '<',
         // which we don't want to show
@@ -920,14 +971,43 @@ enum ReadTextResult<'r, B> {
     /// Contains buffer that should be returned back to the next iteration cycle
     /// to satisfy borrow checker requirements.
     Markup(B),
+    /// Start of reference (`&` character) was found in the first byte.
+    /// `&` was not consumed.
+    /// Contains buffer that should be returned back to the next iteration cycle
+    /// to satisfy borrow checker requirements.
+    Ref(B),
     /// Contains text block up to start of markup (`<` character). `<` was consumed.
     UpToMarkup(&'r [u8]),
-    /// Contains text block up to EOF, start of markup (`<` character) was not found.
+    /// Contains text block up to start of reference (`&` character).
+    /// `&` was not consumed.
+    UpToRef(&'r [u8]),
+    /// Contains text block up to EOF, neither start of markup (`<` character)
+    /// nor start of reference (`&` character) was found.
     UpToEof(&'r [u8]),
     /// IO error occurred.
     Err(io::Error),
 }
 
+/// Result of an attempt to read general reference from the reader.
+#[derive(Debug)]
+enum ReadRefResult<'r> {
+    /// Contains text block up to end of reference (`;` character).
+    /// Result includes start `&`, but not end `;`.
+    Ref(&'r [u8]),
+    /// Contains text block up to EOF. Neither end of reference (`;`), start of
+    /// another reference (`&`), nor start of markup (`<`) was found.
+    /// Result includes start `&`.
+    UpToEof,
+    /// Contains text block up to next possible reference (`&` character).
+    /// Result includes start `&`.
+    UpToRef,
+    /// Contains text block up to start of markup (`<` character).
+    /// Result includes start `&`.
+    UpToMarkup,
+    /// IO error occurred.
+    Err(io::Error),
+}
+
 /// Represents an input for a reader that can return borrowed data.
 ///
 /// There are two implementors of this trait: generic one that read data from
@@ -951,7 +1031,8 @@ trait XmlSource<'r, B> {
     #[cfg(feature = "encoding")]
     fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;
 
-    /// Read input until start of markup (the `<`) is found or end of input is reached.
+    /// Read input until start of markup (the `<`) is found, start of general entity
+    /// reference (the `&`) is found or end of input is reached.
     ///
     /// # Parameters
     /// - `buf`: Buffer that could be filled from an input (`Self`) and
@@ -961,6 +1042,19 @@ trait XmlSource<'r, B> {
     /// [events]: crate::events::Event
     fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
 
+    /// Read input until end of general reference (the `;`), start of another general
+    /// reference (the `&`) or start of markup (the `<`) is found, or end of input is reached.
+    ///
+    /// This method must be called when current character is `&`.
+    ///
+    /// # Parameters
+    /// - `buf`: Buffer that could be filled from an input (`Self`) and
+    ///   from which [events] could borrow their data
+    /// - `position`: Will be increased by amount of bytes consumed
+    ///
+    /// [events]: crate::events::Event
+    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;
+
     /// Read input until processing instruction is finished.
     ///
     /// This method expect that start sequence of a parser already was read.
@@ -1553,6 +1647,20 @@ mod test {
                     assert_eq!(position, 2);
                 }
 
+                #[$test]
+                $($async)? fn ref_() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&".as_ref();
+                    //                ^= 1
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
+                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 1);
+                }
+
                 #[$test]
                 $($async)? fn up_to_markup() {
                     let buf = $buf;
@@ -1567,6 +1675,20 @@ mod test {
                     assert_eq!(position, 3);
                 }
 
+                #[$test]
+                $($async)? fn up_to_ref() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"a&".as_ref();
+                    //                 ^= 2
+
+                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
+                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
+                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 2);
+                }
+
                 #[$test]
                 $($async)? fn up_to_eof() {
                     let buf = $buf;
@@ -1582,6 +1704,87 @@ mod test {
                 }
             }
 
+            mod read_ref {
+                use super::*;
+                use crate::reader::ReadRefResult;
+                use crate::utils::Bytes;
+                use pretty_assertions::assert_eq;
+
+                // Empty input is not allowed for `read_ref` so not tested.
+                // Borrowed source triggers debug assertion,
+                // buffered do nothing due to implementation details.
+
+                #[$test]
+                $($async)? fn up_to_eof() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&".as_ref();
+                    //                 ^= 2
+
+                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
+                        ReadRefResult::UpToEof => (),
+                        x => panic!("Expected `UpToEof`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 2);
+                }
+
+                #[$test]
+                $($async)? fn up_to_ref() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&&".as_ref();
+                    //                 ^= 2
+
+                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
+                        ReadRefResult::UpToRef => (),
+                        x => panic!("Expected `UpToRef`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 2);
+                }
+
+                #[$test]
+                $($async)? fn up_to_markup() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&<".as_ref();
+                    //                  ^= 3
+
+                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
+                        ReadRefResult::UpToMarkup => (),
+                        x => panic!("Expected `UpToMarkup`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 3);
+                }
+
+                #[$test]
+                $($async)? fn empty_ref() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&;".as_ref();
+                    //                  ^= 3
+
+                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
+                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
+                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 3);
+                }
+
+                #[$test]
+                $($async)? fn normal() {
+                    let buf = $buf;
+                    let mut position = 1;
+                    let mut input = b"&lt;".as_ref();
+                    //                    ^= 5
+
+                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
+                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
+                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
+                    }
+                    assert_eq!(position, 5);
+                }
+            }
+
             mod read_element {
                 use super::*;
                 use crate::errors::{Error, SyntaxError};
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
index 08287592..37439597 100644
--- a/src/reader/slice_reader.rs
+++ b/src/reader/slice_reader.rs
@@ -14,7 +14,7 @@ use crate::errors::{Error, Result};
 use crate::events::Event;
 use crate::name::QName;
 use crate::parser::Parser;
-use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
+use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource};
 use crate::utils::is_whitespace;
 
 /// This is an implementation for reading from a `&[u8]` as underlying byte stream.
@@ -263,27 +263,79 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
 
     #[inline]
     fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> {
-        match memchr::memchr(b'<', self) {
-            Some(0) => {
-                *position += 1;
+        // Search for start of markup or an entity or character reference
+        match memchr::memchr2(b'<', b'&', self) {
+            Some(0) if self[0] == b'<' => {
                 *self = &self[1..];
+                *position += 1;
                 ReadTextResult::Markup(())
             }
-            Some(i) => {
-                *position += i as u64 + 1;
+            // Do not consume `&` because it may be lone and we would be need to
+            // return it as part of Text event
+            Some(0) => ReadTextResult::Ref(()),
+            Some(i) if self[i] == b'<' => {
                 let bytes = &self[..i];
                 *self = &self[i + 1..];
+                *position += i as u64 + 1;
                 ReadTextResult::UpToMarkup(bytes)
             }
+            Some(i) => {
+                let (bytes, rest) = self.split_at(i);
+                *self = rest;
+                *position += i as u64;
+                ReadTextResult::UpToRef(bytes)
+            }
             None => {
-                *position += self.len() as u64;
                 let bytes = &self[..];
                 *self = &[];
+                *position += bytes.len() as u64;
                 ReadTextResult::UpToEof(bytes)
             }
         }
     }
 
+    #[inline]
+    fn read_ref(&mut self, _buf: (), position: &mut u64) -> ReadRefResult<'a> {
+        debug_assert_eq!(
+            self.first(),
+            Some(&b'&'),
+            "`read_ref` must be called at `&`"
+        );
+        // Search for the end of reference or a start of another reference or a markup
+        match memchr::memchr3(b';', b'&', b'<', &self[1..]) {
+            // Do not consume `&` because it may be lone and we would be need to
+            // return it as part of Text event
+            Some(i) if self[i + 1] == b'&' => {
+                let (_, rest) = self.split_at(i + 1);
+                *self = rest;
+                *position += i as u64 + 1;
+
+                ReadRefResult::UpToRef
+            }
+            Some(i) => {
+                let end = i + 1;
+                let is_end = self[end] == b';';
+                let bytes = &self[..end];
+                // +1 -- skip the end `;` or `<`
+                *self = &self[end + 1..];
+                *position += end as u64 + 1;
+
+                if is_end {
+                    ReadRefResult::Ref(bytes)
+                } else {
+                    ReadRefResult::UpToMarkup
+                }
+            }
+            None => {
+                let bytes = &self[..];
+                *self = &[];
+                *position += bytes.len() as u64;
+
+                ReadRefResult::UpToEof
+            }
+        }
+    }
+
     #[inline]
     fn read_with<P>(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]>
     where
diff --git a/src/writer.rs b/src/writer.rs
index 19d120bf..f0a6a97d 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -221,6 +221,7 @@ impl<W: Write> Writer<W> {
             Event::Decl(e) => self.write_wrapped(b"<?", &e, b"?>"),
             Event::PI(e) => self.write_wrapped(b"<?", &e, b"?>"),
             Event::DocType(e) => self.write_wrapped(b"<!DOCTYPE ", &e, b">"),
+            Event::GeneralRef(e) => self.write_wrapped(b"&", &e, b";"),
             Event::Eof => Ok(()),
         };
         if let Some(i) = self.indent.as_mut() {
diff --git a/src/writer/async_tokio.rs b/src/writer/async_tokio.rs
index dab4c5b2..4f1c79a6 100644
--- a/src/writer/async_tokio.rs
+++ b/src/writer/async_tokio.rs
@@ -40,6 +40,7 @@ impl<W: AsyncWrite + Unpin> Writer<W> {
             Event::Decl(e) => self.write_wrapped_async(b"<?", &e, b"?>").await,
             Event::PI(e) => self.write_wrapped_async(b"<?", &e, b"?>").await,
             Event::DocType(e) => self.write_wrapped_async(b"<!DOCTYPE ", &e, b">").await,
+            Event::GeneralRef(e) => self.write_wrapped_async(b"&", &e, b";").await,
             Event::Eof => Ok(()),
         };
         if let Some(i) = self.indent.as_mut() {
diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs
index 25ec86bc..94003c0c 100644
--- a/tests/async-tokio.rs
+++ b/tests/async-tokio.rs
@@ -29,18 +29,19 @@ async fn test_sample() {
     loop {
         reads += 1;
         assert!(
-            reads <= 5245,
+            reads <= 10000,
             "too many events, possible infinity loop: {reads}"
         );
-        match reader.read_event_into_async(&mut buf).await.unwrap() {
-            Start(_) => count += 1,
-            Decl(e) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()),
-            Eof => break,
-            _ => (),
+        match reader.read_event_into_async(&mut buf).await {
+            Ok(Start(_)) => count += 1,
+            Ok(Decl(e)) => assert_eq!(e.version().unwrap(), b"1.0".as_ref()),
+            Ok(Eof) => break,
+            Ok(_) => (),
+            Err(e) => panic!("{} at {}", e, reader.error_position()),
         }
         buf.clear();
     }
-    assert_eq!((count, reads), (1247, 5245));
+    assert_eq!((count, reads), (1247, 5457));
 }
 
 /// This tests checks that read_to_end() correctly returns span even when
diff --git a/tests/documents/html5.txt b/tests/documents/html5.txt
index 05f200d4..de0a5b43 100644
--- a/tests/documents/html5.txt
+++ b/tests/documents/html5.txt
@@ -5,6 +5,8 @@ StartElement(a, attr-error: position 7: attribute value must be enclosed in `"`
 Characters(Hey)
 EndElement(a)
 Characters(
- 
+)
+Reference(nbsp)
+Characters(
 )
 EndDocument
diff --git a/tests/html.rs b/tests/html.rs
index 19688064..b93c788a 100644
--- a/tests/html.rs
+++ b/tests/html.rs
@@ -21,7 +21,12 @@ fn escaped_characters_html() {
         r#"<e attr="&planck;&Egrave;&ell;&#x1D55D;&bigodot;">&boxDR;&boxDL;&#x02554;&#x02557;&#9556;&#9559;</e>"#,
         r#"
             |StartElement(e [attr="ℏÈℓ𝕝⨀"])
-            |Characters(╔╗╔╗╔╗)
+            |Reference(boxDR)
+            |Reference(boxDL)
+            |Reference(#x02554)
+            |Reference(#x02557)
+            |Reference(#9556)
+            |Reference(#9559)
             |EndElement(e)
             |EndDocument
         "#,
@@ -86,6 +91,10 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
                 Ok(c) => format!("Characters({})", &c),
                 Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
             },
+            Ok((_, Event::GeneralRef(e))) => match unescape(&decoder.decode(&e).unwrap()) {
+                Ok(c) => format!("Reference({})", &c),
+                Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err),
+            },
             Ok((_, Event::Eof)) => "EndDocument".to_string(),
             Err(e) => format!("Error: {}", e),
         };
diff --git a/tests/reader-errors.rs b/tests/reader-errors.rs
index 8f9c578e..0eecca7d 100644
--- a/tests/reader-errors.rs
+++ b/tests/reader-errors.rs
@@ -896,4 +896,29 @@ mod ill_formed {
     //                                   ^= 5
     err!(double_hyphen_in_comment4("<!-- -- -->") => 5: IllFormedError::DoubleHyphenInComment);
     //                                   ^= 5
+
+    mod reference {
+        use super::*;
+        use quick_xml::events::BytesRef;
+
+        err2!(unclosed1(".&")        => 1: IllFormedError::UnclosedReference);
+        err2!(unclosed2(".&x")       => 1: IllFormedError::UnclosedReference);
+        err2!(unclosed_num(".&#")    => 1: IllFormedError::UnclosedReference);
+        err2!(unclosed_dec(".&#2")   => 1: IllFormedError::UnclosedReference);
+        err2!(unclosed_hex1(".&#x")  => 1: IllFormedError::UnclosedReference);
+        err2!(unclosed_hex2(".&#xF") => 1: IllFormedError::UnclosedReference);
+
+        // We do not check correctness of references during parsing
+        ok!(empty("&;")   =>      2: Event::GeneralRef(BytesRef::new("")));
+        ok!(normal1("&x;") =>     3: Event::GeneralRef(BytesRef::new("x")));
+        ok!(normal2("&x;rest") => 3: Event::GeneralRef(BytesRef::new("x")));
+        ok!(num("&#;")    =>      3: Event::GeneralRef(BytesRef::new("#")));
+        ok!(dec("&#2;")   =>      4: Event::GeneralRef(BytesRef::new("#2")));
+        ok!(hex1("&#x;")  =>      4: Event::GeneralRef(BytesRef::new("#x")));
+        ok!(hex2("&#xF;") =>      5: Event::GeneralRef(BytesRef::new("#xF")));
+
+        // XML specification explicitly allowed any number of leading zeroes
+        ok!(long_dec("&#00000000000000000000000000000000000000032;")  => 44: Event::GeneralRef(BytesRef::new("#00000000000000000000000000000000000000032")));
+        ok!(long_hex("&#x00000000000000000000000000000000000000020;") => 45: Event::GeneralRef(BytesRef::new("#x00000000000000000000000000000000000000020")));
+    }
 }
diff --git a/tests/reader-references.rs b/tests/reader-references.rs
new file mode 100644
index 00000000..b0f3456e
--- /dev/null
+++ b/tests/reader-references.rs
@@ -0,0 +1,546 @@
+use quick_xml::events::{
+    BytesCData, BytesDecl, BytesEnd, BytesPI, BytesRef, BytesStart, BytesText, Event::*,
+};
+use quick_xml::reader::Reader;
+
+use pretty_assertions::assert_eq;
+
+mod character_reference {
+    use super::*;
+
+    mod dec {
+        use super::*;
+        use pretty_assertions::assert_eq;
+
+        #[test]
+        fn decl() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<?xml version=\"&{i};\"?>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Decl(BytesDecl::new(&format!("&{i};"), None, None)),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn pi() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<?&{i};?>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    PI(BytesPI::new(&format!("&{i};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn doctype() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<!DOCTYPE &{i};>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    DocType(BytesText::from_escaped(&format!("&{i};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn comment() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<!--&{i};-->");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Comment(BytesText::from_escaped(&format!("&{i};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn cdata() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<![CDATA[&{i};]]>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    CData(BytesCData::new(format!("&{i};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn text() {
+            for i in 0..=0x10FFFF {
+                let input = format!("&{i};");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    GeneralRef(BytesRef::new(format!("{i}"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn empty() {
+            for i in 0u32..=0x10FFFF {
+                let input = format!("<&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};/>");
+                let mut reader = Reader::from_str(&input);
+
+                let name_len = format!("&{i};").len();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Empty(BytesStart::from_content(
+                        format!("&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};"),
+                        name_len
+                    )),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn start() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};>");
+                let mut reader = Reader::from_str(&input);
+
+                let name_len = format!("&{i};").len();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Start(BytesStart::from_content(
+                        format!("&{i}; &{i};='&{i};' &{i};=\"&{i};\" &{i};=&{i};"),
+                        name_len
+                    )),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn end() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<></&{i};>");
+                let mut reader = Reader::from_str(&input);
+                reader.config_mut().check_end_names = false;
+
+                // Skip <>
+                reader.read_event().unwrap();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    End(BytesEnd::new(format!("&{i};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+    }
+
+    mod hex {
+        use super::*;
+        use pretty_assertions::assert_eq;
+
+        #[test]
+        fn decl() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<?xml version=\"&#{i:x};\"?>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Decl(BytesDecl::new(&format!("&#{i:x};"), None, None)),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn pi() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<?&#{i:x};?>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    PI(BytesPI::new(&format!("&#{i:x};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn doctype() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<!DOCTYPE &#{i:x};>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    DocType(BytesText::from_escaped(&format!("&#{i:x};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn comment() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<!--&#{i:x};-->");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Comment(BytesText::from_escaped(&format!("&#{i:x};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn cdata() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<![CDATA[&#{i:x};]]>");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    CData(BytesCData::new(format!("&#{i:x};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn text() {
+            for i in 0..=0x10FFFF {
+                let input = format!("&#{i:x};");
+                let mut reader = Reader::from_str(&input);
+
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    GeneralRef(BytesRef::new(format!("#{i:x}"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn empty() {
+            for i in 0u32..=0x10FFFF {
+                let input = format!(
+                    "<&#{i:x}; &#{i:x};='&#{i:x};' &#{i:x};=\"&#{i:x};\" &#{i:x};=&#{i:x};/>"
+                );
+                let mut reader = Reader::from_str(&input);
+
+                let name_len = format!("&#{i:x};").len();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Empty(BytesStart::from_content(
+                        format!(
+                            "&#{i:x}; &#{i:x};='&#{i:x};' &#{i:x};=\"&#{i:x};\" &#{i:x};=&#{i:x};"
+                        ),
+                        name_len
+                    )),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn start() {
+            for i in 0..=0x10FFFF {
+                let input = format!(
+                    "<&#{i:x}; &#{i:x};='&#{i:x};' &#{i:x};=\"&#{i:x};\" &#{i:x};=&#{i:x};>"
+                );
+                let mut reader = Reader::from_str(&input);
+
+                let name_len = format!("&#{i:x};").len();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    Start(BytesStart::from_content(
+                        format!(
+                            "&#{i:x}; &#{i:x};='&#{i:x};' &#{i:x};=\"&#{i:x};\" &#{i:x};=&#{i:x};"
+                        ),
+                        name_len
+                    )),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+
+        #[test]
+        fn end() {
+            for i in 0..=0x10FFFF {
+                let input = format!("<></&#{i:x};>");
+                let mut reader = Reader::from_str(&input);
+                reader.config_mut().check_end_names = false;
+
+                // Skip <>
+                reader.read_event().unwrap();
+                assert_eq!(
+                    reader.read_event().unwrap(),
+                    End(BytesEnd::new(format!("&#{i:x};"))),
+                    "Character reference {i}=0x{i:x}: {input}"
+                );
+            }
+        }
+    }
+}
+
+mod general_entity_reference {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn decl() {
+        let mut reader = Reader::from_str("<?xml version=\"&entity;\"?>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Decl(BytesDecl::new("&entity;", None, None)),
+        );
+    }
+
+    #[test]
+    fn pi() {
+        let mut reader = Reader::from_str("<?&entity;?>");
+
+        assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("&entity;")));
+    }
+
+    #[test]
+    fn doctype() {
+        let mut reader = Reader::from_str("<!DOCTYPE &entity;>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            DocType(BytesText::from_escaped("&entity;")),
+        );
+    }
+
+    #[test]
+    fn comment() {
+        let mut reader = Reader::from_str("<!--&entity;-->");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Comment(BytesText::from_escaped("&entity;")),
+        );
+    }
+
+    #[test]
+    fn cdata() {
+        let mut reader = Reader::from_str("<![CDATA[&entity;]]>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            CData(BytesCData::new("&entity;")),
+        );
+    }
+
+    #[test]
+    fn text() {
+        let mut reader = Reader::from_str("&entity;");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            GeneralRef(BytesRef::new("entity")),
+        );
+    }
+
+    #[test]
+    fn empty() {
+        let mut reader = Reader::from_str(
+            "<&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;/>",
+        );
+
+        let name_len = "&entity;".len();
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Empty(BytesStart::from_content(
+                "&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;",
+                name_len
+            )),
+        );
+    }
+
+    #[test]
+    fn start() {
+        let mut reader = Reader::from_str(
+            "<&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;>",
+        );
+
+        let name_len = "&entity;".len();
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Start(BytesStart::from_content(
+                "&entity; &entity;='&entity;' &entity;=\"&entity;\" &entity;=&entity;",
+                name_len
+            )),
+        );
+    }
+
+    #[test]
+    fn end() {
+        let mut reader = Reader::from_str("<></&entity;>");
+        reader.config_mut().check_end_names = false;
+
+        // Skip <>
+        reader.read_event().unwrap();
+        assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("&entity;")));
+    }
+}
+
+/// _Parameter entity references_ are references to entities recognized within DTD.
+/// Such references are recognized [only] inside the DTD (`<!DOCTYPE>` declaration)
+/// and have the form `%name;` (percent sign, name, semicolon).
+///
+/// Parameter entities are so-called _parsed entities_, i.e. the content of this
+/// reference is a part of DTD and MUST follow DTD grammar after all substitutions.
+/// That also means that DTD could be self-modified.
+///
+/// In those tests, however, parameter entity references are not recognized.
+///
+/// [only]: https://www.w3.org/TR/xml11/#indtd
+mod parameter_entity_reference {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn decl() {
+        let mut reader = Reader::from_str("<?xml version=\"%param;\"?>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Decl(BytesDecl::new("%param;", None, None)),
+        );
+    }
+
+    #[test]
+    fn pi() {
+        let mut reader = Reader::from_str("<?%param;?>");
+
+        assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("%param;")));
+    }
+
+    /// Because we do not parse DTD, we do not recognize parameter reference here yet.
+    /// TODO: Recognize parameter entity references once DTD parsing is implemented
+    #[test]
+    fn doctype() {
+        let mut reader = Reader::from_str("<!DOCTYPE %param;>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            DocType(BytesText::from_escaped("%param;")),
+        );
+    }
+
+    /// Comments can be part of DTD, but parameter entity references are not recognized within them.
+    ///
+    /// See: <https://www.w3.org/TR/xml11/#sec-comments>
+    #[test]
+    fn comment() {
+        let mut reader = Reader::from_str("<!--%param;-->");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Comment(BytesText::from_escaped("%param;")),
+        );
+    }
+
+    #[test]
+    fn cdata() {
+        let mut reader = Reader::from_str("<![CDATA[%param;]]>");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            CData(BytesCData::new("%param;")),
+        );
+    }
+
+    #[test]
+    fn text() {
+        let mut reader = Reader::from_str("%param;");
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Text(BytesText::from_escaped("%param;")),
+        );
+    }
+
+    #[test]
+    fn empty() {
+        let mut reader =
+            Reader::from_str("<%param; %param;='%param;' %param;=\"%param;\" %param;=%param;/>");
+
+        let name_len = "%param;".len();
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Empty(BytesStart::from_content(
+                "%param; %param;='%param;' %param;=\"%param;\" %param;=%param;",
+                name_len
+            )),
+        );
+    }
+
+    #[test]
+    fn start() {
+        let mut reader =
+            Reader::from_str("<%param; %param;='%param;' %param;=\"%param;\" %param;=%param;>");
+
+        let name_len = "%param;".len();
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Start(BytesStart::from_content(
+                "%param; %param;='%param;' %param;=\"%param;\" %param;=%param;",
+                name_len
+            )),
+        );
+    }
+
+    #[test]
+    fn end() {
+        let mut reader = Reader::from_str("<></%param;>");
+        reader.config_mut().check_end_names = false;
+
+        // Skip <>
+        reader.read_event().unwrap();
+        assert_eq!(reader.read_event().unwrap(), End(BytesEnd::new("%param;")));
+    }
+}
+
+#[test]
+fn mixed_text() {
+    let input = "text with &lt;&amp;'&#32;' or '&#x20;'";
+    let mut r = Reader::from_str(input);
+
+    assert_eq!(
+        r.read_event().unwrap(),
+        Text(BytesText::from_escaped("text with "))
+    );
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("lt")));
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("amp")));
+    assert_eq!(r.read_event().unwrap(), Text(BytesText::from_escaped("'")));
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("#32")));
+    assert_eq!(
+        r.read_event().unwrap(),
+        Text(BytesText::from_escaped("' or '"))
+    );
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("#x20")));
+    assert_eq!(r.read_event().unwrap(), Text(BytesText::from_escaped("'")));
+    assert_eq!(r.read_event().unwrap(), Eof);
+}
diff --git a/tests/reader.rs b/tests/reader.rs
index 2bc27e57..e05166ec 100644
--- a/tests/reader.rs
+++ b/tests/reader.rs
@@ -1,6 +1,6 @@
 use std::str::from_utf8;
 
-use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event::*};
+use quick_xml::events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event::*};
 use quick_xml::name::QName;
 use quick_xml::reader::Reader;
 
@@ -163,16 +163,17 @@ fn test_escaped_content() {
     let mut r = Reader::from_str("<a>&lt;test&gt;</a>");
 
     assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("a")));
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("lt")));
     match r.read_event() {
         Ok(Text(e)) => {
             assert_eq!(
                 &*e,
-                b"&lt;test&gt;",
-                "content unexpected: expecting '&lt;test&gt;', got '{:?}'",
+                b"test",
+                "content unexpected: expecting 'test', got '{:?}'",
                 from_utf8(&e)
             );
             match e.unescape() {
-                Ok(c) => assert_eq!(c, "<test>"),
+                Ok(c) => assert_eq!(c, "test"),
                 Err(e) => panic!(
                     "cannot escape content at position {}: {:?}",
                     r.error_position(),
@@ -187,6 +188,7 @@ fn test_escaped_content() {
             e
         ),
     }
+    assert_eq!(r.read_event().unwrap(), GeneralRef(BytesRef::new("gt")));
     assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("a")));
 }
 

From 08ec03a28dc8a6c9a188f968a007d1743068494a Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Fri, 21 Jun 2024 15:46:07 +0500
Subject: [PATCH 3/6] Update `custom_entities` example to show how to process
 events from expanded entities

---
 Changelog.md                |   2 +-
 examples/custom_entities.rs | 243 +++++++++++++++++++++++++++---------
 2 files changed, 188 insertions(+), 57 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 7b6efd9f..160ef7b6 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -18,7 +18,7 @@
 Now references to entities (as predefined, such as `&lt;`, as user-defined) reported as a new
 `Event::GeneralRef`.
 Caller can parse the content of the entity and stream events from it as it is required by the
-XML specification.
+XML specification. See the updated `custom_entities` example!
 
 ### New Features
 
diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs
index 37d172ac..be9d11ea 100644
--- a/examples/custom_entities.rs
+++ b/examples/custom_entities.rs
@@ -1,5 +1,7 @@
-//! This example demonstrate how custom entities can be extracted from the DOCTYPE!,
-//! and later use to decode text and attribute values.
+//! This example demonstrates how custom entities can be extracted from the DOCTYPE,
+//! and later used to:
+//! - insert new pieces of document (particular case - insert only textual content)
+//! - decode attribute values
 //!
 //! NB: this example is deliberately kept simple:
 //! * it assumes that the XML file is UTF-8 encoded (custom_entities must only contain UTF-8 data)
@@ -7,70 +9,199 @@
 //! * the regex in this example is simple but brittle;
 //! * it does not support the use of entities in entity declaration.
 
-use std::collections::HashMap;
+use std::borrow::Cow;
+use std::collections::{HashMap, VecDeque};
+use std::str::from_utf8;
 
-use quick_xml::escape::resolve_predefined_entity;
-use quick_xml::events::Event;
+use quick_xml::encoding::Decoder;
+use quick_xml::errors::Error;
+use quick_xml::escape::EscapeError;
+use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
+use quick_xml::name::QName;
 use quick_xml::reader::Reader;
 use regex::bytes::Regex;
 
-const DATA: &str = r#"
+use pretty_assertions::assert_eq;
 
-    <?xml version="1.0"?>
-    <!DOCTYPE test [
-    <!ENTITY msg "hello world" >
-    ]>
-    <test label="&msg;">&msg;</test>
+struct MyReader<'i> {
+    /// Stack of readers: the first element is the initial reader, the others are
+    /// readers created for each resolved entity
+    readers: VecDeque<Reader<&'i [u8]>>,
+    /// Map of captured internal _parsed general entities_. _Parsed_ means that
+    /// value of the entity is parsed by XML reader
+    entities: HashMap<&'i [u8], &'i [u8]>,
+    /// In this example we use simple regular expression to capture entities from DTD.
+    /// In real application you should use DTD parser.
+    entity_re: Regex,
+}
+impl<'i> MyReader<'i> {
+    fn new(input: &'i str) -> Result<Self, regex::Error> {
+        let mut reader = Reader::from_str(input);
+        reader.config_mut().trim_text(true);
 
-"#;
+        let mut readers = VecDeque::new();
+        readers.push_back(reader);
 
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut reader = Reader::from_str(DATA);
-    reader.config_mut().trim_text(true);
-
-    let mut custom_entities: HashMap<String, String> = HashMap::new();
-    let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
-
-    loop {
-        match reader.read_event() {
-            Ok(Event::DocType(ref e)) => {
-                for cap in entity_re.captures_iter(e) {
-                    custom_entities.insert(
-                        reader.decoder().decode(&cap[1])?.into_owned(),
-                        reader.decoder().decode(&cap[2])?.into_owned(),
-                    );
-                }
-            }
-            Ok(Event::Start(ref e)) => {
-                if let b"test" = e.name().as_ref() {
-                    let attributes = e
-                        .attributes()
-                        .map(|a| {
-                            a.unwrap()
-                                .decode_and_unescape_value_with(reader.decoder(), |ent| {
-                                    custom_entities.get(ent).map(|s| s.as_str())
-                                })
-                                .unwrap()
-                                .into_owned()
-                        })
-                        .collect::<Vec<_>>();
-                    println!("attributes values: {:?}", attributes);
+        // Capture "name" and "content" from such string:
+        // <!ENTITY name "content" >
+        let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
+        Ok(Self {
+            readers,
+            entities: HashMap::new(),
+            entity_re,
+        })
+    }
+    fn read_event(&mut self) -> Result<Event<'i>, Error> {
+        loop {
+            if let Some(mut reader) = self.readers.pop_back() {
+                match reader.read_event()? {
+                    // Capture defined entities from the DTD inside document and skip that event
+                    Event::DocType(e) => {
+                        self.readers.push_back(reader);
+                        self.capture(e);
+                        continue;
+                    }
+                    // When an entity is referenced, create a new reader with the same settings
+                    // as the current reader has and push it onto the top of the stack. Then try
+                    // to read the next event from it (on the next iteration)
+                    Event::GeneralRef(e) => {
+                        if let Some(ch) = e.resolve_char_ref()? {
+                            self.readers.push_back(reader);
+                            return Ok(Event::Text(BytesText::from_escaped(ch.to_string())));
+                        }
+                        let mut r = Reader::from_reader(self.resolve(&e)?);
+                        *r.config_mut() = reader.config().clone();
+
+                        self.readers.push_back(reader);
+                        self.readers.push_back(r);
+                        continue;
+                    }
+                    // When reader is exhausted, do not return it to the stack
+                    Event::Eof => continue,
+
+                    // Return all other events to caller
+                    e => {
+                        self.readers.push_back(reader);
+                        return Ok(e);
+                    }
                 }
             }
-            Ok(Event::Text(ref e)) => {
-                println!(
-                    "text value: {}",
-                    e.unescape_with(|ent| match custom_entities.get(ent) {
-                        Some(s) => Some(s.as_str()),
-                        None => resolve_predefined_entity(ent),
-                    })
-                    .unwrap()
-                );
-            }
-            Ok(Event::Eof) => break,
-            Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
-            _ => (),
+            return Ok(Event::Eof);
         }
     }
+
+    /// In this example we use simple regular expression to capture entities from DTD.
+    /// In real application you should use DTD parser
+    fn capture(&mut self, doctype: BytesText<'i>) {
+        let doctype = match doctype.into_inner() {
+            Cow::Borrowed(doctype) => doctype,
+            Cow::Owned(_) => unreachable!("We are sure that event will be borrowed"),
+        };
+        for cap in self.entity_re.captures_iter(doctype) {
+            self.entities.insert(
+                cap.get(1).unwrap().as_bytes(),
+                cap.get(2).unwrap().as_bytes(),
+            );
+        }
+    }
+
+    fn resolve(&self, entity: &[u8]) -> Result<&'i [u8], EscapeError> {
+        match self.entities.get(entity) {
+            Some(replacement) => Ok(replacement),
+            None => Err(EscapeError::UnrecognizedEntity(
+                0..0,
+                String::from_utf8_lossy(entity).into_owned(),
+            )),
+        }
+    }
+
+    fn get_entity(&self, entity: &str) -> Option<&'i str> {
+        self.entities
+            .get(entity.as_bytes())
+            // SAFETY: We are sure that slices are correct UTF-8 because we get
+            // them from rust string
+            .map(|value| from_utf8(value).unwrap())
+    }
+
+    fn decoder(&self) -> Decoder {
+        self.readers.back().unwrap().decoder()
+    }
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut reader = MyReader::new(
+        r#"
+        <!DOCTYPE test [
+        <!ENTITY text "hello world" >
+        <!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
+        <!ENTITY element2 "<a>&element1;</a>" >
+        ]>
+        <test label="Message: &text;">&#39;&element2;&#x27;</test>
+        "#,
+    )?;
+
+    let event = reader.read_event()?;
+    assert_eq!(
+        event,
+        Event::Start(BytesStart::from_content(
+            r#"test label="Message: &text;""#,
+            4
+        ))
+    );
+    if let Event::Start(e) = event {
+        let mut attrs = e.attributes();
+
+        let label = attrs.next().unwrap()?;
+        assert_eq!(label.key, QName(b"label"));
+        assert_eq!(
+            label.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
+            "Message: hello world"
+        );
+
+        assert_eq!(attrs.next(), None);
+    }
+
+    // This is decoded decimal character reference &#39;
+    assert_eq!(
+        reader.read_event()?,
+        Event::Text(BytesText::from_escaped("'"))
+    );
+
+    //--------------------------------------------------------------------------
+    // This part was inserted into original document from entity defined in DTD
+
+    assert_eq!(reader.read_event()?, Event::Start(BytesStart::new("a")));
+    let event = reader.read_event()?;
+    assert_eq!(
+        event,
+        Event::Empty(BytesStart::from_content(
+            r#"dtd attr = 'Message: &text;'"#,
+            3
+        ))
+    );
+    if let Event::Empty(e) = event {
+        let mut attrs = e.attributes();
+
+        let attr = attrs.next().unwrap()?;
+        assert_eq!(attr.key, QName(b"attr"));
+        assert_eq!(
+            attr.decode_and_unescape_value_with(reader.decoder(), |ent| reader.get_entity(ent))?,
+            "Message: hello world"
+        );
+
+        assert_eq!(attrs.next(), None);
+    }
+    assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("a")));
+    //--------------------------------------------------------------------------
+
+    // This is decoded hexadecimal character reference &#x27;
+    assert_eq!(
+        reader.read_event()?,
+        Event::Text(BytesText::from_escaped("'"))
+    );
+
+    assert_eq!(reader.read_event()?, Event::End(BytesEnd::new("test")));
+    assert_eq!(reader.read_event()?, Event::Eof);
+
     Ok(())
 }

From 094a88e7a25171425186be3b3590b512c2cb2efc Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Wed, 12 Jun 2024 01:10:51 +0500
Subject: [PATCH 4/6] Replace `BytesText::unescape` and `unescape_with` by
 `decode`

Text events produced by the Reader cannot contain escaped data anymore;
all such data is now represented by Event::GeneralRef
---
 Changelog.md                       |  3 +++
 benches/macrobenches.rs            |  8 ++++----
 benches/microbenches.rs            |  2 +-
 fuzz/fuzz_targets/fuzz_target_1.rs |  2 +-
 src/de/mod.rs                      |  6 ++----
 src/events/mod.rs                  | 28 ++++------------------------
 src/reader/async_tokio.rs          |  6 +++---
 src/reader/buffered_reader.rs      |  2 +-
 src/reader/mod.rs                  |  2 +-
 src/reader/ns_reader.rs            |  8 ++++----
 src/reader/slice_reader.rs         |  2 +-
 tests/encodings.rs                 |  2 +-
 tests/fuzzing.rs                   |  2 +-
 tests/reader.rs                    |  2 +-
 tests/roundtrip.rs                 |  2 +-
 15 files changed, 29 insertions(+), 48 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 160ef7b6..ca0c854d 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -29,6 +29,9 @@ XML specification. See the updated `custom_entities` example!
 
 ### Misc Changes
 
+- [#766]: `BytesText::unescape` and `BytesText::unescape_with` are replaced by `BytesText::decode`.
+  Text events no longer contain escaped parts; those are now reported as `Event::GeneralRef`.
+
 [#766]: https://github.com/tafia/quick-xml/pull/766
 [general entity]: https://www.w3.org/TR/xml11/#gen-entity
 
diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs
index 2b882b12..a89c34e4 100644
--- a/benches/macrobenches.rs
+++ b/benches/macrobenches.rs
@@ -54,7 +54,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
                 }
             }
             Event::Text(e) => {
-                criterion::black_box(e.unescape()?);
+                criterion::black_box(e.decode()?);
             }
             Event::CData(e) => {
                 criterion::black_box(e.into_inner());
@@ -79,7 +79,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
                 }
             }
             Event::Text(e) => {
-                criterion::black_box(e.unescape()?);
+                criterion::black_box(e.decode()?);
             }
             Event::CData(e) => {
                 criterion::black_box(e.into_inner());
@@ -105,7 +105,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
                 }
             }
             (resolved_ns, Event::Text(e)) => {
-                criterion::black_box(e.unescape()?);
+                criterion::black_box(e.decode()?);
                 criterion::black_box(resolved_ns);
             }
             (resolved_ns, Event::CData(e)) => {
@@ -133,7 +133,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
                 }
             }
             (resolved_ns, Event::Text(e)) => {
-                criterion::black_box(e.unescape()?);
+                criterion::black_box(e.decode()?);
                 criterion::black_box(resolved_ns);
             }
             (resolved_ns, Event::CData(e)) => {
diff --git a/benches/microbenches.rs b/benches/microbenches.rs
index 2f4ece04..498ad7a2 100644
--- a/benches/microbenches.rs
+++ b/benches/microbenches.rs
@@ -145,7 +145,7 @@ fn one_event(c: &mut Criterion) {
             config.trim_text(true);
             config.check_end_names = false;
             match r.read_event() {
-                Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
+                Ok(Event::Comment(e)) => nbtxt += e.decode().unwrap().len(),
                 something_else => panic!("Did not expect {:?}", something_else),
             };
 
diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs
index d13e6081..dbadfe2f 100644
--- a/fuzz/fuzz_targets/fuzz_target_1.rs
+++ b/fuzz/fuzz_targets/fuzz_target_1.rs
@@ -43,7 +43,7 @@ where
             | Ok(Event::Comment(ref e))
             | Ok(Event::DocType(ref e)) => {
                 debug_format!(e);
-                if let Err(err) = e.unescape() {
+                if let Err(err) = e.decode() {
                     debug_format!(err);
                     break;
                 }
diff --git a/src/de/mod.rs b/src/de/mod.rs
index 484c31b0..a5bacfae 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -2223,9 +2223,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
                         // FIXME: Actually, we should trim after decoding text, but now we trim before
                         e.inplace_trim_end();
                     }
-                    result
-                        .to_mut()
-                        .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
+                    result.to_mut().push_str(&e.decode()?);
                 }
                 PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
 
@@ -2247,7 +2245,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
                         // FIXME: Actually, we should trim after decoding text, but now we trim before
                         continue;
                     }
-                    self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
+                    self.drain_text(e.decode()?)
                 }
                 PayloadEvent::CData(e) => self.drain_text(e.decode()?),
                 PayloadEvent::DocType(e) => {
diff --git a/src/events/mod.rs b/src/events/mod.rs
index c274085a..e8b46f15 100644
--- a/src/events/mod.rs
+++ b/src/events/mod.rs
@@ -47,10 +47,7 @@ use std::str::from_utf8;
 
 use crate::encoding::{Decoder, EncodingError};
 use crate::errors::{Error, IllFormedError};
-use crate::escape::{
-    escape, minimal_escape, parse_number, partial_escape, resolve_predefined_entity, unescape_with,
-    EscapeError,
-};
+use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
 use crate::name::{LocalName, QName};
 #[cfg(feature = "serialize")]
 use crate::utils::CowRef;
@@ -580,29 +577,12 @@ impl<'a> BytesText<'a> {
         }
     }
 
-    /// Decodes then unescapes the content of the event.
-    ///
-    /// This will allocate if the value contains any escape sequences or in
-    /// non-UTF-8 encoding.
-    pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
-        self.unescape_with(resolve_predefined_entity)
-    }
-
-    /// Decodes then unescapes the content of the event with custom entities.
+    /// Decodes the content of the event.
     ///
     /// This will allocate if the value contains any escape sequences or in
     /// non-UTF-8 encoding.
-    pub fn unescape_with<'entity>(
-        &self,
-        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
-    ) -> Result<Cow<'a, str>, Error> {
-        let decoded = self.decoder.decode_cow(&self.content)?;
-
-        match unescape_with(&decoded, resolve_entity)? {
-            // Because result is borrowed, no replacements was done and we can use original string
-            Cow::Borrowed(_) => Ok(decoded),
-            Cow::Owned(s) => Ok(s.into()),
-        }
+    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
+        self.decoder.decode_cow(&self.content)
     }
 
     /// Removes leading XML whitespace bytes from text content.
diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs
index a9237de0..a79ced82 100644
--- a/src/reader/async_tokio.rs
+++ b/src/reader/async_tokio.rs
@@ -103,7 +103,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
     /// loop {
     ///     match reader.read_event_into_async(&mut buf).await {
     ///         Ok(Event::Start(_)) => count += 1,
-    ///         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
+    ///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
     ///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
     ///         Ok(Event::Eof) => break,
     ///         _ => (),
@@ -237,7 +237,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
     ///             }
     ///         }
     ///         Event::Text(e) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         Event::Eof => break,
     ///         _ => (),
@@ -373,7 +373,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
     ///         (_, Event::Start(_)) => unreachable!(),
     ///
     ///         (_, Event::Text(e)) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         (_, Event::Eof) => break,
     ///         _ => (),
diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs
index f268448c..44930420 100644
--- a/src/reader/buffered_reader.rs
+++ b/src/reader/buffered_reader.rs
@@ -372,7 +372,7 @@ impl<R: BufRead> Reader<R> {
     /// loop {
     ///     match reader.read_event_into(&mut buf) {
     ///         Ok(Event::Start(_)) => count += 1,
-    ///         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
+    ///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
     ///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
     ///         Ok(Event::Eof) => break,
     ///         _ => (),
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index cf806e3e..f95327fb 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -718,7 +718,7 @@ where
 ///                 _ => (),
 ///             }
 ///         }
-///         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
+///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
 ///
 ///         // There are several other `Event`s we do not consider here
 ///         _ => (),
diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs
index 07220815..d9f84f62 100644
--- a/src/reader/ns_reader.rs
+++ b/src/reader/ns_reader.rs
@@ -419,7 +419,7 @@ impl<R: BufRead> NsReader<R> {
     ///             }
     ///         }
     ///         Event::Text(e) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         Event::Eof => break,
     ///         _ => (),
@@ -478,7 +478,7 @@ impl<R: BufRead> NsReader<R> {
     ///         (_, Event::Start(_)) => unreachable!(),
     ///
     ///         (_, Event::Text(e)) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         (_, Event::Eof) => break,
     ///         _ => (),
@@ -664,7 +664,7 @@ impl<'i> NsReader<&'i [u8]> {
     ///             }
     ///         }
     ///         Event::Text(e) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         Event::Eof => break,
     ///         _ => (),
@@ -726,7 +726,7 @@ impl<'i> NsReader<&'i [u8]> {
     ///         (_, Event::Start(_)) => unreachable!(),
     ///
     ///         (_, Event::Text(e)) => {
-    ///             txt.push(e.unescape().unwrap().into_owned())
+    ///             txt.push(e.decode().unwrap().into_owned())
     ///         }
     ///         (_, Event::Eof) => break,
     ///         _ => (),
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
index 37439597..c3b501ec 100644
--- a/src/reader/slice_reader.rs
+++ b/src/reader/slice_reader.rs
@@ -62,7 +62,7 @@ impl<'a> Reader<&'a [u8]> {
     /// loop {
     ///     match reader.read_event().unwrap() {
     ///         Event::Start(e) => count += 1,
-    ///         Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()),
+    ///         Event::Text(e) => txt.push(e.decode().unwrap().into_owned()),
     ///         Event::Eof => break,
     ///         _ => (),
     ///     }
diff --git a/tests/encodings.rs b/tests/encodings.rs
index 5f5676fa..7b64e167 100644
--- a/tests/encodings.rs
+++ b/tests/encodings.rs
@@ -37,7 +37,7 @@ fn test_koi8_r_encoding() {
     loop {
         match r.read_event_into(&mut buf) {
             Ok(Text(e)) => {
-                e.unescape().unwrap();
+                e.decode().unwrap();
             }
             Ok(Eof) => break,
             _ => (),
diff --git a/tests/fuzzing.rs b/tests/fuzzing.rs
index 2740763c..25cf6989 100644
--- a/tests/fuzzing.rs
+++ b/tests/fuzzing.rs
@@ -38,7 +38,7 @@ fn fuzz_101() {
                 }
             }
             Ok(Event::Text(e)) => {
-                if e.unescape().is_err() {
+                if e.decode().is_err() {
                     break;
                 }
             }
diff --git a/tests/reader.rs b/tests/reader.rs
index e05166ec..fecdeabc 100644
--- a/tests/reader.rs
+++ b/tests/reader.rs
@@ -172,7 +172,7 @@ fn test_escaped_content() {
                 "content unexpected: expecting 'test', got '{:?}'",
                 from_utf8(&e)
             );
-            match e.unescape() {
+            match e.decode() {
                 Ok(c) => assert_eq!(c, "test"),
                 Err(e) => panic!(
                     "cannot escape content at position {}: {:?}",
diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs
index 68726195..4fb9ec53 100644
--- a/tests/roundtrip.rs
+++ b/tests/roundtrip.rs
@@ -236,7 +236,7 @@ fn reescape_text() {
         match reader.read_event().unwrap() {
             Eof => break,
             Text(e) => {
-                let t = e.unescape().unwrap();
+                let t = e.decode().unwrap();
                 assert!(writer.write_event(Text(BytesText::new(&t))).is_ok());
             }
             e => assert!(writer.write_event(e).is_ok()),

From dcc3a6c30defe791ce047975745a02f82725accd Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Wed, 12 Jun 2024 01:11:37 +0500
Subject: [PATCH 5/6] Rework entity resolution in serde Deserializer

Fixed (18):
  serde-de (9):
    borrow::escaped::element
    borrow::escaped::top_level
    resolve::resolve_custom_entity
    trivial::text::byte_buf
    trivial::text::bytes
    trivial::text::string::field
    trivial::text::string::naked
    trivial::text::string::text
    xml_schema_lists::element::text::string
  serde-migrated (1):
    test_parse_string
  serde-se (5):
    with_root::char_amp
    with_root::char_gt
    with_root::char_lt
    with_root::str_escaped
    with_root::tuple
  --doc (3):
    src\de\resolver.rs - de::resolver::EntityResolver (line 13)
---
 src/de/mod.rs | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/de/mod.rs b/src/de/mod.rs
index a5bacfae..31ec30cb 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -2014,7 +2014,8 @@ use crate::{
     de::map::ElementMapAccess,
     encoding::Decoder,
     errors::Error,
-    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
+    escape::{parse_number, EscapeError},
+    events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event},
     name::QName,
     reader::Reader,
     utils::CowRef,
@@ -2133,6 +2134,8 @@ pub enum PayloadEvent<'a> {
     CData(BytesCData<'a>),
     /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
     DocType(BytesText<'a>),
+    /// Reference `&ref;` in the textual data.
+    GeneralRef(BytesRef<'a>),
     /// End of XML document.
     Eof,
 }
@@ -2147,6 +2150,7 @@ impl<'a> PayloadEvent<'a> {
             PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
             PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
             PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
+            PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()),
             PayloadEvent::Eof => PayloadEvent::Eof,
         }
     }
@@ -2201,7 +2205,7 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
         // If next event is a text or CDATA, we should not trim trailing spaces
         !matches!(
             self.lookahead,
-            Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
+            Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_))
         )
     }
 
@@ -2226,9 +2230,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
                     result.to_mut().push_str(&e.decode()?);
                 }
                 PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
+                PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
 
-                // SAFETY: current_event_is_last_text checks that event is Text or CData
-                _ => unreachable!("Only `Text` and `CData` events can come here"),
+                // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
+                _ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"),
             }
         }
         Ok(DeEvent::Text(Text { text: result }))
@@ -2254,11 +2259,32 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
                         .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
                     continue;
                 }
+                PayloadEvent::GeneralRef(e) => {
+                    let mut text = String::new();
+                    self.resolve_reference(&mut text, e)?;
+                    self.drain_text(text.into())
+                }
                 PayloadEvent::Eof => Ok(DeEvent::Eof),
             };
         }
     }
 
+    fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> {
+        let len = event.len();
+        let reference = self.decoder().decode(&event)?;
+
+        if let Some(num) = reference.strip_prefix('#') {
+            let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
+            result.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
+            return Ok(());
+        }
+        if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) {
+            result.push_str(value);
+            return Ok(());
+        }
+        Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into())
+    }
+
     #[inline]
     fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
         match self.lookahead {
@@ -3027,7 +3053,7 @@ impl StartTrimmer {
             Event::End(e) => (PayloadEvent::End(e), true),
             Event::Eof => (PayloadEvent::Eof, true),
 
-            // Do not trim next text event after Text or CDATA event
+            // Do not trim next text event after Text, CDATA or reference event
             Event::CData(e) => (PayloadEvent::CData(e), false),
             Event::Text(mut e) => {
                 // If event is empty after trimming, skip it
@@ -3036,6 +3062,7 @@ impl StartTrimmer {
                 }
                 (PayloadEvent::Text(e), false)
             }
+            Event::GeneralRef(e) => (PayloadEvent::GeneralRef(e), false),
 
             _ => return None,
         };

From 0631d47c5affe7dff4a802358bace9036aedfd6d Mon Sep 17 00:00:00 2001
From: Mingun <Alexander_Sergey@mail.ru>
Date: Sun, 7 Jul 2024 18:35:31 +0500
Subject: [PATCH 6/6] Add `allow_dangling_amp` configuration option and allow
 dangling `&`

---
 Changelog.md                  |  3 ++
 src/reader/buffered_reader.rs |  6 ++--
 src/reader/mod.rs             | 62 +++++++++++++++++++++++++-------
 src/reader/slice_reader.rs    |  8 ++---
 tests/reader-config.rs        | 68 ++++++++++++++++++++++++++++++++++-
 5 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index ca0c854d..3c1f0a00 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -24,6 +24,9 @@ XML specification. See the updated `custom_entities` example!
 
 - [#766]: Allow to parse resolved entities as XML fragments and stream events from them.
 - [#766]: Added new event `Event::GeneralRef` with content of [general entity].
+- [#766]: Added new configuration option `allow_dangling_amp` which allows
+  a `&` not followed by `;` in textual data; some applications require this
+  for compatibility reasons.
 
 ### Bug Fixes
 
diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs
index 44930420..9b47da34 100644
--- a/src/reader/buffered_reader.rs
+++ b/src/reader/buffered_reader.rs
@@ -161,7 +161,7 @@ macro_rules! impl_buffered_source {
 
                         *position += read;
 
-                        return ReadRefResult::UpToRef;
+                        return ReadRefResult::UpToRef(&buf[start..]);
                     }
                     Some(i) => {
                         let is_end = available[i] == b';';
@@ -177,7 +177,7 @@ macro_rules! impl_buffered_source {
                         return if is_end {
                             ReadRefResult::Ref(&buf[start..])
                         } else {
-                            ReadRefResult::UpToMarkup
+                            ReadRefResult::UpToMarkup(&buf[start..])
                         };
                     }
                     None => {
@@ -191,7 +191,7 @@ macro_rules! impl_buffered_source {
             }
 
             *position += read;
-            ReadRefResult::UpToEof
+            ReadRefResult::UpToEof(&buf[start..])
         }
 
         #[inline]
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index f95327fb..49a9e249 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -24,6 +24,32 @@ use crate::reader::state::ReaderState;
 #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
 #[non_exhaustive]
 pub struct Config {
+    /// Whether a lone ampersand character (without a paired semicolon) should be
+    /// allowed in textual content. Unless enabled, a dangling ampersand causes
+    /// the [`Error::IllFormed(UnclosedReference)`] to be returned from read methods.
+    ///
+    /// Default: `false`
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use quick_xml::events::{BytesRef, BytesText, Event};
+    /// # use quick_xml::reader::Reader;
+    /// # use pretty_assertions::assert_eq;
+    /// let mut reader = Reader::from_str("text with & &amp; & alone");
+    /// reader.config_mut().allow_dangling_amp = true;
+    ///
+    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
+    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
+    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
+    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
+    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
+    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
+    /// ```
+    ///
+    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
+    pub allow_dangling_amp: bool,
+
     /// Whether unmatched closing tag names should be allowed. Unless enabled,
     /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
     /// is returned from read methods.
@@ -210,6 +236,7 @@ impl Config {
 impl Default for Config {
     fn default() -> Self {
         Self {
+            allow_dangling_amp: false,
             allow_unmatched_ends: false,
             check_comments: false,
             check_end_names: true,
@@ -261,18 +288,29 @@ macro_rules! read_event_impl {
                             Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
                         }
                         // Go to Done state
-                        ReadRefResult::UpToEof => {
+                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
+                            $self.state.state = ParseState::Done;
+                            Ok(Event::Text($self.state.emit_text(bytes)))
+                        }
+                        ReadRefResult::UpToEof(_) => {
                             $self.state.state = ParseState::Done;
                             $self.state.last_error_offset = start;
                             Err(Error::IllFormed(IllFormedError::UnclosedReference))
                         }
                         // Do not change state, stay in InsideRef
-                        ReadRefResult::UpToRef => {
+                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
+                            Ok(Event::Text($self.state.emit_text(bytes)))
+                        }
+                        ReadRefResult::UpToRef(_) => {
                             $self.state.last_error_offset = start;
                             Err(Error::IllFormed(IllFormedError::UnclosedReference))
                         }
                         // Go to InsideMarkup state
-                        ReadRefResult::UpToMarkup => {
+                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
+                            $self.state.state = ParseState::InsideMarkup;
+                            Ok(Event::Text($self.state.emit_text(bytes)))
+                        }
+                        ReadRefResult::UpToMarkup(_) => {
                             $self.state.state = ParseState::InsideMarkup;
                             $self.state.last_error_offset = start;
                             Err(Error::IllFormed(IllFormedError::UnclosedReference))
@@ -997,13 +1035,13 @@ enum ReadRefResult<'r> {
     /// Contains text block up to EOF. Neither end of reference (`;`), start of
     /// another reference (`&`) or start of markup (`<`) characters was found.
     /// Result includes start `&`.
-    UpToEof,
+    UpToEof(&'r [u8]),
     /// Contains text block up to next possible reference (`&` character).
     /// Result includes start `&`.
-    UpToRef,
+    UpToRef(&'r [u8]),
     /// Contains text block up to start of markup (`<` character).
     /// Result includes start `&`.
-    UpToMarkup,
+    UpToMarkup(&'r [u8]),
     /// IO error occurred.
     Err(io::Error),
 }
@@ -1722,8 +1760,8 @@ mod test {
                     //                 ^= 2
 
                     match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
-                        ReadRefResult::UpToEof => (),
-                        x => panic!("Expected `UpToEof`, but got `{:?}`", x),
+                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
+                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
                     }
                     assert_eq!(position, 2);
                 }
@@ -1736,8 +1774,8 @@ mod test {
                     //                 ^= 2
 
                     match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
-                        ReadRefResult::UpToRef => (),
-                        x => panic!("Expected `UpToRef`, but got `{:?}`", x),
+                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
+                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
                     }
                     assert_eq!(position, 2);
                 }
@@ -1750,8 +1788,8 @@ mod test {
                     //                  ^= 3
 
                     match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
-                        ReadRefResult::UpToMarkup => (),
-                        x => panic!("Expected `UpToMarkup`, but got `{:?}`", x),
+                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
+                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
                     }
                     assert_eq!(position, 3);
                 }
diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs
index c3b501ec..311edf6a 100644
--- a/src/reader/slice_reader.rs
+++ b/src/reader/slice_reader.rs
@@ -306,11 +306,11 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
             // Do not consume `&` because it may be lone and we would be need to
             // return it as part of Text event
             Some(i) if self[i + 1] == b'&' => {
-                let (_, rest) = self.split_at(i + 1);
+                let (bytes, rest) = self.split_at(i + 1);
                 *self = rest;
                 *position += i as u64 + 1;
 
-                ReadRefResult::UpToRef
+                ReadRefResult::UpToRef(bytes)
             }
             Some(i) => {
                 let end = i + 1;
@@ -323,7 +323,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
                 if is_end {
                     ReadRefResult::Ref(bytes)
                 } else {
-                    ReadRefResult::UpToMarkup
+                    ReadRefResult::UpToMarkup(bytes)
                 }
             }
             None => {
@@ -331,7 +331,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
                 *self = &[];
                 *position += bytes.len() as u64;
 
-                ReadRefResult::UpToEof
+                ReadRefResult::UpToEof(bytes)
             }
         }
     }
diff --git a/tests/reader-config.rs b/tests/reader-config.rs
index 8796075e..09f820a3 100644
--- a/tests/reader-config.rs
+++ b/tests/reader-config.rs
@@ -6,9 +6,75 @@
 //! Please keep tests sorted (exceptions are allowed if options are tightly related).
 
 use quick_xml::errors::{Error, IllFormedError};
-use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesStart, BytesText, Event};
+use quick_xml::events::{BytesCData, BytesEnd, BytesPI, BytesRef, BytesStart, BytesText, Event};
 use quick_xml::reader::Reader;
 
+mod allow_dangling_amp {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn false_() {
+        let mut reader = Reader::from_str("&&&lt;&");
+        reader.config_mut().allow_dangling_amp = false;
+
+        match reader.read_event() {
+            Err(Error::IllFormed(cause)) => {
+                assert_eq!(cause, IllFormedError::UnclosedReference);
+            }
+            x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+        }
+        assert_eq!(reader.error_position()..reader.buffer_position(), 0..1);
+
+        match reader.read_event() {
+            Err(Error::IllFormed(cause)) => {
+                assert_eq!(cause, IllFormedError::UnclosedReference);
+            }
+            x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+        }
+        assert_eq!(reader.error_position()..reader.buffer_position(), 1..2);
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Event::GeneralRef(BytesRef::new("lt"))
+        );
+        match reader.read_event() {
+            Err(Error::IllFormed(cause)) => {
+                assert_eq!(cause, IllFormedError::UnclosedReference);
+            }
+            x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x),
+        }
+        assert_eq!(reader.error_position()..reader.buffer_position(), 6..7);
+
+        assert_eq!(reader.read_event().unwrap(), Event::Eof);
+        assert_eq!(reader.error_position()..reader.buffer_position(), 6..7);
+    }
+
+    #[test]
+    fn true_() {
+        let mut reader = Reader::from_str("&&&lt;&");
+        reader.config_mut().allow_dangling_amp = true;
+
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Event::Text(BytesText::from_escaped("&"))
+        );
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Event::Text(BytesText::from_escaped("&"))
+        );
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Event::GeneralRef(BytesRef::new("lt"))
+        );
+        assert_eq!(
+            reader.read_event().unwrap(),
+            Event::Text(BytesText::from_escaped("&"))
+        );
+        assert_eq!(reader.read_event().unwrap(), Event::Eof);
+    }
+}
+
 mod allow_unmatched_ends {
     use super::*;
     use pretty_assertions::assert_eq;