Skip to content

Commit

Permalink
Add helper functions to resolve XML and HTML entities:
Browse files Browse the repository at this point in the history
- `quick_xml::escape::resolve_predefined_entity`
- `quick_xml::escape::resolve_xml_entity`
- `quick_xml::escape::resolve_html5_entity`
  • Loading branch information
Mingun committed May 30, 2024
1 parent dd1cb18 commit 10d1ff8
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 7 deletions.
5 changes: 5 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ to get an offset of the error position. For `SyntaxError`s the range
- [#722]: Allow to pass owned strings to `Writer::create_element`. This is breaking change!
- [#275]: Added `ElementWriter::new_line()` which enables pretty printing elements with multiple attributes.
- [#743]: Add `Deserializer::get_ref()` to get XML Reader from serde Deserializer
- [#734]: Add helper functions to resolve predefined XML and HTML5 entities:
  - `quick_xml::escape::resolve_predefined_entity`
  - `quick_xml::escape::resolve_xml_entity`
  - `quick_xml::escape::resolve_html5_entity`

### Bug Fixes

Expand Down Expand Up @@ -83,6 +87,7 @@ to get an offset of the error position. For `SyntaxError`s the range
[#704]: https://github.com/tafia/quick-xml/pull/704
[#705]: https://github.com/tafia/quick-xml/pull/705
[#722]: https://github.com/tafia/quick-xml/pull/722
[#734]: https://github.com/tafia/quick-xml/pull/734
[#738]: https://github.com/tafia/quick-xml/pull/738
[#743]: https://github.com/tafia/quick-xml/pull/743
[#748]: https://github.com/tafia/quick-xml/pull/748
Expand Down
50 changes: 43 additions & 7 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ where
if let Some(entity) = pat.strip_prefix('#') {
let codepoint = parse_number(entity, start..end)?;
unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
} else if let Some(value) = named_entity(pat) {
} else if let Some(value) = resolve_predefined_entity(pat) {
unescaped.push_str(value);
} else if let Some(value) = resolve_entity(pat) {
unescaped.push_str(value);
Expand All @@ -248,10 +248,45 @@ where
}
}

#[cfg(not(feature = "escape-html"))]
fn named_entity(name: &str) -> Option<&str> {
/// Looks up a predefined entity by name, with the set of recognized entities
/// selected at compile time by the
/// [`escape-html`](https://docs.rs/quick-xml/latest/quick_xml/#escape-html) feature.
///
/// - Feature disabled: the call is forwarded to [`resolve_xml_entity`].
/// - Feature enabled: the call is forwarded to [`resolve_html5_entity`].
///
/// The result of the selected resolver is returned unchanged.
#[inline]
pub fn resolve_predefined_entity(entity: &str) -> Option<&'static str> {
    // Exactly one of the two bindings below survives conditional compilation.
    #[cfg(feature = "escape-html")]
    let resolved = resolve_html5_entity(entity);

    #[cfg(not(feature = "escape-html"))]
    let resolved = resolve_xml_entity(entity);

    resolved
}

/// Resolves predefined XML entities. If specified entity is not a predefined XML
/// entity, `None` is returned.
///
/// The complete list of predefined entities is defined in the [specification].
///
/// ```
/// # use quick_xml::escape::resolve_xml_entity;
/// # use pretty_assertions::assert_eq;
/// assert_eq!(resolve_xml_entity("lt"), Some("<"));
/// assert_eq!(resolve_xml_entity("gt"), Some(">"));
/// assert_eq!(resolve_xml_entity("amp"), Some("&"));
/// assert_eq!(resolve_xml_entity("apos"), Some("'"));
/// assert_eq!(resolve_xml_entity("quot"), Some("\""));
///
/// assert_eq!(resolve_xml_entity("foo"), None);
/// ```
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-predefined-ent
pub fn resolve_xml_entity(entity: &str) -> Option<&'static str> {
// matching over strings is not allowed in const functions
let s = match name.as_bytes() {
let s = match entity.as_bytes() {
b"lt" => "<",
b"gt" => ">",
b"amp" => "&",
Expand All @@ -261,12 +296,13 @@ fn named_entity(name: &str) -> Option<&str> {
};
Some(s)
}
#[cfg(feature = "escape-html")]
fn named_entity(name: &str) -> Option<&str> {

/// Resolves all HTML5 entities. For the complete list, see <https://dev.w3.org/html5/html-author/charref>.
pub fn resolve_html5_entity(entity: &str) -> Option<&'static str> {
// imported from https://dev.w3.org/html5/html-author/charref
// matching over strings is not allowed in const functions
// TODO: automate updating using https://html.spec.whatwg.org/entities.json
let s = match name.as_bytes() {
let s = match entity.as_bytes() {
b"Tab" => "\u{09}",
b"NewLine" => "\u{0A}",
b"excl" => "\u{21}",
Expand Down

0 comments on commit 10d1ff8

Please sign in to comment.