Skip to content

Commit

Permalink
Merge tag '0.8.19'
Browse files Browse the repository at this point in the history
  • Loading branch information
kornelski committed Sep 23, 2023
2 parents 15332bd + bfb185e commit fee2576
Show file tree
Hide file tree
Showing 17 changed files with 119 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "xml-rs"
version = "0.8.18"
version = "0.8.19"
authors = ["Vladimir Matveev <vmatveev@citrine.cc>"]
license = "MIT"
description = "An XML library in pure Rust"
Expand Down
2 changes: 1 addition & 1 deletion examples/print_events.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fs::File;
use std::io::BufReader;
use xml::common::Position;
use xml::reader::*;
use xml::reader::{ParserConfig, XmlEvent};

fn main() {
let file_path = std::env::args_os().nth(1).expect("Please specify a path to an XML file");
Expand Down
68 changes: 68 additions & 0 deletions examples/rewrite.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//! See <https://lib.rs/crates/svg-hush> for a real-world example.

use xml::EmitterConfig;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use xml::reader::{ParserConfig, Result};

/// Reads an XML document, rewrites the casing of its textual content, and prints the
/// result to standard output.
///
/// The input path is the first command-line argument; when none is given,
/// `tests/documents/sample_1.xml` is used. Attribute values, character data and comments
/// are passed through [`alternating_caps`]; every other reader event is forwarded to the
/// writer unchanged when it has a writer equivalent.
///
/// # Errors
///
/// Returns an error if the input file cannot be opened, if the reader reports a parse
/// error, or if the writer fails to emit an event.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    use xml::reader::XmlEvent as ReadEvent;
    use xml::writer::XmlEvent as WriteEvent;

    // Resolve the input path: explicit CLI argument first, bundled sample otherwise.
    let cli_path = std::env::args_os().nth(1);
    let input_path = match cli_path.as_deref() {
        Some(given) => Path::new(given),
        None => Path::new("tests/documents/sample_1.xml"),
    };

    let opened = File::open(input_path)
        .map_err(|e| format!("Can't open {}: {e}", input_path.display()))?;
    let source = BufReader::new(opened);

    // Comments are kept (so they can be rewritten); CDATA is folded into character
    // events and adjacent character events are merged.
    let parser_config = ParserConfig::default()
        .ignore_root_level_whitespace(true)
        .ignore_comments(false)
        .cdata_to_characters(true)
        .coalesce_characters(true);
    let mut reader = parser_config.create_reader(source);

    // Lock stdout once for the whole run instead of on every write.
    let mut writer = EmitterConfig::default().create_writer(std::io::stdout().lock());

    loop {
        match reader.next()? {
            ReadEvent::EndDocument => break,
            ReadEvent::StartElement { name, mut attributes, namespace } => {
                // Rewrite the owned attribute values first, then lend them to the writer.
                for attr in &mut attributes {
                    attr.value = alternating_caps(&attr.value);
                }
                writer.write(WriteEvent::StartElement {
                    name: name.borrow(),
                    namespace: namespace.borrow(),
                    attributes: attributes.iter().map(|attr| attr.borrow()).collect(),
                })?;
            },
            ReadEvent::Characters(original) => {
                let rewritten = alternating_caps(&original);
                writer.write(WriteEvent::Characters(&rewritten))?;
            },
            ReadEvent::Comment(original) => {
                let rewritten = alternating_caps(&original);
                writer.write(WriteEvent::Comment(&rewritten))?;
            },
            passthrough => {
                // Events without a writer counterpart are silently dropped.
                if let Some(converted) = passthrough.as_writer_event() {
                    writer.write(converted)?;
                }
            },
        }
    }

    Ok(())
}

/// Returns `text` with characters at even positions ASCII-uppercased and characters at
/// odd positions ASCII-lowercased.
///
/// Positions count `char`s, not bytes. Non-ASCII characters are copied unchanged but
/// still advance the position.
fn alternating_caps(text: &str) -> String {
    let mut rewritten = String::with_capacity(text.len());
    for (position, ch) in text.chars().enumerate() {
        let cased = if position % 2 == 0 {
            ch.to_ascii_uppercase()
        } else {
            ch.to_ascii_lowercase()
        };
        rewritten.push(cased);
    }
    rewritten
}
2 changes: 1 addition & 1 deletion src/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use std::fmt;

use crate::escape::{Escaped, AttributeEscapes};
use crate::escape::{AttributeEscapes, Escaped};
use crate::name::{Name, OwnedName};

/// A borrowed version of an XML attribute.
Expand Down
6 changes: 3 additions & 3 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ pub fn is_whitespace_str(s: &str) -> bool {
s.chars().all(is_whitespace_char)
}

pub fn is_xml10_char(c: char) -> bool {
#[must_use] pub fn is_xml10_char(c: char) -> bool {
matches!(c, '\u{09}' | '\u{0A}' | '\u{0D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..)
}

pub fn is_xml11_char(c: char) -> bool {
#[must_use] pub fn is_xml11_char(c: char) -> bool {
matches!(c, '\u{01}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..)
}

pub fn is_xml11_char_not_restricted(c: char) -> bool {
#[must_use] pub fn is_xml11_char_not_restricted(c: char) -> bool {
is_xml11_char(c) && !matches!(c, '\u{01}'..='\u{08}' | '\u{0B}'..='\u{0C}' | '\u{0E}'..='\u{1F}' | '\u{7F}'..='\u{84}' | '\u{86}'..='\u{9F}')
}

Expand Down
17 changes: 9 additions & 8 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ use std::{borrow::Cow, marker::PhantomData, fmt::{Display, Result, Formatter}};
pub(crate) trait Escapes {
fn escape(c: u8) -> Option<&'static str>;

fn byte_needs_escaping(c: u8) -> bool{
fn byte_needs_escaping(c: u8) -> bool {
Self::escape(c).is_some()
}

fn str_needs_escaping(s: &str) -> bool{
fn str_needs_escaping(s: &str) -> bool {
s.bytes().any(|c| Self::escape(c).is_some())
}
}
Expand All @@ -22,13 +22,12 @@ pub(crate) struct Escaped<'a, E: Escapes> {
impl<'a, E: Escapes> Escaped<'a, E> {
pub fn new(s: &'a str) -> Self {
Escaped {
_escape_phantom: PhantomData,
_escape_phantom: PhantomData,
to_escape: s,
}
}
}


impl<'a, E: Escapes> Display for Escaped<'a, E> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
let mut total_remaining = self.to_escape;
Expand All @@ -49,7 +48,7 @@ impl<'a, E: Escapes> Display for Escaped<'a, E> {

total_remaining = &remaining[1..];
}

f.write_str(total_remaining)
}
}
Expand Down Expand Up @@ -107,7 +106,7 @@ escapes!(
/// * `"` → `&quot;`
/// * `'` → `&apos;`
/// * `&` → `&amp;`
///
///
/// The following characters are escaped so that attributes are printed on
/// a single line:
/// * `\n` → `&#xA;`
Expand All @@ -117,7 +116,8 @@ escapes!(
///
/// Does not perform allocations if the given string does not contain escapable characters.
#[inline]
#[must_use] pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
#[must_use]
pub fn escape_str_attribute(s: &str) -> Cow<'_, str> {
escape_str::<AttributeEscapes>(s)
}

Expand All @@ -133,7 +133,8 @@ escapes!(
///
/// Does not perform allocations if the given string does not contain escapable characters.
#[inline]
#[must_use] pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
#[must_use]
pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> {
escape_str::<PcDataEscapes>(s)
}

Expand Down
4 changes: 2 additions & 2 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ macro_rules! gen_setter {
///
/// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
#[inline]
pub fn $field(mut self, value: $t) -> Self {
#[must_use] pub fn $field(mut self, value: $t) -> Self {
self.$field = value;
self
}
Expand All @@ -29,7 +29,7 @@ macro_rules! gen_setter {
///
/// <small>See [`ParserConfig`][crate::ParserConfig] fields docs for details</small>
#[inline]
pub fn $field(mut self, value: $t) -> Self {
#[must_use] pub fn $field(mut self, value: $t) -> Self {
self.c.$field = value;
self
}
Expand Down
7 changes: 7 additions & 0 deletions src/namespace.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Contains namespace manipulation types and functions.

use std::borrow::Cow;
use std::collections::btree_map::Iter as Entries;
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
Expand Down Expand Up @@ -165,6 +166,12 @@ impl Namespace {
pub fn get<'a, P: ?Sized + AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
    // A prefix missing from the inner map yields `None` straight away.
    let uri = self.0.get(prefix.as_ref())?;
    // Hand the stored value back as a plain string slice.
    Some(&**uri)
}

/// Returns this namespace wrapped in [`Cow::Borrowed`], without cloning it.
///
/// Intended for the writer, e.g. as the `namespace` field of a writer
/// `StartElement` event.
#[must_use]
pub fn borrow(&self) -> Cow<'_, Self> {
Cow::Borrowed(self)
}
}

/// An alias for iterator type for namespace mappings contained in a namespace.
Expand Down
4 changes: 1 addition & 3 deletions src/reader/events.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.

use std::borrow::Cow;
use std::fmt;

use crate::attribute::OwnedAttribute;
use crate::common::XmlVersion;
use crate::name::OwnedName;
Expand Down Expand Up @@ -207,7 +205,7 @@ impl XmlEvent {
Some(crate::writer::events::XmlEvent::StartElement {
name: name.borrow(),
attributes: attributes.iter().map(|a| a.borrow()).collect(),
namespace: Cow::Borrowed(namespace)
namespace: namespace.borrow(),
}),
XmlEvent::EndElement { ref name } =>
Some(crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }),
Expand Down
8 changes: 4 additions & 4 deletions src/reader/indexset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,13 @@ fn indexset() {
}

assert!(s.contains(&OwnedName {
local_name: format!("attr1234"), namespace: None, prefix: None,
local_name: "attr1234".into(), namespace: None, prefix: None,
}));
assert!(s.contains(&OwnedName {
local_name: format!("attr0"), namespace: None, prefix: None,
local_name: "attr0".into(), namespace: None, prefix: None,
}));
assert!(s.contains(&OwnedName {
local_name: format!("attr49999"), namespace: None, prefix: None,
local_name: "attr49999".into(), namespace: None, prefix: None,
}));
}

Expand All @@ -100,7 +100,7 @@ struct U64Hasher(u64);
impl Hasher for U64Hasher {
fn finish(&self) -> u64 { self.0 }
fn write(&mut self, slice: &[u8]) {
for &v in slice { self.0 ^= v as u64 } // unused in practice
for &v in slice { self.0 ^= u64::from(v) } // unused in practice
}
fn write_u64(&mut self, i: u64) {
self.0 ^= i;
Expand Down
13 changes: 6 additions & 7 deletions src/reader/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ impl PullParser {
fn next_pos(&mut self) {
// unfortunately calls to next_pos will never be perfectly balanced with push_pos,
// at very least because parse errors and EOF can happen unexpectedly without a prior push.
if self.pos.len() > 0 {
if !self.pos.is_empty() {
if self.pos.len() > 1 {
self.pos.remove(0);
} else {
Expand Down Expand Up @@ -485,7 +485,7 @@ impl PullParser {
let name = this.take_buf();
match name.parse() {
Ok(name) => on_name(this, t, name),
Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into())))
Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
}
};

Expand Down Expand Up @@ -515,7 +515,7 @@ impl PullParser {

Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t),

_ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t)))
_ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
}
}

Expand All @@ -527,7 +527,7 @@ impl PullParser {
fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
where F: Fn(&mut PullParser, String) -> Option<Result> {
match t {
Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace

Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
None => { // Entered attribute value
Expand Down Expand Up @@ -558,8 +558,7 @@ impl PullParser {
self.into_state_continue(State::InsideReference)
},

Token::OpeningTagStart =>
Some(self.error(SyntaxError::UnexpectedOpeningTag)),
Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)),

Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
Expand All @@ -584,7 +583,7 @@ impl PullParser {

// check whether the name prefix is bound and fix its namespace
match self.nst.get(name.borrow().prefix_repr()) {
Some("") => name.namespace = None, // default namespace
Some("") => name.namespace = None, // default namespace
Some(ns) => name.namespace = Some(ns.into()),
None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into())))
}
Expand Down
10 changes: 5 additions & 5 deletions src/reader/parser/inside_doctype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ impl PullParser {
_ => None,
},
DoctypeSubstate::String => match t {
Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { None },
Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { None },
Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None,
Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None,
Token::SingleQuote | Token::DoubleQuote => {
self.data.quote = None;
self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
Expand All @@ -51,12 +51,12 @@ impl PullParser {
None
},
Token::Character(c) if is_whitespace_char(c) => {
match self.buf.as_str() {
let buf = self.take_buf();
match buf.as_str() {
"ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)),
"NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)),
s => Some(self.error(SyntaxError::UnknownMarkupDeclaration(s.into()))),
_ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))),
}

},
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},
Expand Down
10 changes: 6 additions & 4 deletions src/reader/parser/inside_opening_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ impl PullParser {
OpeningTagSubstate::InsideTag => match t {
Token::TagEnd => self.emit_start_element(false),
Token::EmptyTagEnd => self.emit_start_element(true),
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
Token::Character(c) if is_name_start_char(c) => {
if self.buf.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
}
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t)))
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
},

OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
Expand Down Expand Up @@ -108,10 +108,12 @@ impl PullParser {
}),

OpeningTagSubstate::AfterAttributeValue => match t {
Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
Token::Character(c) if is_whitespace_char(c) => {
self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
},
Token::TagEnd => self.emit_start_element(false),
Token::EmptyTagEnd => self.emit_start_element(true),
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t)))
_ => Some(self.error(SyntaxError::UnexpectedTokenInOpeningTag(t))),
},
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/reader/parser/outside_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ impl PullParser {
if self.inside_whitespace && self.config.c.trim_whitespace {
None
} else if self.inside_whitespace && !self.config.c.whitespace_to_characters {
debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
Some(Ok(XmlEvent::Whitespace(buf)))
} else if self.config.c.trim_whitespace {
Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
Expand Down Expand Up @@ -174,7 +175,7 @@ impl PullParser {
self.into_state(State::OutsideTag, next_event)
},

Token::CommentStart => {
Token::CommentStart => {
let next_event = self.set_encountered(Encountered::Comment);
self.into_state(State::InsideComment, next_event)
}
Expand Down
Loading

0 comments on commit fee2576

Please sign in to comment.