-
Notifications
You must be signed in to change notification settings - Fork 236
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #753 from Mingun/fix-pi-parsing
Fix processing instruction parsing
- Loading branch information
Showing
5 changed files
with
196 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
//! Contains a parser for an XML processing instruction. | ||
|
||
/// Incremental finder for the `?>` sequence that terminates a processing instruction.
///
/// Create an instance of the parser and pass successive chunks of input to [`feed`].
/// As soon as the terminator is seen, [`feed`] returns [`Some`] with the position
/// just past `?>` inside the chunk that was fed last. Until then it returns
/// [`None`]; a match is normally expected eventually, so keep feeding new data
/// until you get one.
///
/// NOTE: a successful match does not return the parser to its initial state, so
/// the instance should not be used afterwards. Create a new parser for every
/// new search.
///
/// # Example
///
/// ```
/// # use quick_xml::reader::PiParser;
/// # use pretty_assertions::assert_eq;
/// let mut parser = PiParser::default();
///
/// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...`
/// // split into three chunks
/// assert_eq!(parser.feed(b"<?instruction"), None);
/// // ...get new chunk of data
/// assert_eq!(parser.feed(b" with = 'some > and ?"), None);
/// // ...get another chunk of data
/// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(10));
/// //                       ^         ^
/// //                       0         10
/// ```
///
/// [`feed`]: Self::feed()
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PiParser(
    /// `true` when the `bytes` passed to the previous unsuccessful search
    /// ended with `?`, i.e. a `>` at the very start of the next chunk would
    /// complete the `?>` terminator.
    pub bool,
);
|
||
impl PiParser { | ||
/// Determines the end position of a processing instruction in the provided slice. | ||
/// Processing instruction ends on the first occurrence of `?>` which cannot be | ||
/// escaped. | ||
/// | ||
/// Returns position after the `?>` or `None` if such sequence was not found. | ||
/// | ||
/// [Section 2.6]: Parameter entity references MUST NOT be recognized within | ||
/// processing instructions, so parser do not search for them. | ||
/// | ||
/// # Parameters | ||
/// - `bytes`: a slice to find the end of a processing instruction. | ||
/// Should contain text in ASCII-compatible encoding | ||
/// | ||
/// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi | ||
pub fn feed(&mut self, bytes: &[u8]) -> Option<usize> { | ||
for i in memchr::memchr_iter(b'>', bytes) { | ||
match i { | ||
// +1 for `>` which should be included in event | ||
0 if self.0 => return Some(1), | ||
// If the previous byte is `?`, then we found `?>` | ||
// +1 for `>` which should be included in event | ||
i if i > 0 && bytes[i - 1] == b'?' => return Some(i + 1), | ||
_ => {} | ||
} | ||
} | ||
self.0 = bytes.last().copied() == Some(b'?'); | ||
None | ||
} | ||
} | ||
|
||
#[test]
fn pi() {
    use pretty_assertions::assert_eq;

    /// Runs a fresh parser, seeded with `had_question_mark`, over `bytes`.
    ///
    /// Gives `Ok(pos)` with the position in the buffer where the processing
    /// instruction ends, or `Err(internal_state)` if parsing is not done yet.
    fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result<usize, bool> {
        let mut parser = PiParser(had_question_mark);
        let end = parser.feed(bytes);
        // `parser.0` is read after `feed` has run, so it reflects the updated state.
        end.ok_or(parser.0)
    }

    // Each row is (buffer passed to `feed`, state before the call, expected outcome).
    // The trailing comment shows the last character seen before calling `feed`:
    // `x` means any character, the pipe denotes the start of the buffer.
    let cases: &[(&[u8], bool, Result<usize, bool>)] = &[
        (b"", false, Err(false)), // x|
        (b"", true, Err(false)),  // ?|
        //
        (b"?", false, Err(true)), // x|?
        (b"?", true, Err(true)),  // ?|?
        //
        (b">", false, Err(false)), // x|>
        (b">", true, Ok(1)),       // ?|>
        //
        (b"?>", false, Ok(2)), // x|?>
        (b"?>", true, Ok(2)),  // ?|?>
        //
        (b">?>", false, Ok(3)), // x|>?>
        (b">?>", true, Ok(1)),  // ?|>?>
    ];

    for &(bytes, had_question_mark, expected) in cases {
        assert_eq!(
            parse_pi(bytes, had_question_mark),
            expected,
            "bytes = {:?}, had_question_mark = {}",
            bytes,
            had_question_mark
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters