Skip to content

Commit

Permalink
Merge pull request #659 from Mingun/tests
Browse files Browse the repository at this point in the history
Fix test for ISO-8859-8-I encoding and small refactoring
  • Loading branch information
Mingun committed Oct 1, 2023
2 parents ede4fb9 + 1e19a45 commit ae8db96
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 94 deletions.
4 changes: 2 additions & 2 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ use std::sync::Arc;
/// The error type used by this crate.
#[derive(Clone, Debug)]
pub enum Error {
/// IO error.
/// XML document cannot be read from or written to underlying source.
///
/// `Arc<IoError>` instead of `IoError` since `IoError` is not `Clone`.
/// Contains the reference-counted I/O error to make the error type `Clone`able.
Io(Arc<IoError>),
/// Input decoding error. If [`encoding`] feature is disabled, contains `None`,
/// otherwise contains the UTF-8 decoding error
Expand Down
4 changes: 2 additions & 2 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ macro_rules! read_event_impl {
}

/// Read bytes up to `<` and skip it. If current byte (after skipping all space
/// characters if [`Parser::trim_text_start`] is `true`) is already `<`, then
/// characters if [`ReaderState::trim_text_start`] is `true`) is already `<`, then
/// returns the next event, otherwise stay at position just after the `<` symbol.
///
/// Moves parser to the `OpenedTag` state.
Expand Down Expand Up @@ -409,7 +409,7 @@ enum ParseState {
/// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
/// after which reader returned to the `ClosedTag` state.
///
/// [`expand_empty_elements`]: Parser::expand_empty_elements
/// [`expand_empty_elements`]: ReaderState::expand_empty_elements
Empty,
/// Reader enters this state when an `Eof` event is generated or an error occurs.
/// This is the last state; the reader stays in it forever.
Expand Down
11 changes: 6 additions & 5 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,17 +132,18 @@ impl ReaderState {
/// Wraps content of `buf` into the [`Event::End`] event. Does the check that
/// end name matches the last opened start name if `self.check_end_names` is set.
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
// Strip the `/` character. `content` contains data between `</` and `>`
let content = &buf[1..];
// XML standard permits whitespaces after the markup name in closing tags.
// Let's strip them from the buffer before comparing tag names.
let name = if self.trim_markup_names_in_closing_tags {
if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !is_whitespace(b)) {
let (name, _) = buf[1..].split_at(pos_end_name + 1);
name
if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) {
&content[..pos_end_name + 1]
} else {
&buf[1..]
content
}
} else {
&buf[1..]
content
};

let decoder = self.decoder();
Expand Down
9 changes: 9 additions & 0 deletions test-gen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,15 @@ fn main() {
.expect(&format!("label `{}` is unsupported", label));

process_index(enc, &codepoints);
if enc == ISO_8859_8 {
// ISO_8859_8_I does not have its own index in encoding/indexes.json,
// but it has the same mapping as ISO_8859_8.
//
// Wikipedia (https://en.wikipedia.org/wiki/ISO-8859-8-I):
// The WHATWG Encoding Standard used by HTML5 treats ISO-8859-8 and ISO-8859-8-I
// as distinct encodings with the same mapping due to influence on the layout direction
process_index(ISO_8859_8_I, &codepoints);
}
}
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
make_xml(X_USER_DEFINED, '\u{F780}'..='\u{F7FF}');
Expand Down
29 changes: 15 additions & 14 deletions tests/documents/encoding/ISO-8859-8-I.xml
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
㰿硭氠敮捯摩湧㴢䥓伭㠸㔹ⴸⵉ∿㸊㱲潯琠慴瑲楢畴攱㴢Ă̄Ԇ܈ऊଌഎ༐ᄒጔᔖ᜘ᤚᬜᴞἠ℣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀢਠ††⁡瑴物扵瑥㈽✁ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥⠩⨫Ⱝⸯ〱㈳㐵㘷㠹㨻㴾㽀䅂䍄䕆䝈䥊䭌䵎佐兒協啖坘奚孜嵞彠慢捤敦杨楪歬浮潰煲獴當睸祺筼絾羀膂莄薆螈覊讌趎辐醒鎔閖鞘馚鮜鶞龠✊†††㩁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婟慢捤敦杨楪歬浮潰煲獴當睸祺㴺䅂䍄䕆䝈䥊䭌䵎佐兒協啖坘奚彡扣摥晧桩橫汭湯灱牳瑵癷硹稊㸊†㰿㩁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婟慢捤敦杨楪歬浮潰煲獴當睸祺㼾ਠ‼ℭⴁȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥☧⠩⨫Ⱝⸯ〱㈳㐵㘷㠹㨻㰽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀭⴾਠ ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀊ†㱮猺敬敭敮琠湳㩡瑴物扵瑥㴢癡汵攱∠硭汮猺湳㴢湡浥獰慣攢⼾ਠ‼孛䍄䅔䅛嬁ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꁝ崾਼⽲潯琾
<root attribute1="
 !#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������"
attribute2='
 !"#$%()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������'
:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz=:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
<?xml version="1.1" encoding="ISO-8859-8-I"?>
<!--This is generated file. Edit <quick-xml>/test-gen/src/main.rs instead-->
<root attribute1="
!#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���燾"
attribute2='
!"#$%()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���燾'
:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���=:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���
>
<?:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz?>
<!--
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������-->

!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������
<ns:element ns:attribute="value1" xmlns:ns="namespace"/>
<[[CDATA[[
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������]]>
<?:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���?>
<!--
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���-->

!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���
<ns::ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦��� ns:attribute="value1" xmlns:ns="namespace"/>
<![CDATA[
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���]]>
</root>
Expand Down
51 changes: 51 additions & 0 deletions tests/issues.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@ use quick_xml::name::QName;
use quick_xml::reader::Reader;
use quick_xml::Error;

/// Regression test for https://github.com/tafia/quick-xml/issues/94
///
/// Drains every event from a small document that contains the `<!B>` markup.
/// The test makes no assertions: it succeeds as long as reading stops at
/// `Eof` or at the first error without panicking.
#[test]
fn issue94() {
    let data = br#"<Run>
<!B>
</Run>"#;
    let mut reader = Reader::from_reader(&data[..]);
    reader.trim_text(true);
    // Keep pulling events until the reader reports `Eof` or fails.
    while !matches!(reader.read_event(), Ok(Event::Eof) | Err(..)) {}
}

/// Regression test for https://github.com/tafia/quick-xml/issues/115
#[test]
fn issue115() {
Expand All @@ -22,6 +38,41 @@ fn issue115() {
}
}

/// Regression test for https://github.com/tafia/quick-xml/issues/299
///
/// Walks the document and, for every start or empty element, checks that the
/// number of successfully parsed attributes equals the count expected for
/// that element name. Any read error is propagated to the test harness.
#[test]
fn issue299() -> Result<(), Error> {
    let xml = r#"
<?xml version="1.0" encoding="utf8"?>
<MICEX_DOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SECURITY SecurityId="PLZL" ISIN="RU000A0JNAA8" SecShortName="Short Name" PriceType="CASH">
<RECORDS RecNo="1" TradeNo="1111" TradeDate="2021-07-08" TradeTime="15:00:00" BuySell="S" SettleCode="Y1Dt" Decimals="3" Price="13057.034" Quantity="766" Value="10001688.29" AccInt="0" Amount="10001688.29" Balance="766" TrdAccId="X0011" ClientDetails="2222" CPFirmId="3333" CPFirmShortName="Firm Short Name" Price2="13057.034" RepoPart="2" ReportTime="16:53:27" SettleTime="17:47:06" ClientCode="4444" DueDate="2021-07-09" EarlySettleStatus="N" RepoRate="5.45" RateType="FIX"/>
</SECURITY>
</MICEX_DOC>
"#;
    let mut reader = Reader::from_str(xml);
    loop {
        // Only elements carry attributes; everything else is skipped.
        let element = match reader.read_event()? {
            Event::Eof => break,
            Event::Start(e) | Event::Empty(e) => e,
            _ => continue,
        };
        // Expected attribute count per element name used in the document above.
        let expected = match element.name().as_ref() {
            b"MICEX_DOC" => 1,
            b"SECURITY" => 4,
            b"RECORDS" => 26,
            _ => unreachable!(),
        };
        assert_eq!(
            expected,
            element.attributes().filter(Result::is_ok).count(),
            "mismatch att count on '{:?}'",
            reader.decoder().decode(element.name().as_ref())
        );
    }
    Ok(())
}

/// Regression test for https://github.com/tafia/quick-xml/issues/360
#[test]
fn issue360() {
Expand Down
50 changes: 0 additions & 50 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event::*;
use quick_xml::name::QName;
use quick_xml::reader::Reader;
use quick_xml::Error;
use std::borrow::Cow;

use pretty_assertions::assert_eq;
Expand Down Expand Up @@ -89,21 +88,6 @@ fn test_comment_starting_with_gt() {
}
}

/// Drains every event from a small document that contains the `<!B>` markup.
/// There are no assertions: the test succeeds as long as reading stops at
/// `Eof` or at the first error without panicking.
#[test]
fn test_issue94() {
    let data = br#"<Run>
<!B>
</Run>"#;
    let mut reader = Reader::from_reader(&data[..]);
    reader.trim_text(true);
    // Keep pulling events until the reader reports `Eof` or fails.
    while !matches!(reader.read_event(), Ok(Eof) | Err(..)) {}
}

#[test]
fn test_no_trim() {
let mut reader = Reader::from_str(" <tag> text </tag> ");
Expand Down Expand Up @@ -151,37 +135,3 @@ fn test_clone_reader() {
assert!(matches!(cloned.read_event().unwrap(), Text(_)));
assert!(matches!(cloned.read_event().unwrap(), End(_)));
}

/// Walks the document and, for every start or empty element, checks that the
/// number of successfully parsed attributes equals the count expected for
/// that element name. Any read error is propagated to the test harness.
#[test]
fn test_issue299() -> Result<(), Error> {
    let xml = r#"
<?xml version="1.0" encoding="utf8"?>
<MICEX_DOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SECURITY SecurityId="PLZL" ISIN="RU000A0JNAA8" SecShortName="Short Name" PriceType="CASH">
<RECORDS RecNo="1" TradeNo="1111" TradeDate="2021-07-08" TradeTime="15:00:00" BuySell="S" SettleCode="Y1Dt" Decimals="3" Price="13057.034" Quantity="766" Value="10001688.29" AccInt="0" Amount="10001688.29" Balance="766" TrdAccId="X0011" ClientDetails="2222" CPFirmId="3333" CPFirmShortName="Firm Short Name" Price2="13057.034" RepoPart="2" ReportTime="16:53:27" SettleTime="17:47:06" ClientCode="4444" DueDate="2021-07-09" EarlySettleStatus="N" RepoRate="5.45" RateType="FIX"/>
</SECURITY>
</MICEX_DOC>
"#;
    let mut reader = Reader::from_str(xml);
    loop {
        // Only elements carry attributes; everything else is skipped.
        let element = match reader.read_event()? {
            Eof => break,
            Start(e) | Empty(e) => e,
            _ => continue,
        };
        // Expected attribute count per element name used in the document above.
        let expected = match element.name().as_ref() {
            b"MICEX_DOC" => 1,
            b"SECURITY" => 4,
            b"RECORDS" => 26,
            _ => unreachable!(),
        };
        assert_eq!(
            expected,
            element.attributes().filter(Result::is_ok).count(),
            "mismatch att count on '{:?}'",
            reader.decoder().decode(element.name().as_ref())
        );
    }
    Ok(())
}
21 changes: 0 additions & 21 deletions tests/unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,27 +435,6 @@ fn test_offset_err_comment() {
}
}

/// Reads `<a>` followed by an unterminated comment and checks the reported
/// buffer position after the successful read (3) and after the failing
/// read (4).
#[test]
fn test_offset_err_comment_2_buf() {
    let mut r = Reader::from_str("<a><!--b>");
    r.trim_text(true);

    // The first event must be read successfully; its value is not inspected.
    let _ = r.read_event().unwrap();
    assert_eq!(r.buffer_position(), 3);

    let outcome = r.read_event();
    if let Err(e) = &outcome {
        // error at char 4: no closing --> tag found
        assert_eq!(
            r.buffer_position(),
            4,
            "expecting buf_pos = 4, found {}, err {:?}",
            r.buffer_position(),
            e
        );
    } else {
        panic!("expecting error, found {:?}", outcome);
    }
}

#[test]
fn test_offset_err_comment_trim_text() {
let mut r = Reader::from_str("<a>\r\n <!--b>");
Expand Down

0 comments on commit ae8db96

Please sign in to comment.