From c63971403a61089904f9d57250b823b6426e13d3 Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 27 Sep 2023 23:04:05 +0500 Subject: [PATCH 1/6] Regenerate ISO-8859-8-I.xml due to error in its content The CDATA section was formed incorrectly and was instead recognized as a Start tag. The file was introduced in PR #465 and was made manually rather than with the generator, because WHATWG does not have a definition of this encoding as a separate entry in indexes.json. Actually, this encoding is the same as ISO-8859-8, but influences the layout direction when rendering text. Wikipedia: The WHATWG Encoding Standard used by HTML5 treats ISO-8859-8 and ISO-8859-8-I as distinct encodings with the same mapping due to influence on the layout direction So the generator was fixed and the file regenerated --- test-gen/src/main.rs | 9 +++++++ tests/documents/encoding/ISO-8859-8-I.xml | 29 ++++++++++++----------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/test-gen/src/main.rs b/test-gen/src/main.rs index 65b77d7d..92a1e89f 100644 --- a/test-gen/src/main.rs +++ b/test-gen/src/main.rs @@ -284,6 +284,15 @@ fn main() { .expect(&format!("label `{}` is unsupported", label)); process_index(enc, &codepoints); + if enc == ISO_8859_8 { + // ISO_8859_8_I does not have its own index in encoding/indexes.json, + // but it have the same mapping as ISO_8859_8. 
+ // + // Wikipedia (https://en.wikipedia.org/wiki/ISO-8859-8-I): + // The WHATWG Encoding Standard used by HTML5 treats ISO-8859-8 and ISO-8859-8-I + // as distinct encodings with the same mapping due to influence on the layout direction + process_index(ISO_8859_8_I, &codepoints); + } } // https://encoding.spec.whatwg.org/#x-user-defined-decoder make_xml(X_USER_DEFINED, '\u{F780}'..='\u{F7FF}'); diff --git a/tests/documents/encoding/ISO-8859-8-I.xml b/tests/documents/encoding/ISO-8859-8-I.xml index af5aadbb..9ee16eef 100644 --- a/tests/documents/encoding/ISO-8859-8-I.xml +++ b/tests/documents/encoding/ISO-8859-8-I.xml @@ -1,16 +1,17 @@ - - + + - - -  -  !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ - - <[[CDATA[[ -  !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~]]> + + + + !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + + ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~]]> \ No newline at end of file From 78475dab176b39952789c759106c1fb4da6aaa98 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 28 Sep 2023 20:16:15 +0500 Subject: [PATCH 2/6] Move regression tests for #94 and #299 to issues.rs --- tests/issues.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ tests/test.rs | 50 ------------------------------------------------ 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/tests/issues.rs b/tests/issues.rs index 90efc732..e6d52758 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -9,6 +9,22 @@ use quick_xml::name::QName; use quick_xml::reader::Reader; use quick_xml::Error; +/// Regression test for https://github.com/tafia/quick-xml/issues/94 +#[test] +fn issue94() { + let data = br#" + +"#; + let mut reader = Reader::from_reader(&data[..]); + reader.trim_text(true); + loop { + match reader.read_event() { + Ok(Event::Eof) | Err(..) 
=> break, + _ => (), + } + } +} + /// Regression test for https://github.com/tafia/quick-xml/issues/115 #[test] fn issue115() { @@ -22,6 +38,41 @@ fn issue115() { } } +/// Regression test for https://github.com/tafia/quick-xml/issues/299 +#[test] +fn issue299() -> Result<(), Error> { + let xml = r#" + + + + + + +"#; + let mut reader = Reader::from_str(xml); + loop { + match reader.read_event()? { + Event::Start(e) | Event::Empty(e) => { + let attr_count = match e.name().as_ref() { + b"MICEX_DOC" => 1, + b"SECURITY" => 4, + b"RECORDS" => 26, + _ => unreachable!(), + }; + assert_eq!( + attr_count, + e.attributes().filter(Result::is_ok).count(), + "mismatch att count on '{:?}'", + reader.decoder().decode(e.name().as_ref()) + ); + } + Event::Eof => break, + _ => (), + } + } + Ok(()) +} + /// Regression test for https://github.com/tafia/quick-xml/issues/360 #[test] fn issue360() { diff --git a/tests/test.rs b/tests/test.rs index b068079f..3104e6dd 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2,7 +2,6 @@ use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; use quick_xml::name::QName; use quick_xml::reader::Reader; -use quick_xml::Error; use std::borrow::Cow; use pretty_assertions::assert_eq; @@ -89,21 +88,6 @@ fn test_comment_starting_with_gt() { } } -#[test] -fn test_issue94() { - let data = br#" - -"#; - let mut reader = Reader::from_reader(&data[..]); - reader.trim_text(true); - loop { - match reader.read_event() { - Ok(Eof) | Err(..) => break, - _ => (), - } - } -} - #[test] fn test_no_trim() { let mut reader = Reader::from_str(" text "); @@ -151,37 +135,3 @@ fn test_clone_reader() { assert!(matches!(cloned.read_event().unwrap(), Text(_))); assert!(matches!(cloned.read_event().unwrap(), End(_))); } - -#[test] -fn test_issue299() -> Result<(), Error> { - let xml = r#" - - - - - - -"#; - let mut reader = Reader::from_str(xml); - loop { - match reader.read_event()? 
{ - Start(e) | Empty(e) => { - let attr_count = match e.name().as_ref() { - b"MICEX_DOC" => 1, - b"SECURITY" => 4, - b"RECORDS" => 26, - _ => unreachable!(), - }; - assert_eq!( - attr_count, - e.attributes().filter(Result::is_ok).count(), - "mismatch att count on '{:?}'", - reader.decoder().decode(e.name().as_ref()) - ); - } - Eof => break, - _ => (), - } - } - Ok(()) -} From 5aa1004a2ed6013dad1e5df806019adf834f1070 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 28 Sep 2023 20:33:20 +0500 Subject: [PATCH 3/6] Remove duplicated test added in 259ab4b27ebfd0948680f9ba72909e7dc3d60b64 --- tests/unit_tests.rs | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index e0438c9b..a5e8c2db 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -435,27 +435,6 @@ fn test_offset_err_comment() { } } -#[test] -fn test_offset_err_comment_2_buf() { - let mut r = Reader::from_str(" tag found - Err(e) => assert_eq!( - r.buffer_position(), - 4, - "expecting buf_pos = 4, found {}, err {:?}", - r.buffer_position(), - e - ), - e => panic!("expecting error, found {:?}", e), - } -} - #[test] fn test_offset_err_comment_trim_text() { let mut r = Reader::from_str("\r\n