@@ -48,7 +48,10 @@ use std::str::from_utf8;
4848
4949use crate :: encoding:: { Decoder , EncodingError } ;
5050use crate :: errors:: { Error , IllFormedError } ;
51- use crate :: escape:: { escape, minimal_escape, parse_number, partial_escape, EscapeError } ;
51+ use crate :: escape:: {
52+ escape, minimal_escape, normalize_html_eols, normalize_xml_eols, parse_number, partial_escape,
53+ EscapeError ,
54+ } ;
5255use crate :: name:: { LocalName , QName } ;
5356use crate :: utils:: { name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes } ;
5457use attributes:: { AttrError , Attribute , Attributes } ;
@@ -583,6 +586,46 @@ impl<'a> BytesText<'a> {
583586 self . decoder . decode_cow ( & self . content )
584587 }
585588
589+ /// Decodes the content of the XML event.
590+ ///
591+ /// When this event is produced by the reader, it uses the encoding information
592+ /// associated with that reader to interpret the raw bytes contained within
593+ /// this text event.
594+ ///
595+ /// This will allocate if the value contains any escape sequences, is in a non-UTF-8
596+ /// encoding, or requires EOL normalization.
597+ ///
598+ /// Note that this method should be used only if the event represents XML content,
599+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
600+ ///
601+ /// To get HTML content, use [`html_content()`](Self::html_content).
602+ ///
603+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
604+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
605+ pub fn xml_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
606+ self . decoder . content ( & self . content , normalize_xml_eols)
607+ }
608+
609+ /// Decodes the content of the HTML event.
610+ ///
611+ /// When this event is produced by the reader, it uses the encoding information
612+ /// associated with that reader to interpret the raw bytes contained within
613+ /// this text event.
614+ ///
615+ /// This will allocate if the value contains any escape sequences, is in a non-UTF-8
616+ /// encoding, or requires EOL normalization.
617+ ///
618+ /// Note that this method should be used only if the event represents HTML content,
619+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
620+ ///
621+ /// To get XML content, use [`xml_content()`](Self::xml_content).
622+ ///
623+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
624+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
625+ pub fn html_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
626+ self . decoder . content ( & self . content , normalize_html_eols)
627+ }
628+
586629 /// Removes leading XML whitespace bytes from text content.
587630 ///
588631 /// Returns `true` if content is empty after that
@@ -828,7 +871,49 @@ impl<'a> BytesCData<'a> {
828871 /// associated with that reader to interpret the raw bytes contained within this
829872 /// CDATA event.
830873 pub fn decode ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
831- Ok ( self . decoder . decode_cow ( & self . content ) ?)
874+ self . decoder . decode_cow ( & self . content )
875+ }
876+
877+ /// Decodes the raw input byte content of the CDATA section of the XML event
878+ /// into a string.
879+ ///
880+ /// When this event is produced by the reader, it uses the encoding information
881+ /// associated with that reader to interpret the raw bytes contained within
882+ /// this CDATA event.
883+ ///
884+ /// This will allocate if the value is in a non-UTF-8 encoding or if EOL
885+ /// normalization is required.
886+ ///
887+ /// Note that this method should be used only if the event represents XML content,
888+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
889+ ///
890+ /// To get HTML content, use [`html_content()`](Self::html_content).
891+ ///
892+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
893+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
894+ pub fn xml_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
895+ self . decoder . content ( & self . content , normalize_xml_eols)
896+ }
897+
898+ /// Decodes the raw input byte content of the CDATA section of the HTML event
899+ /// into a string.
900+ ///
901+ /// When this event is produced by the reader, it uses the encoding information
902+ /// associated with that reader to interpret the raw bytes contained within
903+ /// this CDATA event.
904+ ///
905+ /// This will allocate if the value is in a non-UTF-8 encoding or if EOL
906+ /// normalization is required.
907+ ///
908+ /// Note that this method should be used only if the event represents HTML content,
909+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
910+ ///
911+ /// To get XML content, use [`xml_content()`](Self::xml_content).
912+ ///
913+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
914+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
915+ pub fn html_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
916+ self . decoder . content ( & self . content , normalize_html_eols)
832917 }
833918}
834919
@@ -1443,6 +1528,46 @@ impl<'a> BytesRef<'a> {
14431528 self . decoder . decode_cow ( & self . content )
14441529 }
14451530
1531+ /// Decodes the content of the XML event.
1532+ ///
1533+ /// When this event is produced by the reader, it uses the encoding information
1534+ /// associated with that reader to interpret the raw bytes contained within
1535+ /// this general reference event.
1536+ ///
1537+ /// This will allocate if the value is in a non-UTF-8 encoding or if EOL
1538+ /// normalization is required.
1539+ ///
1540+ /// Note that this method should be used only if the event represents XML content,
1541+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
1542+ ///
1543+ /// To get HTML content, use [`html_content()`](Self::html_content).
1544+ ///
1545+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
1546+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1547+ pub fn xml_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
1548+ self . decoder . content ( & self . content , normalize_xml_eols)
1549+ }
1550+
1551+ /// Decodes the content of the HTML event.
1552+ ///
1553+ /// When this event is produced by the reader, it uses the encoding information
1554+ /// associated with that reader to interpret the raw bytes contained within
1555+ /// this general reference event.
1556+ ///
1557+ /// This will allocate if the value is in a non-UTF-8 encoding or if EOL
1558+ /// normalization is required.
1559+ ///
1560+ /// Note that this method should be used only if the event represents HTML content,
1561+ /// because the rules for normalizing EOLs in [XML] and [HTML] differ.
1562+ ///
1563+ /// To get XML content, use [`xml_content()`](Self::xml_content).
1564+ ///
1565+ /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
1566+ /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1567+ pub fn html_content ( & self ) -> Result < Cow < ' a , str > , EncodingError > {
1568+ self . decoder . content ( & self . content , normalize_html_eols)
1569+ }
1570+
14461571 /// Returns `true` if the specified reference represents the character reference
14471572 /// (`&#<number>;`).
14481573 ///
0 commit comments