ruffle-rs · Herschel · Mar 31, 2023 · Mar 27, 2023
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/core/Cargo.toml b/core/Cargo.toml
@@ -26,7 +26,7 @@ bitflags = "2.0.2"
 smallvec = { version = "1.10.0", features = ["union"] }
 num-traits = "0.2"
 num-derive = "0.3"
-quick-xml = { git = "https://github.com/ruffle-rs/quick-xml", rev = "8496365ec1412eb5ba5de350937b6bce352fa0ba" }
+quick-xml = "0.28.1"
 downcast-rs = "1.2.0"
 url = "2.3.1"
 weak-table = "0.3.2"
@@ -59,7 +59,7 @@ version = "0.4.34"
 [features]
 default = []
 lzma = ["lzma-rs", "swf/lzma"]
-wasm-bindgen = [ "instant/wasm-bindgen" ]
+wasm-bindgen = ["instant/wasm-bindgen"]
 avm_debug = []
 deterministic = []
 timeline_debug = []

diff --git a/core/src/avm1/object/xml_object.rs b/core/src/avm1/object/xml_object.rs
@@ -5,8 +5,9 @@ use crate::avm1::error::Error;
 use crate::avm1::{Object, ScriptObject, TObject};
 use crate::impl_custom_object;
 use crate::string::{AvmString, WStr, WString};
-use crate::xml::{XmlNode, ELEMENT_NODE, TEXT_NODE};
+use crate::xml::{custom_unescape, XmlNode, ELEMENT_NODE, TEXT_NODE};
 use gc_arena::{Collect, GcCell, MutationContext};
+use quick_xml::events::attributes::AttrError;
 use quick_xml::{events::Event, Reader};
 use std::fmt;
 
@@ -123,21 +124,23 @@ impl<'gc> XmlObject<'gc> {
     ) -> Result<(), quick_xml::Error> {
         let data_utf8 = data.to_utf8_lossy();
         let mut parser = Reader::from_str(&data_utf8);
-        let mut buf = Vec::new();
         let mut open_tags = vec![self.as_node()];
 
         self.0.write(activation.context.gc_context).status = XmlStatus::NoError;
 
         loop {
-            let event = parser.read_event(&mut buf).map_err(|error| {
+            let event = parser.read_event().map_err(|error| {
                 self.0.write(activation.context.gc_context).status = match error {
                     quick_xml::Error::UnexpectedEof(_)
-                    | quick_xml::Error::NameWithQuote(_)
-                    | quick_xml::Error::NoEqAfterName(_)
-                    | quick_xml::Error::DuplicatedAttribute(_, _) => XmlStatus::ElementMalformed,
+                    | quick_xml::Error::InvalidAttr(AttrError::ExpectedEq(_))
+                    | quick_xml::Error::InvalidAttr(AttrError::Duplicated(_, _)) => {
+                        XmlStatus::ElementMalformed
+                    }
                     quick_xml::Error::EndEventMismatch { .. } => XmlStatus::MismatchedEnd,
                     quick_xml::Error::XmlDeclWithoutVersion(_) => XmlStatus::DeclNotTerminated,
-                    quick_xml::Error::UnquotedValue(_) => XmlStatus::AttributeNotTerminated,
+                    quick_xml::Error::InvalidAttr(AttrError::UnquotedValue(_)) => {
+                        XmlStatus::AttributeNotTerminated
+                    }
                     _ => XmlStatus::OutOfMemory,
                     // Not accounted for:
                     // quick_xml::Error::UnexpectedToken(_)
@@ -150,15 +153,17 @@ impl<'gc> XmlObject<'gc> {
 
             match event {
                 Event::Start(bs) => {
-                    let child = XmlNode::from_start_event(activation, bs, self.id_map())?;
+                    let child =
+                        XmlNode::from_start_event(activation, bs, self.id_map(), parser.decoder())?;
                     open_tags
                         .last_mut()
                         .unwrap()
                         .append_child(activation.context.gc_context, child);
                     open_tags.push(child);
                 }
                 Event::Empty(bs) => {
-                    let child = XmlNode::from_start_event(activation, bs, self.id_map())?;
+                    let child =
+                        XmlNode::from_start_event(activation, bs, self.id_map(), parser.decoder())?;
                     open_tags
                         .last_mut()
                         .unwrap()
@@ -167,19 +172,17 @@ impl<'gc> XmlObject<'gc> {
                 Event::End(_) => {
                     open_tags.pop();
                 }
-                Event::Text(bt) | Event::CData(bt) => {
-                    let text = bt.unescaped()?;
-                    let is_whitespace_char = |c: &u8| matches!(*c, b'\t' | b'\n' | b'\r' | b' ');
-                    let is_whitespace_text = text.iter().all(is_whitespace_char);
-                    if !(text.is_empty() || ignore_white && is_whitespace_text) {
-                        let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
-                        let child =
-                            XmlNode::new(activation.context.gc_context, TEXT_NODE, Some(text));
-                        open_tags
-                            .last_mut()
-                            .unwrap()
-                            .append_child(activation.context.gc_context, child);
-                    }
+                Event::Text(bt) => {
+                    handle_text_cdata(
+                        custom_unescape(&bt.into_inner(), parser.decoder())?.as_bytes(),
+                        ignore_white,
+                        &mut open_tags,
+                        activation,
+                    );
+                }
+                Event::CData(bt) => {
+                    // This is already unescaped
+                    handle_text_cdata(&bt.into_inner(), ignore_white, &mut open_tags, activation);
                 }
                 Event::Decl(bd) => {
                     let mut xml_decl = WString::from_buf(b"<?".to_vec());
@@ -193,8 +196,8 @@ impl<'gc> XmlObject<'gc> {
                     // but it doesn't expose the whole tag, only the inner portion of it.
                     // Flash is also case-insensitive for DOCTYPE declarations. However,
                     // the `.docTypeDecl` property preserves the original case.
-                    let mut doctype = WString::from_buf(b"<!DOCTYPE".to_vec());
-                    doctype.push_str(WStr::from_units(bt.escaped()));
+                    let mut doctype = WString::from_buf(b"<!DOCTYPE ".to_vec());
+                    doctype.push_str(WStr::from_units(&*bt.escape_ascii().collect::<Vec<_>>()));
                     doctype.push_byte(b'>');
                     self.0.write(activation.context.gc_context).doctype =
                         Some(AvmString::new(activation.context.gc_context, doctype));
@@ -246,3 +249,21 @@ impl<'gc> TObject<'gc> for XmlObject<'gc> {
         Some(self.as_node())
     }
 }
+
+/// Appends `text` as a text-node child of the last entry in `open_tags`.
+/// Empty text is dropped, as is whitespace-only text when `ignore_white` is set.
+fn handle_text_cdata<'gc>(
+    text: &[u8],
+    ignore_white: bool,
+    open_tags: &mut [XmlNode<'gc>],
+    activation: &mut Activation<'_, 'gc>,
+) {
+    let is_blank = |c: &u8| b"\t\n\r ".contains(c);
+    if text.is_empty() || (ignore_white && text.iter().all(is_blank)) {
+        return;
+    }
+    let mc = activation.context.gc_context;
+    let text = AvmString::new_utf8_bytes(mc, text);
+    let child = XmlNode::new(mc, TEXT_NODE, Some(text));
+    open_tags.last_mut().unwrap().append_child(mc, child);
+}
diff --git a/core/src/avm2/e4x.rs b/core/src/avm2/e4x.rs
@@ -234,7 +234,6 @@ impl<'gc> E4XNode<'gc> {
 
         let data_utf8 = string.to_utf8_lossy();
         let mut parser = Reader::from_str(&data_utf8);
-        let mut buf = Vec::new();
         let mut open_tags: Vec<E4XNode<'gc>> = vec![];
 
         // FIXME - look these up from static property and settings
@@ -264,8 +263,38 @@ impl<'gc> E4XNode<'gc> {
             Ok(())
         }
 
+        /// Pushes `text` as a Text (`is_text`) or CData node; skips empty or ignored-blank text.
+        fn handle_text_cdata<'gc>(
+            text: &[u8],
+            ignore_white: bool,
+            open_tags: &mut [E4XNode<'gc>],
+            top_level: &mut Vec<E4XNode<'gc>>,
+            depth: usize,
+            is_text: bool,
+            activation: &mut Activation<'_, 'gc>,
+        ) -> Result<(), Error<'gc>> {
+            let is_blank = |c: &u8| b"\t\n\r ".contains(c);
+            if text.is_empty() || (ignore_white && text.iter().all(is_blank)) {
+                return Ok(());
+            }
+            let text = AvmString::new_utf8_bytes(activation.context.gc_context, text);
+            let kind = if is_text {
+                E4XNodeKind::Text(text)
+            } else {
+                E4XNodeKind::CData(text)
+            };
+            let data = E4XNodeData {
+                parent: None,
+                local_name: None,
+                kind,
+            };
+            let node = E4XNode(GcCell::allocate(activation.context.gc_context, data));
+            push_childless_node(node, open_tags, top_level, depth, activation)?;
+            Ok(())
+        }
+
         loop {
-            let event = parser.read_event(&mut buf).map_err(|error| {
+            let event = parser.read_event().map_err(|error| {
                 Error::RustError(format!("XML parsing error: {error:?}").into())
             })?;
 
@@ -290,41 +319,38 @@ impl<'gc> E4XNode<'gc> {
                         top_level.push(node);
                     }
                 }
-                Event::Text(bt) | Event::CData(bt) => {
-                    let text = bt.unescaped()?;
-                    let is_whitespace_char = |c: &u8| matches!(*c, b'\t' | b'\n' | b'\r' | b' ');
-                    let is_whitespace_text = text.iter().all(is_whitespace_char);
-                    if !(text.is_empty() || ignore_white && is_whitespace_text) {
-                        let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
-                        let node = E4XNode(GcCell::allocate(
-                            activation.context.gc_context,
-                            E4XNodeData {
-                                parent: None,
-                                local_name: None,
-                                kind: match &event {
-                                    Event::Text(_) => E4XNodeKind::Text(text),
-                                    Event::CData(_) => E4XNodeKind::CData(text),
-                                    _ => unreachable!(),
-                                },
-                            },
-                        ));
-                        push_childless_node(
-                            node,
-                            &mut open_tags,
-                            &mut top_level,
-                            depth,
-                            activation,
-                        )?;
-                    }
+                Event::Text(bt) => {
+                    handle_text_cdata(
+                        bt.unescape()?.as_bytes(),
+                        ignore_white,
+                        &mut open_tags,
+                        &mut top_level,
+                        depth,
+                        true,
+                        activation,
+                    )?;
+                }
+                Event::CData(bt) => {
+                    // This is already unescaped
+                    handle_text_cdata(
+                        bt,
+                        ignore_white,
+                        &mut open_tags,
+                        &mut top_level,
+                        depth,
+                        false,
+                        activation,
+                    )?;
                 }
                 Event::Comment(bt) | Event::PI(bt) => {
                     if (matches!(event, Event::Comment(_)) && ignore_comments)
                         || (matches!(event, Event::PI(_)) && ignore_processing_instructions)
                     {
                         continue;
                     }
-                    let text = bt.unescaped()?;
-                    let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
+                    let text = bt.unescape()?;
+                    let text =
+                        AvmString::new_utf8_bytes(activation.context.gc_context, text.as_bytes());
                     let kind = match event {
                         Event::Comment(_) => E4XNodeKind::Comment(text),
                         Event::PI(_) => E4XNodeKind::ProcessingInstruction(text),
@@ -358,15 +384,20 @@ impl<'gc> E4XNode<'gc> {
         bs: &BytesStart<'_>,
     ) -> Result<Self, quick_xml::Error> {
         // FIXME - handle namespace
-        let name = AvmString::new_utf8_bytes(activation.context.gc_context, bs.local_name());
+        let name =
+            AvmString::new_utf8_bytes(activation.context.gc_context, bs.local_name().into_inner());
 
         let mut attribute_nodes = Vec::new();
 
         let attributes: Result<Vec<_>, _> = bs.attributes().collect();
         for attribute in attributes? {
-            let key = AvmString::new_utf8_bytes(activation.context.gc_context, attribute.key);
-            let value_bytes = attribute.unescaped_value()?;
-            let value = AvmString::new_utf8_bytes(activation.context.gc_context, &value_bytes);
+            let key = AvmString::new_utf8_bytes(
+                activation.context.gc_context,
+                attribute.key.into_inner(),
+            );
+            let value_str = attribute.unescape_value()?;
+            let value =
+                AvmString::new_utf8_bytes(activation.context.gc_context, value_str.as_bytes());
 
             let attribute_data = E4XNodeData {
                 parent: None,

diff --git a/core/src/html/text_format.rs b/core/src/html/text_format.rs
@@ -606,13 +606,11 @@ impl FormatSpans {
         let mut reader = Reader::from_reader(&raw_bytes[..]);
         reader.expand_empty_elements(true);
         reader.check_end_names(false);
-        let mut buf = Vec::new();
         loop {
-            buf.clear();
-            match reader.read_event(&mut buf) {
+            match reader.read_event() {
                 Ok(Event::Start(ref e)) => {
                     opened_starts.push(opened_buffer.len());
-                    opened_buffer.extend(e.name());
+                    opened_buffer.extend(e.name().into_inner());
 
                     let attributes: Result<Vec<_>, _> = e.attributes().with_checks(false).collect();
                     let attributes = match attributes {
@@ -626,12 +624,13 @@ impl FormatSpans {
                         attributes.iter().find_map(|attribute| {
                             attribute
                                 .key
+                                .into_inner()
                                 .eq_ignore_ascii_case(name)
                                 .then(|| decode_to_wstr(&attribute.value))
                         })
                     };
                     let mut format = format_stack.last().unwrap().clone();
-                    match &e.name().to_ascii_lowercase()[..] {
+                    match &e.name().into_inner().to_ascii_lowercase()[..] {
                         b"br" => {
                             if is_multiline {
                                 text.push_byte(b'\n');
@@ -762,7 +761,7 @@ impl FormatSpans {
                     format_stack.push(format);
                 }
                 Ok(Event::Text(e)) if !e.is_empty() => {
-                    let e = decode_to_wstr(e.escaped());
+                    let e = decode_to_wstr(&e.into_inner());
                     let e = process_html_entity(&e).unwrap_or(e);
                     let format = format_stack.last().unwrap().clone();
                     text.push_str(&e);
@@ -772,7 +771,7 @@ impl FormatSpans {
                     // Check for a mismatch.
                     match opened_starts.last() {
                         Some(start) => {
-                            if e.name() != &opened_buffer[*start..] {
+                            if e.name().into_inner() != &opened_buffer[*start..] {
                                 continue;
                             } else {
                                 opened_buffer.truncate(*start);
@@ -782,7 +781,7 @@ impl FormatSpans {
                         None => continue,
                     }
 
-                    match &e.name().to_ascii_lowercase()[..] {
+                    match &e.name().into_inner().to_ascii_lowercase()[..] {
                         b"br" | b"sbr" => {
                             // Skip pop from `format_stack`.
                             continue;
@@ -1422,16 +1421,17 @@ impl<'a> FormatState<'a> {
                 self.close_tags();
             }
             let encoded = text.to_utf8_lossy();
-            let escaped = escape(encoded.as_bytes());
+            let escaped = escape(&encoded);
 
             if let Cow::Borrowed(_) = &encoded {
                 // Optimization: if the utf8 conversion was a no-op, we know the text is ASCII;
                 // escaping special characters cannot insert new non-ASCII characters, so we can
                 // simply append the bytes directly without converting from UTF8.
-                self.result.push_str(WStr::from_units(&*escaped));
+                self.result.push_str(WStr::from_units(escaped.as_bytes()));
             } else {
                 // TODO: updating our quick_xml fork to upstream will allow removing this UTF8 check.
-                let escaped = std::str::from_utf8(&escaped).expect("escaped text should be utf8");
+                let escaped =
+                    std::str::from_utf8(escaped.as_bytes()).expect("escaped text should be utf8");
                 self.result.push_utf8(escaped);
             }
         }

diff --git a/core/src/xml.rs b/core/src/xml.rs
@@ -3,4 +3,4 @@
 mod iterators;
 mod tree;
 
-pub use tree::{XmlNode, ELEMENT_NODE, TEXT_NODE};
+pub use tree::{custom_unescape, XmlNode, ELEMENT_NODE, TEXT_NODE};