Skip to content

Commit

Permalink
Remove EscapeAll mode
Browse files Browse the repository at this point in the history
EscapeAll tries to escape all non-ASCII characters. Unfortunately, HTML5
numeric entities can't represent most codepoints between U+0080 and
U+009F. The only way to handle those is to use XML entity rules, but
this is an HTML5 entity library.

Also change &apos; to &#39;. It turns out &apos; isn't part of HTML
until HTML5, so using &#39; is more compatible with pre-HTML5 parsers.
  • Loading branch information
lilyball committed May 19, 2014
1 parent 63d3b2f commit a4575eb
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 63 deletions.
79 changes: 19 additions & 60 deletions src/libhtml/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! This module contains `Writer`s for escaping/unescaping HTML.

use std::io::{Writer, IoResult};
use std::{char, str};
use std::char;
use entity::ENTITIES;

/// A `Writer` adaptor that escapes any HTML characters written to it.
Expand All @@ -32,9 +32,7 @@ pub enum EscapeMode {
/// Escapes characters for double-quoted attribute values. Escapes `&"`.
EscapeAttr,
/// Escapes characters for single-quoted attribute values. Escapes `&'`.
EscapeSingleQuoteAttr,
/// Escapes all non-printable or non-ASCII characters, with the exception of U+0000.
EscapeAll
EscapeSingleQuoteAttr
}

impl<W: Writer> EscapeWriter<W> {
Expand Down Expand Up @@ -64,63 +62,24 @@ impl<W: Writer> EscapeWriter<W> {

impl<W: Writer> Writer for EscapeWriter<W> {
fn write(&mut self, bytes: &[u8]) -> IoResult<()> {
if self.mode == EscapeAll {
// This mode needs to operate on chars. Everything else is handled below.
let s = str::from_utf8_lossy(bytes);
let s = s.as_slice();
let mut last = 0u;
for (i, c) in s.char_indices() {
match c {
'&'|'<'|'>'|'"'|'\'' => (),
'\0' | '\x20'..'\x7E' => continue,
_ => ()
}
if last < i {
try!(self.inner.write_str(s.slice(last, i)));
}
match c {
'&'|'<'|'>'|'"'|'\'' => {
let ent = match c {
'&' => "&amp;",
'<' => "&lt;",
'>' => "&gt;",
'"' => "&quot;",
'\'' => "&apos;",
_ => unreachable!()
};
try!(self.inner.write_str(ent));
}
_ => {
let c = c as u32;
try!(write!(&mut self.inner as &mut ::std::io::Writer, r"&\#x{:x};", c));
}
}
last = i + char::len_utf8_bytes(c);
}
if last < s.as_slice().len() {
try!(self.inner.write_str(s.slice_from(last)));
}
} else {
// We only want to escape ASCII values, so we can safely operate on bytes
let mut last = 0;
for (i, b) in bytes.iter().enumerate() {
let ent = match (self.mode, *b as char) {
(_,'&') => "&amp;",
(EscapeDefault,'<') |(EscapeText,'<') => "&lt;",
(EscapeDefault,'>') |(EscapeText,'>') => "&gt;",
(EscapeDefault,'\'')|(EscapeSingleQuoteAttr,'\'') => "&apos;",
(EscapeDefault,'"') |(EscapeAttr,'"') => "&quot;",
_ => continue
};
if last < i {
try!(self.inner.write(bytes.slice(last, i)));
}
try!(self.inner.write_str(ent));
last = i + 1;
}
if last < bytes.len() {
try!(self.inner.write(bytes.slice_from(last)));
let mut last = 0;
for (i, b) in bytes.iter().enumerate() {
let ent = match (self.mode, *b as char) {
(_,'&') => "&amp;",
(EscapeDefault,'<') |(EscapeText,'<') => "&lt;",
(EscapeDefault,'>') |(EscapeText,'>') => "&gt;",
(EscapeDefault,'\'')|(EscapeSingleQuoteAttr,'\'') => "&#39;",
(EscapeDefault,'"') |(EscapeAttr,'"') => "&quot;",
_ => continue
};
if last < i {
try!(self.inner.write(bytes.slice(last, i)));
}
try!(self.inner.write_str(ent));
last = i + 1;
}
if last < bytes.len() {
try!(self.inner.write(bytes.slice_from(last)));
}
Ok(())
}
Expand Down
6 changes: 3 additions & 3 deletions src/libhtml/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl fmt::Show for UnTest {
fn test_escape() {
let s = r#"<script src="evil.domain?foo&" type='baz'>"#;
assert_eq!(escape(s).as_slice(), "&lt;script src=&quot;evil.domain?foo&amp;&quot; \
type=&apos;baz&apos;&gt;");
type=&#39;baz&#39;&gt;");

let t = Test("foo".to_strbuf());
assert_eq!(escape(t), "&lt;Test&gt;foo&lt;/Test&gt;".to_owned());
Expand Down Expand Up @@ -71,9 +71,9 @@ mod python {

#[test]
fn test_escape() {
// python converts ' to &#x27; but we go to &apos;
// python converts ' to &#x27; but we go to &#39;
assert_eq!(escape(r#"'<script>"&foo;"</script>'"#).as_slice(),
"&apos;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&apos;");
"&#39;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#39;");
let mut w = EscapeWriter::new(MemWriter::new(), EscapeText);
assert!(w.write_str(r#"'<script>"&foo;"</script>'"#).is_ok());
assert_eq!(w.unwrap().unwrap().as_slice(),
Expand Down

0 comments on commit a4575eb

Please sign in to comment.