Skip to content

Commit 516a1e7

Browse files
Reimplement ser::escape_str() and escape control characters
This new implementation escapes Unicode C0, DEL and C1 control characters. It also uses its own logic and does not rely on ser::escape_bytes(). Escaping C0 control characters is mandated by ECMA-404. Escaping DEL and C1 control characters is a useful convenience often provided by other JSON implementations.
1 parent f523b41 commit 516a1e7

File tree

1 file changed

+41
-1
lines changed

1 file changed

+41
-1
lines changed

json/src/ser.rs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,47 @@ pub fn escape_bytes<W>(wr: &mut W, bytes: &[u8]) -> Result<()>
555555
/// Writes `value` to `wr` as a double-quoted, escaped JSON string.
///
/// `"`, `\`, backspace, form feed, newline, carriage return and tab are written as
/// two-character escapes; every other C0 control character, DEL and the C1 control
/// characters are written as `\uXXXX` (four uppercase hex digits). All other text is
/// copied through unchanged. Returns early with an error if a write to `wr` fails.
pub fn escape_str<W>(wr: &mut W, value: &str) -> Result<()>
556556
where W: io::Write
557557
{
558-
escape_bytes(wr, value.as_bytes())
558+
let mut start = 0; // byte offset in `value` of the first character not yet written
559+
560+
try!(wr.write_all(b"\""));
561+
562+
for (i, char) in value.char_indices() {
563+
let escaped = match char {
564+
'"' => b"\\\"",
565+
'\\' => b"\\\\",
566+
'\u{08}' => b"\\b",
567+
'\u{0c}' => b"\\f",
568+
'\n' => b"\\n",
569+
'\r' => b"\\r",
570+
'\t' => b"\\t",
571+
'\u{00}' ... '\u{1F}' | '\u{7F}' | '\u{80}' ... '\u{9F}' => {
572+
// Only Unicode C0 control characters ('\u{00}' ... '\u{1F}') are mandated to be escaped by ECMA-404.
573+
// DEL ('\u{7F}') and C1 ('\u{80}' ... '\u{9F}') control characters are also escaped for convenience.
574+
575+
debug_assert_eq!(char.len_utf16(), 1); // C0, DEL and C1 control characters each fit in one UTF-16 code unit, so a single \uXXXX escape suffices.
576+
try!(write!(wr, "{}\\u{:04X}", &value[start..i], char as u32));
577+
578+
start = i + char.len_utf8(); // C1 characters take more than one byte in UTF-8, so skip the full encoded length
579+
continue;
580+
},
581+
_ => { continue; }
582+
};
583+
584+
if start < i {
585+
try!(wr.write_all(&value[start..i].as_bytes()));
586+
}
587+
try!(wr.write_all(escaped));
588+
589+
debug_assert_eq!(char.len_utf8(), 1); // every character with a two-character escape above is ASCII
590+
start = i + 1;
591+
}
592+
593+
if start != value.len() {
594+
try!(wr.write_all(&value[start..].as_bytes()));
595+
}
596+
597+
try!(wr.write_all(b"\""));
598+
Ok(())
559599
}
560600

561601
#[inline]

0 commit comments

Comments
 (0)