Skip to content

Commit a342eb4

Browse files
Reimplement ser::escape_str() and escape control characters
This new implementation escapes Unicode C0, DEL, and C1 control characters. It also uses its own logic and no longer relies on ser::escape_bytes(). Escaping C0 control characters is mandated by ECMA-404. Escaping DEL and C1 control characters is not required, but it is a useful convenience that other JSON implementations often provide.
1 parent f523b41 commit a342eb4

File tree

1 file changed

+27
-50
lines changed

1 file changed

+27
-50
lines changed

json/src/ser.rs

Lines changed: 27 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -486,78 +486,55 @@ impl<'a> Formatter for PrettyFormatter<'a> {
486486
}
487487
}
488488

489-
/// Serializes and escapes a `&[u8]` into a JSON string.
489+
490+
/// Serializes and escapes a `&str` into a JSON string.
490491
#[inline]
491-
pub fn escape_bytes<W>(wr: &mut W, bytes: &[u8]) -> Result<()>
492+
pub fn escape_str<W>(wr: &mut W, value: &str) -> Result<()>
492493
where W: io::Write
493494
{
494-
try!(wr.write_all(b"\""));
495-
496495
let mut start = 0;
497-
let mut last_byte = 0u8;
498-
499-
for (i, byte) in bytes.iter().enumerate() {
500-
let last_byte_was_c2 = last_byte == b'\xC2';
501-
last_byte = *byte;
502-
503-
let escaped = match *byte {
504-
b'"' => b"\\\"",
505-
b'\\' => b"\\\\",
506-
b'\x08' => b"\\b",
507-
b'\x0c' => b"\\f",
508-
b'\n' => b"\\n",
509-
b'\r' => b"\\r",
510-
b'\t' => b"\\t",
511-
b'\x00' ... b'\x1F' | b'\x7F' => {
512-
if start < i {
513-
try!(wr.write_all(&bytes[start..i]));
514-
}
515-
516-
try!(write!(wr,"\\u{:04X}", *byte));
517-
518-
start = i + 1;
519-
520-
continue;
521-
},
522-
b'\x80' ... b'\x9F' if last_byte_was_c2 => {
523-
if start < (i - 1) {
524-
try!(wr.write_all(&bytes[start..(i - 1)]));
525-
}
526-
527-
try!(write!(wr,"\\u{:04X}", *byte));
528496

529-
start = i + 1;
497+
try!(wr.write_all(b"\""));
530498

499+
for (i, char) in value.char_indices() {
500+
let escaped = match char {
501+
'"' => b"\\\"",
502+
'\\' => b"\\\\",
503+
'\u{08}' => b"\\b",
504+
'\u{0c}' => b"\\f",
505+
'\n' => b"\\n",
506+
'\r' => b"\\r",
507+
'\t' => b"\\t",
508+
'\u{00}' ... '\u{1F}' | '\u{7F}' | '\u{80}' ... '\u{9F}' => {
509+
// only Unicode C0 control characters ('\u{00}' ... '\u{1F}') are mandated to be escaped by ECMA-404.
510+
// DEL ('\u{7F}') and C1 ('\u{80}' ... '\u{9F}') control characters are also escaped for convenience.
511+
512+
debug_assert_eq!(char.len_utf16(), 1); // C0, DEL and C1 control characters fit on one utf16 code unit by specification.
513+
try!(write!(wr, "{}\\u{:04X}", &value[start..i], char as u32));
514+
515+
start = i + char.len_utf8();
531516
continue;
532517
},
533-
_ => { continue; }
518+
_ => { continue; }
534519
};
535520

536521
if start < i {
537-
try!(wr.write_all(&bytes[start..i]));
522+
try!(wr.write_all(&value[start..i].as_bytes()));
538523
}
539-
540524
try!(wr.write_all(escaped));
541525

526+
debug_assert_eq!(char.len_utf8(), 1);
542527
start = i + 1;
543528
}
544529

545-
if start != bytes.len() {
546-
try!(wr.write_all(&bytes[start..]));
530+
if start != value.len() {
531+
try!(wr.write_all(&value[start..].as_bytes()));
547532
}
548533

549534
try!(wr.write_all(b"\""));
550535
Ok(())
551536
}
552537

553-
/// Serializes and escapes a `&str` into a JSON string.
554-
#[inline]
555-
pub fn escape_str<W>(wr: &mut W, value: &str) -> Result<()>
556-
where W: io::Write
557-
{
558-
escape_bytes(wr, value.as_bytes())
559-
}
560-
561538
#[inline]
562539
fn escape_char<W>(wr: &mut W, value: char) -> Result<()>
563540
where W: io::Write
@@ -566,7 +543,7 @@ fn escape_char<W>(wr: &mut W, value: char) -> Result<()>
566543
// rust, which doesn't support encoding a `char` into a stack buffer.
567544
let mut s = String::new();
568545
s.push(value);
569-
escape_bytes(wr, s.as_bytes())
546+
escape_str(wr, &s)
570547
}
571548

572549
fn fmt_f32_or_null<W>(wr: &mut W, value: f32) -> Result<()>

0 commit comments

Comments
 (0)