diff --git a/library/core/benches/str.rs b/library/core/benches/str.rs index 7d36eff3d6c8b..0f14809444bc5 100644 --- a/library/core/benches/str.rs +++ b/library/core/benches/str.rs @@ -3,6 +3,7 @@ use test::{black_box, Bencher}; mod char_count; mod corpora; +mod debug; mod iter; #[bench] diff --git a/library/core/benches/str/debug.rs b/library/core/benches/str/debug.rs new file mode 100644 index 0000000000000..cb91169eed8eb --- /dev/null +++ b/library/core/benches/str/debug.rs @@ -0,0 +1,79 @@ +//! This primarily benchmarks `impl Debug for str`, +//! and it also explicitly tests that we minimizes calls to the underlying `Write`r. +//! While that is an implementation detail and there are no guarantees about it, +//! we should still try to minimize those calls over time rather than regress them. + +use std::fmt::{self, Write}; +use test::{black_box, Bencher}; + +#[derive(Default)] +struct CountingWriter { + buf: String, + write_calls: usize, +} + +impl Write for CountingWriter { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.buf.push_str(s); + self.write_calls += 1; + Ok(()) + } +} + +fn assert_fmt(s: &str, expected: &str, expected_write_calls: usize) { + let mut w = CountingWriter::default(); + + write!(&mut w, "{s:?}").unwrap(); + assert_eq!(s.len(), 64); + assert_eq!(w.buf, expected); + assert_eq!(w.write_calls, expected_write_calls); +} + +#[bench] +fn ascii_only(b: &mut Bencher) { + let s = "just a bit of ascii text that has no escapes. 64 bytes exactly!!"; + assert_fmt(s, r#""just a bit of ascii text that has no escapes. 64 bytes exactly!!""#, 3); + b.iter(|| { + black_box(format!("{:?}", black_box(s))); + }); +} + +#[bench] +fn ascii_escapes(b: &mut Bencher) { + let s = "some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte"; + assert_fmt( + s, + r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#, + 15, + ); + b.iter(|| { + black_box(format!("{:?}", black_box(s))); + }); +} + +#[bench] +fn some_unicode(b: &mut Bencher) { + let s = "egy kis szöveg néhány unicode betűvel. legyen ez is 64 byte."; + assert_fmt(s, r#""egy kis szöveg néhány unicode betűvel. legyen ez is 64 byte.""#, 3); + b.iter(|| { + black_box(format!("{:?}", black_box(s))); + }); +} + +#[bench] +fn mostly_unicode(b: &mut Bencher) { + let s = "предложение из кириллических букв."; + assert_fmt(s, r#""предложение из кириллических букв.""#, 3); + b.iter(|| { + black_box(format!("{:?}", black_box(s))); + }); +} + +#[bench] +fn mixed(b: &mut Bencher) { + let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\"."; + assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21); + b.iter(|| { + black_box(format!("{:?}", black_box(s))); + }); +} diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index ce0643a3f5ef5..1d75b9f44f38f 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -2397,9 +2397,7 @@ impl Debug for str { // If char needs escaping, flush backlog so far and write, else skip if esc.len() != 1 { f.write_str(&self[from..i])?; - for c in esc { - f.write_char(c)?; - } + Display::fmt(&esc, f)?; from = i + c.len_utf8(); } } @@ -2419,13 +2417,12 @@ impl Display for str { impl Debug for char { fn fmt(&self, f: &mut Formatter<'_>) -> Result { f.write_char('\'')?; - for c in self.escape_debug_ext(EscapeDebugExtArgs { + let esc = self.escape_debug_ext(EscapeDebugExtArgs { escape_grapheme_extended: true, escape_single_quote: true, escape_double_quote: false, - }) { - f.write_char(c)? - } + }); + Display::fmt(&esc, f)?; f.write_char('\'') } }