From 0842f2c65c766301be488256a18d89e90c3b6304 Mon Sep 17 00:00:00 2001
From: Thalia Archibald <thalia@archibald.dev>
Date: Wed, 5 Feb 2025 19:37:27 -0800
Subject: [PATCH 1/2] Add fast path for displaying pre-validated Wtf8Buf

---
 library/std/src/sys/os_str/wtf8.rs |  4 ++--
 library/std/src/sys_common/wtf8.rs | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs
index 19728d33990ac..8acec6f949fc5 100644
--- a/library/std/src/sys/os_str/wtf8.rs
+++ b/library/std/src/sys/os_str/wtf8.rs
@@ -41,13 +41,13 @@ impl AsInner<Wtf8> for Buf {
 
 impl fmt::Debug for Buf {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::Debug::fmt(self.as_slice(), f)
+        fmt::Debug::fmt(&self.inner, f)
     }
 }
 
 impl fmt::Display for Buf {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::Display::fmt(self.as_slice(), f)
+        fmt::Display::fmt(&self.inner, f)
     }
 }
 
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 952c39132b056..01cbb3e24ff8a 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -169,6 +169,18 @@ impl fmt::Debug for Wtf8Buf {
     }
 }
 
+/// Formats the string with unpaired surrogates substituted with the replacement
+/// character, U+FFFD.
+impl fmt::Display for Wtf8Buf {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Some(s) = self.as_known_utf8() {
+            fmt::Display::fmt(s, formatter)
+        } else {
+            fmt::Display::fmt(&**self, formatter)
+        }
+    }
+}
+
 impl Wtf8Buf {
     /// Creates a new, empty WTF-8 string.
     #[inline]
@@ -262,6 +274,18 @@ impl Wtf8Buf {
         unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
     }
 
+    /// Converts the string to UTF-8 without validation, if it was created from
+    /// valid UTF-8.
+    #[inline]
+    fn as_known_utf8(&self) -> Option<&str> {
+        if self.is_known_utf8 {
+            // SAFETY: The buffer is known to be valid UTF-8.
+            Some(unsafe { str::from_utf8_unchecked(self.as_bytes()) })
+        } else {
+            None
+        }
+    }
+
     /// Reserves capacity for at least `additional` more bytes to be inserted
     /// in the given `Wtf8Buf`.
     /// The collection may reserve more space to avoid frequent reallocations.

From eb14652770451022d424a1c301a4416514464932 Mon Sep 17 00:00:00 2001
From: Thalia Archibald <thalia@archibald.dev>
Date: Fri, 7 Feb 2025 11:54:02 -0800
Subject: [PATCH 2/2] Skip scanning for surrogates when not known valid

---
 library/std/src/sys_common/wtf8.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 01cbb3e24ff8a..f9ec112b19747 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -388,7 +388,7 @@ impl Wtf8Buf {
             _ => {
                 // If we'll be pushing a string containing a surrogate, we may
                 // no longer have UTF-8.
-                if other.next_surrogate(0).is_some() {
+                if self.is_known_utf8 && other.next_surrogate(0).is_some() {
                     self.is_known_utf8 = false;
                 }