diff --git a/src/libstd/io/lazy.rs b/src/libstd/io/lazy.rs
index e864aa2c864bb..b75e33f8d43ba 100644
--- a/src/libstd/io/lazy.rs
+++ b/src/libstd/io/lazy.rs
@@ -1,64 +1,80 @@
-use crate::cell::Cell;
-use crate::ptr;
+use crate::cell::UnsafeCell;
 use crate::sync::Arc;
+use crate::sync::atomic::AtomicUsize;
+use crate::sync::atomic::Ordering;
 use crate::sys_common;
 use crate::sys_common::mutex::Mutex;
 
+/// Helper for lazy initialization of a static, with a destructor that attempts to run when the main
+/// (Rust) thread exits.
+///
+/// Currently used only inside the standard library, by the stdio types.
+///
+/// If there are still child threads around when the main thread exits, they get terminated. But
+/// there is a small window where they are not yet terminated and may hold a reference to the
+/// data. We therefore store the data in an `Arc<T>`, keep one of the `Arc`s in the static, and
+/// hand out clones. When the `Arc` in the static gets dropped by the `at_exit` handler, the
+/// contents will only be dropped if there were no child threads holding a reference.
+///
+/// # Safety
+/// - `UnsafeCell`: We only create a mutable reference during initialization and during the shutdown
+///   phase. At both times there can't exist any other references.
+/// - Destruction. The `Drop` implementation of `T` should not access references to anything except
+///   itself; they are not guaranteed to exist. It should also not rely on other machinery of the
+///   standard library to be available.
+/// - Initialization. The `init` function for `get` should not call `get` itself, to prevent
+///   infinite recursion and acquiring the guard mutex reentrantly.
+/// - We use the `Mutex` from `sys_common` because it has a `const` constructor. It currently has
+///   UB when acquired reentrantly without calling `init`.
 pub struct Lazy<T> {
-    // We never call `lock.init()`, so it is UB to attempt to acquire this mutex reentrantly!
-    lock: Mutex,
-    ptr: Cell<*mut Arc<T>>,
+    guard: Mutex, // Only used to protect initialization.
+    status: AtomicUsize,
+    data: UnsafeCell<Option<Arc<T>>>,
 }
 
-#[inline]
-const fn done<T>() -> *mut Arc<T> { 1_usize as *mut _ }
-
 unsafe impl<T> Sync for Lazy<T> {}
 
+const UNINITIALIZED: usize = 0;
+const SHUTDOWN: usize = 1;
+const AVAILABLE: usize = 2;
+
 impl<T> Lazy<T> {
     pub const fn new() -> Lazy<T> {
         Lazy {
-            lock: Mutex::new(),
-            ptr: Cell::new(ptr::null_mut()),
+            guard: Mutex::new(),
+            status: AtomicUsize::new(UNINITIALIZED),
+            data: UnsafeCell::new(None),
         }
     }
 }
 
 impl<T: Send + Sync + 'static> Lazy<T> {
-    /// Safety: `init` must not call `get` on the variable that is being
-    /// initialized.
-    pub unsafe fn get(&'static self, init: fn() -> Arc<T>) -> Option<Arc<T>> {
-        let _guard = self.lock.lock();
-        let ptr = self.ptr.get();
-        if ptr.is_null() {
-            Some(self.init(init))
-        } else if ptr == done() {
-            None
-        } else {
-            Some((*ptr).clone())
-        }
-    }
+    pub unsafe fn get(&'static self, init: fn() -> T) -> Option<Arc<T>> {
+        if self.status.load(Ordering::Acquire) == UNINITIALIZED {
+            let _guard = self.guard.lock();
+            // Double-check to make sure this `Lazy` didn't get initialized by another
+            // thread in the small window before we acquired the mutex.
+            if self.status.load(Ordering::Relaxed) != UNINITIALIZED {
+                return self.get(init);
+            }
+
+            // Register an `at_exit` handler.
+            let registered = sys_common::at_exit(move || {
+                *self.data.get() = None;
+                // The reference to `Arc<T>` gets dropped above. If there are no other references
+                // in child threads, `T` will be dropped.
+                self.status.store(SHUTDOWN, Ordering::Release);
+            });
+            if registered.is_err() {
+                // Registering the handler will only fail if we are already in the shutdown
+                // phase. In that case don't attempt to initialize.
+                return None;
+            }
 
-    // Must only be called with `lock` held
-    unsafe fn init(&'static self, init: fn() -> Arc<T>) -> Arc<T> {
-        // If we successfully register an at exit handler, then we cache the
-        // `Arc` allocation in our own internal box (it will get deallocated by
-        // the at exit handler). Otherwise we just return the freshly allocated
-        // `Arc`.
-        let registered = sys_common::at_exit(move || {
-            let ptr = {
-                let _guard = self.lock.lock();
-                self.ptr.replace(done())
-            };
-            drop(Box::from_raw(ptr))
-        });
-        // This could reentrantly call `init` again, which is a problem
-        // because our `lock` allows reentrancy!
-        // That's why `get` is unsafe and requires the caller to ensure no reentrancy happens.
-        let ret = init();
-        if registered.is_ok() {
-            self.ptr.set(Box::into_raw(Box::new(ret.clone())));
+            // Run the initializer of `T`.
+            *self.data.get() = Some(Arc::new(init()));
+            self.status.store(AVAILABLE, Ordering::Release);
         }
-        ret
+        (*self.data.get()).as_ref().cloned()
     }
 }
diff --git a/src/libstd/io/stdio.rs b/src/libstd/io/stdio.rs
index 13bf357e2eb8f..a4c60292ca237 100644
--- a/src/libstd/io/stdio.rs
+++ b/src/libstd/io/stdio.rs
@@ -41,7 +41,7 @@ struct StderrRaw(stdio::Stderr);
 /// handles is **not** available to raw handles returned from this function.
 ///
 /// The returned handle has no external synchronization or buffering.
-fn stdin_raw() -> io::Result<StdinRaw> { stdio::Stdin::new().map(StdinRaw) }
+fn stdin_raw() -> StdinRaw { StdinRaw(stdio::Stdin::new()) }
 
 /// Constructs a new raw handle to the standard output stream of this process.
 ///
@@ -52,7 +52,7 @@ fn stdin_raw() -> io::Result<StdinRaw> { stdio::Stdin::new().map(StdinRaw) }
 ///
 /// The returned handle has no external synchronization or buffering layered on
 /// top.
-fn stdout_raw() -> io::Result<StdoutRaw> { stdio::Stdout::new().map(StdoutRaw) }
+fn stdout_raw() -> StdoutRaw { StdoutRaw(stdio::Stdout::new()) }
 
 /// Constructs a new raw handle to the standard error stream of this process.
 ///
@@ -61,7 +61,7 @@ fn stdout_raw() -> io::Result<StdoutRaw> { stdio::Stdout::new().map(StdoutRaw) }
 ///
 /// The returned handle has no external synchronization or buffering layered on
 /// top.
-fn stderr_raw() -> io::Result<StderrRaw> { stdio::Stderr::new().map(StderrRaw) }
+fn stderr_raw() -> StderrRaw { StderrRaw(stdio::Stderr::new()) }
 
 impl Read for StdinRaw {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { self.0.read(buf) }
@@ -71,42 +71,32 @@ impl Read for StdinRaw {
         Initializer::nop()
     }
 }
+
 impl Write for StdoutRaw {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.0.write(buf) }
     fn flush(&mut self) -> io::Result<()> { self.0.flush() }
 }
+
 impl Write for StderrRaw {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.0.write(buf) }
     fn flush(&mut self) -> io::Result<()> { self.0.flush() }
 }
 
-enum Maybe<T> {
-    Real(T),
-    Fake,
-}
+struct Maybe<T> (T);
 
 impl<W: io::Write> io::Write for Maybe<W> {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        match *self {
-            Maybe::Real(ref mut w) => handle_ebadf(w.write(buf), buf.len()),
-            Maybe::Fake => Ok(buf.len())
-        }
+        handle_ebadf(self.0.write(buf), buf.len())
     }
 
     fn flush(&mut self) -> io::Result<()> {
-        match *self {
-            Maybe::Real(ref mut w) => handle_ebadf(w.flush(), ()),
-            Maybe::Fake => Ok(())
-        }
+        handle_ebadf(self.0.flush(), ())
     }
 }
 
 impl<R: io::Read> io::Read for Maybe<R> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        match *self {
-            Maybe::Real(ref mut r) => handle_ebadf(r.read(buf), 0),
-            Maybe::Fake => Ok(0)
-        }
+        handle_ebadf(self.0.read(buf), 0)
     }
 }
 
@@ -202,21 +192,15 @@ pub struct StdinLock<'a> {
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn stdin() -> Stdin {
-    static INSTANCE: Lazy<Mutex<BufReader<Maybe<StdinRaw>>>> = Lazy::new();
-    return Stdin {
+    static STDIN: Lazy<Mutex<BufReader<Maybe<StdinRaw>>>> = Lazy::new();
+    fn stdin_init() -> Mutex<BufReader<Maybe<StdinRaw>>> {
+        Mutex::new(BufReader::with_capacity(stdio::STDIN_BUF_SIZE, Maybe(stdin_raw())))
+    }
+
+    Stdin {
         inner: unsafe {
-            INSTANCE.get(stdin_init).expect("cannot access stdin during shutdown")
+            STDIN.get(stdin_init).expect("cannot access stdin during shutdown")
         },
-    };
-
-    fn stdin_init() -> Arc<Mutex<BufReader<Maybe<StdinRaw>>>> {
-        // This must not reentrantly access `INSTANCE`
-        let stdin = match stdin_raw() {
-            Ok(stdin) => Maybe::Real(stdin),
-            _ => Maybe::Fake
-        };
-
-        Arc::new(Mutex::new(BufReader::with_capacity(stdio::STDIN_BUF_SIZE, stdin)))
     }
 }
 
@@ -418,20 +402,15 @@ pub struct StdoutLock<'a> {
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn stdout() -> Stdout {
-    static INSTANCE: Lazy<ReentrantMutex<RefCell<LineWriter<Maybe<StdoutRaw>>>>> = Lazy::new();
-    return Stdout {
+    static STDOUT: Lazy<ReentrantMutex<RefCell<LineWriter<Maybe<StdoutRaw>>>>> = Lazy::new();
+    fn stdout_init() -> ReentrantMutex<RefCell<LineWriter<Maybe<StdoutRaw>>>> {
+        ReentrantMutex::new(RefCell::new(LineWriter::new(Maybe(stdout_raw()))))
+    }
+
+    Stdout {
         inner: unsafe {
-            INSTANCE.get(stdout_init).expect("cannot access stdout during shutdown")
+            STDOUT.get(stdout_init).expect("cannot access stdout during shutdown")
         },
-    };
-
-    fn stdout_init() -> Arc<ReentrantMutex<RefCell<LineWriter<Maybe<StdoutRaw>>>>> {
-        // This must not reentrantly access `INSTANCE`
-        let stdout = match stdout_raw() {
-            Ok(stdout) => Maybe::Real(stdout),
-            _ => Maybe::Fake,
-        };
-        Arc::new(ReentrantMutex::new(RefCell::new(LineWriter::new(stdout))))
     }
 }
 
@@ -571,20 +550,15 @@ pub struct StderrLock<'a> {
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn stderr() -> Stderr {
-    static INSTANCE: Lazy<ReentrantMutex<RefCell<Maybe<StderrRaw>>>> = Lazy::new();
-    return Stderr {
+    static STDERR: Lazy<ReentrantMutex<RefCell<Maybe<StderrRaw>>>> = Lazy::new();
+    fn stderr_init() -> ReentrantMutex<RefCell<Maybe<StderrRaw>>> {
+        ReentrantMutex::new(RefCell::new(Maybe(stderr_raw())))
+    }
+
+    Stderr {
         inner: unsafe {
-            INSTANCE.get(stderr_init).expect("cannot access stderr during shutdown")
+            STDERR.get(stderr_init).expect("cannot access stderr during shutdown")
         },
-    };
-
-    fn stderr_init() -> Arc<ReentrantMutex<RefCell<Maybe<StderrRaw>>>> {
-        // This must not reentrantly access `INSTANCE`
-        let stderr = match stderr_raw() {
-            Ok(stderr) => Maybe::Real(stderr),
-            _ => Maybe::Fake,
-        };
-        Arc::new(ReentrantMutex::new(RefCell::new(stderr)))
     }
 }
 
diff --git a/src/libstd/sys/cloudabi/stdio.rs b/src/libstd/sys/cloudabi/stdio.rs
index 601563c5b1fcb..d4ba1caf68ea5 100644
--- a/src/libstd/sys/cloudabi/stdio.rs
+++ b/src/libstd/sys/cloudabi/stdio.rs
@@ -1,14 +1,12 @@
 use crate::io;
 use crate::sys::cloudabi::abi;
 
-pub struct Stdin(());
-pub struct Stdout(());
-pub struct Stderr(());
+pub struct Stdin;
+pub struct Stdout;
+pub struct Stderr;
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> {
-        Ok(Stdin(()))
-    }
+    pub fn new() -> Stdin { Stdin }
 }
 
 impl io::Read for Stdin {
@@ -18,9 +16,7 @@ impl io::Read for Stdin {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> {
-        Ok(Stdout(()))
-    }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -37,9 +33,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> {
-        Ok(Stderr(()))
-    }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
@@ -62,5 +56,5 @@ pub fn is_ebadf(err: &io::Error) -> bool {
 pub const STDIN_BUF_SIZE: usize = crate::sys_common::io::DEFAULT_BUF_SIZE;
 
 pub fn panic_output() -> Option<impl io::Write> {
-    Stderr::new().ok()
+    Some(Stderr::new())
 }
diff --git a/src/libstd/sys/redox/stdio.rs b/src/libstd/sys/redox/stdio.rs
index 33f5bdbb5d358..52d98b0e651cd 100644
--- a/src/libstd/sys/redox/stdio.rs
+++ b/src/libstd/sys/redox/stdio.rs
@@ -2,12 +2,12 @@ use crate::io;
 use crate::sys::{cvt, syscall};
 use crate::sys::fd::FileDesc;
 
-pub struct Stdin(());
-pub struct Stdout(());
-pub struct Stderr(());
+pub struct Stdin;
+pub struct Stdout;
+pub struct Stderr;
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> { Ok(Stdin(())) }
+    pub fn new() -> Stdin { Stdin }
 }
 
 impl io::Read for Stdin {
@@ -20,7 +20,7 @@ impl io::Read for Stdin {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> { Ok(Stdout(())) }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -37,7 +37,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> { Ok(Stderr(())) }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
@@ -60,5 +60,5 @@ pub fn is_ebadf(err: &io::Error) -> bool {
 pub const STDIN_BUF_SIZE: usize = crate::sys_common::io::DEFAULT_BUF_SIZE;
 
 pub fn panic_output() -> Option<impl io::Write> {
-    Stderr::new().ok()
+    Some(Stderr::new())
 }
diff --git a/src/libstd/sys/sgx/stdio.rs b/src/libstd/sys/sgx/stdio.rs
index f2c6892bfb7fd..837c13c44fd9e 100644
--- a/src/libstd/sys/sgx/stdio.rs
+++ b/src/libstd/sys/sgx/stdio.rs
@@ -3,9 +3,9 @@ use fortanix_sgx_abi as abi;
 use crate::io;
 use crate::sys::fd::FileDesc;
 
-pub struct Stdin(());
-pub struct Stdout(());
-pub struct Stderr(());
+pub struct Stdin;
+pub struct Stdout;
+pub struct Stderr;
 
 fn with_std_fd<F: FnOnce(&FileDesc) -> R, R>(fd: abi::Fd, f: F) -> R {
     let fd = FileDesc::new(fd);
@@ -15,7 +15,7 @@ fn with_std_fd<F: FnOnce(&FileDesc) -> R, R>(fd: abi::Fd, f: F) -> R {
 }
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> { Ok(Stdin(())) }
+    pub fn new() -> Stdin { Stdin }
 }
 
 impl io::Read for Stdin {
@@ -25,7 +25,7 @@ impl io::Read for Stdin {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> { Ok(Stdout(())) }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -39,7 +39,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> { Ok(Stderr(())) }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
diff --git a/src/libstd/sys/unix/stdio.rs b/src/libstd/sys/unix/stdio.rs
index 35f163bbdb10f..7d8a3234412a1 100644
--- a/src/libstd/sys/unix/stdio.rs
+++ b/src/libstd/sys/unix/stdio.rs
@@ -1,12 +1,12 @@
 use crate::io;
 use crate::sys::fd::FileDesc;
 
-pub struct Stdin(());
-pub struct Stdout(());
-pub struct Stderr(());
+pub struct Stdin;
+pub struct Stdout;
+pub struct Stderr;
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> { Ok(Stdin(())) }
+    pub fn new() -> Stdin { Stdin }
 }
 
 impl io::Read for Stdin {
@@ -19,7 +19,7 @@ impl io::Read for Stdin {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> { Ok(Stdout(())) }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -36,7 +36,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> { Ok(Stderr(())) }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
@@ -59,5 +59,5 @@ pub fn is_ebadf(err: &io::Error) -> bool {
 pub const STDIN_BUF_SIZE: usize = crate::sys_common::io::DEFAULT_BUF_SIZE;
 
 pub fn panic_output() -> Option<impl io::Write> {
-    Stderr::new().ok()
+    Some(Stderr::new())
 }
diff --git a/src/libstd/sys/wasm/stdio.rs b/src/libstd/sys/wasm/stdio.rs
index b8899a9c84746..44b07d7008a2b 100644
--- a/src/libstd/sys/wasm/stdio.rs
+++ b/src/libstd/sys/wasm/stdio.rs
@@ -6,9 +6,7 @@ pub struct Stdout;
 pub struct Stderr;
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> {
-        Ok(Stdin)
-    }
+    pub fn new() -> Stdin { Stdin }
 }
 
 impl io::Read for Stdin {
@@ -18,9 +16,7 @@ impl io::Read for Stdin {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> {
-        Ok(Stdout)
-    }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -35,9 +31,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> {
-        Ok(Stderr)
-    }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
@@ -59,7 +53,7 @@ pub fn is_ebadf(_err: &io::Error) -> bool {
 
 pub fn panic_output() -> Option<impl io::Write> {
     if cfg!(feature = "wasm_syscall") {
-        Stderr::new().ok()
+        Some(Stderr::new())
     } else {
         None
     }
diff --git a/src/libstd/sys/windows/stdio.rs b/src/libstd/sys/windows/stdio.rs
index b1e76b3b755da..797fb1046ffd9 100644
--- a/src/libstd/sys/windows/stdio.rs
+++ b/src/libstd/sys/windows/stdio.rs
@@ -126,8 +126,8 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
 }
 
 impl Stdin {
-    pub fn new() -> io::Result<Stdin> {
-        Ok(Stdin { surrogate: 0 })
+    pub fn new() -> Stdin {
+        Stdin { surrogate: 0 }
     }
 }
 
@@ -160,7 +160,6 @@ impl io::Read for Stdin {
     }
 }
 
-
 // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
 // buffer size, and keep it around for the next read hoping to put them together.
 // This is a best effort, and may not work if we are not the only reader on Stdin.
@@ -243,9 +242,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
 }
 
 impl Stdout {
-    pub fn new() -> io::Result<Stdout> {
-        Ok(Stdout)
-    }
+    pub fn new() -> Stdout { Stdout }
 }
 
 impl io::Write for Stdout {
@@ -259,9 +256,7 @@ impl io::Write for Stdout {
 }
 
 impl Stderr {
-    pub fn new() -> io::Result<Stderr> {
-        Ok(Stderr)
-    }
+    pub fn new() -> Stderr { Stderr }
 }
 
 impl io::Write for Stderr {
@@ -279,5 +274,5 @@ pub fn is_ebadf(err: &io::Error) -> bool {
 }
 
 pub fn panic_output() -> Option<impl io::Write> {
-    Stderr::new().ok()
+    Some(Stderr::new())
 }