Skip to content

Commit

Permalink
Add str-based os_str implementation for wasm
Browse files Browse the repository at this point in the history
On `wasm` the only legal way to construct an OS string is from a UTF-8
string. Every other platform publicly guarantees (through some
extension trait) either arbitrary bytes or (transcoded) UTF-16.

Therefore `wasm` can base its OS strings on UTF-8 strings. The
implementation is largely copied from the `unix` implementation, but
without the validation and with slightly more liberal use of
`#[inline]`.

An immediate benefit is that conversion back to UTF-8 strings is
free. But the motivation is that we want the `unix` implementation to
express that it can be sliced at arbitrary points, and this would be
inappropriate for `wasm` since it lacks an extension trait.
  • Loading branch information
blyxxyz committed Dec 3, 2023
1 parent 7ceaf19 commit 463b3bc
Show file tree
Hide file tree
Showing 3 changed files with 277 additions and 3 deletions.
1 change: 0 additions & 1 deletion library/std/src/sys/unsupported/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ pub mod locks;
pub mod net;
pub mod once;
pub mod os;
#[path = "../unix/os_str.rs"]
pub mod os_str;
#[path = "../unix/path.rs"]
pub mod path;
Expand Down
275 changes: 275 additions & 0 deletions library/std/src/sys/unsupported/os_str.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
//! The underlying OsString/OsStr implementation on platforms where there isn't
//! any particular OS encoding: just a `String`/`str`.
use crate::borrow::Cow;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::mem;
use crate::rc::Rc;
use crate::str;
use crate::string::String;
use crate::sync::Arc;
use crate::sys_common::{AsInner, FromInner, IntoInner};

/// Owned OS string buffer for platforms without a particular OS encoding:
/// a thin wrapper around a `String`.
///
/// `#[repr(transparent)]` gives it the same layout as the wrapped `String`.
#[derive(Hash)]
#[repr(transparent)]
pub struct Buf {
/// The wrapped string that holds the buffer's contents.
pub inner: String,
}

/// Borrowed OS string slice for platforms without a particular OS encoding:
/// a thin wrapper around a `str`.
///
/// `#[repr(transparent)]` gives it the same layout as `str`; the conversions
/// between `str`-based and `Slice`-based references and smart pointers in this
/// file rely on that.
#[repr(transparent)]
pub struct Slice {
/// The wrapped string data.
pub inner: str,
}

impl fmt::Debug for Slice {
    /// Debug-formats the slice exactly as the wrapped `str` is debug-formatted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Debug>::fmt(&self.inner, f)
    }
}

impl fmt::Display for Slice {
    /// Displays the slice exactly as the wrapped `str` is displayed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(&self.inner, f)
    }
}

impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

impl Clone for Buf {
#[inline]
fn clone(&self) -> Self {
Buf { inner: self.inner.clone() }
}

#[inline]
fn clone_from(&mut self, source: &Self) {
self.inner.clone_from(&source.inner)
}
}

impl IntoInner<String> for Buf {
fn into_inner(self) -> String {
self.inner
}
}

impl FromInner<String> for Buf {
    /// Wraps the given `String` in a buffer as-is.
    fn from_inner(string: String) -> Self {
        Self { inner: string }
    }
}

impl AsInner<str> for Buf {
    /// Borrows the buffer's contents as a `str`.
    #[inline]
    fn as_inner(&self) -> &str {
        self.inner.as_str()
    }
}

impl Buf {
#[inline]
pub fn into_encoded_bytes(self) -> Vec<u8> {
self.inner.into_bytes()
}

#[inline]
pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
Self { inner: unsafe { String::from_utf8_unchecked(s) } }
}

#[inline]
pub fn from_string(s: String) -> Buf {
Buf { inner: s }
}

#[inline]
pub fn with_capacity(capacity: usize) -> Buf {
Buf { inner: String::with_capacity(capacity) }
}

#[inline]
pub fn clear(&mut self) {
self.inner.clear()
}

#[inline]
pub fn capacity(&self) -> usize {
self.inner.capacity()
}

#[inline]
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}

#[inline]
pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
self.inner.try_reserve(additional)
}

#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
self.inner.reserve_exact(additional)
}

#[inline]
pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
self.inner.try_reserve_exact(additional)
}

#[inline]
pub fn shrink_to_fit(&mut self) {
self.inner.shrink_to_fit()
}

#[inline]
pub fn shrink_to(&mut self, min_capacity: usize) {
self.inner.shrink_to(min_capacity)
}

#[inline]
pub fn as_slice(&self) -> &Slice {
// SAFETY: Slice just wraps str,
// and &*self.inner is &str, therefore
// transmuting &str to &Slice is safe.
unsafe { mem::transmute(&*self.inner) }
}

#[inline]
pub fn as_mut_slice(&mut self) -> &mut Slice {
// SAFETY: Slice just wraps str,
// and &mut *self.inner is &mut str, therefore
// transmuting &mut str to &mut Slice is safe.
unsafe { mem::transmute(&mut *self.inner) }
}

#[inline]
pub fn into_string(self) -> Result<String, Buf> {
Ok(self.inner)
}

#[inline]
pub fn push_slice(&mut self, s: &Slice) {
self.inner.push_str(&s.inner)
}

#[inline]
pub fn into_box(self) -> Box<Slice> {
unsafe { mem::transmute(self.inner.into_boxed_str()) }
}

#[inline]
pub fn from_box(boxed: Box<Slice>) -> Buf {
let inner: Box<str> = unsafe { mem::transmute(boxed) };
Buf { inner: inner.into_string() }
}

#[inline]
pub fn into_arc(&self) -> Arc<Slice> {
self.as_slice().into_arc()
}

#[inline]
pub fn into_rc(&self) -> Rc<Slice> {
self.as_slice().into_rc()
}
}

impl Slice {
    /// Returns the bytes of the wrapped `str`.
    #[inline]
    pub fn as_encoded_bytes(&self) -> &[u8] {
        let text: &str = &self.inner;
        text.as_bytes()
    }

    /// Reinterprets a byte slice as a `Slice` without checking it.
    ///
    /// # Safety
    ///
    /// `s` must be valid UTF-8, because the result wraps a `str` and no
    /// validation is performed here.
    #[inline]
    pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice {
        // SAFETY: the caller guarantees that `s` is valid UTF-8, and `Slice`
        // is a `#[repr(transparent)]` wrapper around `str`.
        unsafe { mem::transmute::<&[u8], &Slice>(s) }
    }

    /// Views a `str` as a `Slice`.
    #[inline]
    pub fn from_str(s: &str) -> &Slice {
        // SAFETY: `Slice` is a `#[repr(transparent)]` wrapper around `str`,
        // so a `&str` can be reinterpreted as a `&Slice`.
        unsafe { mem::transmute::<&str, &Slice>(s) }
    }

    /// Returns the wrapped `str`.
    ///
    /// This implementation always returns `Ok`.
    #[inline]
    pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {
        let text: &str = &self.inner;
        Ok(text)
    }

    /// Returns the wrapped `str` as a `Cow`; the result is always
    /// `Cow::Borrowed`.
    #[inline]
    pub fn to_string_lossy(&self) -> Cow<'_, str> {
        let text: &str = &self.inner;
        Cow::Borrowed(text)
    }

    /// Copies the slice into a new owned `Buf`.
    pub fn to_owned(&self) -> Buf {
        let inner: String = self.inner.to_owned();
        Buf { inner }
    }

    /// Copies the slice into `buf` by delegating to `clone_into` on the
    /// wrapped `str`, targeting `buf`'s `String`.
    pub fn clone_into(&self, buf: &mut Buf) {
        let target: &mut String = &mut buf.inner;
        self.inner.clone_into(target);
    }

    /// Copies the slice into a new boxed `Slice`.
    #[inline]
    pub fn into_box(&self) -> Box<Slice> {
        let boxed = Box::<str>::from(&self.inner);
        // SAFETY: `Slice` is a `#[repr(transparent)]` wrapper around `str`,
        // so a `Box<str>` can be reinterpreted as a `Box<Slice>`.
        unsafe { mem::transmute::<Box<str>, Box<Slice>>(boxed) }
    }

    /// Returns a boxed `Slice` built from the default `Box<str>`.
    pub fn empty_box() -> Box<Slice> {
        let boxed = Box::<str>::default();
        // SAFETY: `Slice` is a `#[repr(transparent)]` wrapper around `str`,
        // so a `Box<str>` can be reinterpreted as a `Box<Slice>`.
        unsafe { mem::transmute::<Box<str>, Box<Slice>>(boxed) }
    }

    /// Copies the slice into a new `Arc<Slice>`.
    #[inline]
    pub fn into_arc(&self) -> Arc<Slice> {
        let shared = Arc::<str>::from(&self.inner);
        let raw = Arc::into_raw(shared) as *const Slice;
        // SAFETY: `raw` was just produced by `Arc::into_raw` on an
        // `Arc<str>`, and `Slice` is a `#[repr(transparent)]` wrapper around
        // `str`.
        unsafe { Arc::from_raw(raw) }
    }

    /// Copies the slice into a new `Rc<Slice>`.
    #[inline]
    pub fn into_rc(&self) -> Rc<Slice> {
        let shared = Rc::<str>::from(&self.inner);
        let raw = Rc::into_raw(shared) as *const Slice;
        // SAFETY: `raw` was just produced by `Rc::into_raw` on an `Rc<str>`,
        // and `Slice` is a `#[repr(transparent)]` wrapper around `str`.
        unsafe { Rc::from_raw(raw) }
    }

    /// Forwards to `make_ascii_lowercase` on the wrapped `str`.
    #[inline]
    pub fn make_ascii_lowercase(&mut self) {
        self.inner.make_ascii_lowercase();
    }

    /// Forwards to `make_ascii_uppercase` on the wrapped `str`.
    #[inline]
    pub fn make_ascii_uppercase(&mut self) {
        self.inner.make_ascii_uppercase();
    }

    /// Returns a new `Buf` wrapping the result of `to_ascii_lowercase` on the
    /// wrapped `str`.
    #[inline]
    pub fn to_ascii_lowercase(&self) -> Buf {
        let lowered: String = self.inner.to_ascii_lowercase();
        Buf { inner: lowered }
    }

    /// Returns a new `Buf` wrapping the result of `to_ascii_uppercase` on the
    /// wrapped `str`.
    #[inline]
    pub fn to_ascii_uppercase(&self) -> Buf {
        let raised: String = self.inner.to_ascii_uppercase();
        Buf { inner: raised }
    }

    /// Forwards to `is_ascii` on the wrapped `str`.
    #[inline]
    pub fn is_ascii(&self) -> bool {
        let text: &str = &self.inner;
        text.is_ascii()
    }

    /// Forwards to `eq_ignore_ascii_case` on the wrapped `str`, comparing it
    /// with `other`'s wrapped `str`.
    #[inline]
    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
        let (lhs, rhs): (&str, &str) = (&self.inner, &other.inner);
        lhs.eq_ignore_ascii_case(rhs)
    }
}
4 changes: 2 additions & 2 deletions library/std/src/sys/wasm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! wide/production use yet, it's still all in the experimental category. This
//! will likely change over time.
//!
//! Currently all functions here are basically stubs that immediately return
//! Currently most functions here are basically stubs that immediately return
//! errors. The hope is that with a portability lint we can actually just
//! remove all this and just omit parts of the standard library if we're
//! compiling for wasm. That way it's a compile time error for something that's
Expand All @@ -30,7 +30,7 @@ pub mod io;
pub mod net;
#[path = "../unsupported/os.rs"]
pub mod os;
#[path = "../unix/os_str.rs"]
#[path = "../unsupported/os_str.rs"]
pub mod os_str;
#[path = "../unix/path.rs"]
pub mod path;
Expand Down

0 comments on commit 463b3bc

Please sign in to comment.