Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(std): Stabilize 'os_str_bytes' feature #115443

Merged
merged 2 commits into from
Sep 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions library/std/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@
//! On all platforms, [`OsStr`] consists of a sequence of bytes that is encoded as a superset of
//! UTF-8; see [`OsString`] for more details on its encoding on different platforms.
//!
//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_os_str_bytes`] and
//! [`OsStr::from_os_str_bytes_unchecked`].
//! For limited, inexpensive conversions from and to bytes, see [`OsStr::as_encoded_bytes`] and
//! [`OsStr::from_encoded_bytes_unchecked`].
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
Expand Down
68 changes: 32 additions & 36 deletions library/std/src/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,36 +154,34 @@ impl OsString {
/// # Safety
///
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
/// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version
/// built for the same target platform. For example, reconstructing an `OsString` from bytes sent
/// over the network or stored in a file will likely violate these safety rules.
///
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
///
/// # Example
///
/// ```
/// #![feature(os_str_bytes)]
///
/// use std::ffi::OsStr;
///
/// let os_str = OsStr::new("Mary had a little lamb");
/// let bytes = os_str.as_os_str_bytes();
/// let bytes = os_str.as_encoded_bytes();
/// let words = bytes.split(|b| *b == b' ');
/// let words: Vec<&OsStr> = words.map(|word| {
/// // SAFETY:
/// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
/// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes`
/// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
/// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
/// unsafe { OsStr::from_encoded_bytes_unchecked(word) }
/// }).collect();
/// ```
///
/// [conversions]: super#conversions
#[inline]
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
#[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec<u8>) -> Self {
OsString { inner: Buf::from_encoded_bytes_unchecked(bytes) }
}

/// Converts to an [`OsStr`] slice.
Expand All @@ -205,7 +203,7 @@ impl OsString {
}

/// Converts the `OsString` into a byte vector. To convert the byte vector back into an
/// `OsString`, use the [`OsStr::from_os_str_bytes_unchecked`] function.
/// `OsString`, use the [`OsString::from_encoded_bytes_unchecked`] function.
///
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
Expand All @@ -219,9 +217,9 @@ impl OsString {
///
/// [`std::ffi`]: crate::ffi
#[inline]
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub fn into_os_str_bytes(self) -> Vec<u8> {
self.inner.into_os_str_bytes()
#[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
pub fn into_encoded_bytes(self) -> Vec<u8> {
self.inner.into_encoded_bytes()
}

/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
Expand Down Expand Up @@ -745,36 +743,34 @@ impl OsStr {
/// # Safety
///
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version
/// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version
/// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent
/// over the network or stored in a file will likely violate these safety rules.
///
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
///
/// # Example
///
/// ```
/// #![feature(os_str_bytes)]
///
/// use std::ffi::OsStr;
///
/// let os_str = OsStr::new("Mary had a little lamb");
/// let bytes = os_str.as_os_str_bytes();
/// let bytes = os_str.as_encoded_bytes();
/// let words = bytes.split(|b| *b == b' ');
/// let words: Vec<&OsStr> = words.map(|word| {
/// // SAFETY:
/// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
/// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes`
/// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
/// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
/// unsafe { OsStr::from_encoded_bytes_unchecked(word) }
/// }).collect();
/// ```
///
/// [conversions]: super#conversions
#[inline]
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub unsafe fn from_os_str_bytes_unchecked(bytes: &[u8]) -> &Self {
Self::from_inner(Slice::from_os_str_bytes_unchecked(bytes))
#[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
pub unsafe fn from_encoded_bytes_unchecked(bytes: &[u8]) -> &Self {
Self::from_inner(Slice::from_encoded_bytes_unchecked(bytes))
}

#[inline]
Expand Down Expand Up @@ -948,7 +944,7 @@ impl OsStr {
}

/// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS
/// string slice, use the [`OsStr::from_os_str_bytes_unchecked`] function.
/// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function.
///
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
Expand All @@ -962,9 +958,9 @@ impl OsStr {
///
/// [`std::ffi`]: crate::ffi
#[inline]
#[unstable(feature = "os_str_bytes", issue = "111544")]
pub fn as_os_str_bytes(&self) -> &[u8] {
self.inner.as_os_str_bytes()
#[stable(feature = "os_str_bytes", since = "CURRENT_RUSTC_VERSION")]
pub fn as_encoded_bytes(&self) -> &[u8] {
self.inner.as_encoded_bytes()
}

/// Converts this string to its ASCII lower case equivalent in-place.
Expand Down Expand Up @@ -1270,7 +1266,7 @@ impl Default for &OsStr {
impl PartialEq for OsStr {
#[inline]
fn eq(&self, other: &OsStr) -> bool {
self.as_os_str_bytes().eq(other.as_os_str_bytes())
self.as_encoded_bytes().eq(other.as_encoded_bytes())
}
}

Expand All @@ -1297,23 +1293,23 @@ impl Eq for OsStr {}
impl PartialOrd for OsStr {
#[inline]
fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
self.as_os_str_bytes().partial_cmp(other.as_os_str_bytes())
self.as_encoded_bytes().partial_cmp(other.as_encoded_bytes())
}
#[inline]
fn lt(&self, other: &OsStr) -> bool {
self.as_os_str_bytes().lt(other.as_os_str_bytes())
self.as_encoded_bytes().lt(other.as_encoded_bytes())
}
#[inline]
fn le(&self, other: &OsStr) -> bool {
self.as_os_str_bytes().le(other.as_os_str_bytes())
self.as_encoded_bytes().le(other.as_encoded_bytes())
}
#[inline]
fn gt(&self, other: &OsStr) -> bool {
self.as_os_str_bytes().gt(other.as_os_str_bytes())
self.as_encoded_bytes().gt(other.as_encoded_bytes())
}
#[inline]
fn ge(&self, other: &OsStr) -> bool {
self.as_os_str_bytes().ge(other.as_os_str_bytes())
self.as_encoded_bytes().ge(other.as_encoded_bytes())
}
}

Expand All @@ -1332,7 +1328,7 @@ impl PartialOrd<str> for OsStr {
impl Ord for OsStr {
#[inline]
fn cmp(&self, other: &OsStr) -> cmp::Ordering {
self.as_os_str_bytes().cmp(other.as_os_str_bytes())
self.as_encoded_bytes().cmp(other.as_encoded_bytes())
}
}

Expand Down Expand Up @@ -1382,7 +1378,7 @@ impl_cmp!(Cow<'a, OsStr>, OsString);
impl Hash for OsStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_os_str_bytes().hash(state)
self.as_encoded_bytes().hash(state)
}
}

Expand Down
34 changes: 17 additions & 17 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ impl<'a> Prefix<'a> {
fn len(&self) -> usize {
use self::Prefix::*;
fn os_str_len(s: &OsStr) -> usize {
s.as_os_str_bytes().len()
s.as_encoded_bytes().len()
}
match *self {
Verbatim(x) => 4 + os_str_len(x),
Expand Down Expand Up @@ -316,31 +316,31 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool {

// basic workhorse for splitting stem and extension
fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
if file.as_os_str_bytes() == b".." {
if file.as_encoded_bytes() == b".." {
return (Some(file), None);
}

// The unsafety here stems from converting between &OsStr and &[u8]
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
let mut iter = file.as_os_str_bytes().rsplitn(2, |b| *b == b'.');
let mut iter = file.as_encoded_bytes().rsplitn(2, |b| *b == b'.');
let after = iter.next();
let before = iter.next();
if before == Some(b"") {
(Some(file), None)
} else {
unsafe {
(
before.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
after.map(|s| OsStr::from_os_str_bytes_unchecked(s)),
before.map(|s| OsStr::from_encoded_bytes_unchecked(s)),
after.map(|s| OsStr::from_encoded_bytes_unchecked(s)),
)
}
}
}

fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
let slice = file.as_os_str_bytes();
let slice = file.as_encoded_bytes();
if slice == b".." {
return (file, None);
}
Expand All @@ -357,8 +357,8 @@ fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
let after = &slice[i + 1..];
unsafe {
(
OsStr::from_os_str_bytes_unchecked(before),
Some(OsStr::from_os_str_bytes_unchecked(after)),
OsStr::from_encoded_bytes_unchecked(before),
Some(OsStr::from_encoded_bytes_unchecked(after)),
)
}
}
Expand Down Expand Up @@ -739,7 +739,7 @@ impl<'a> Components<'a> {
// separately via `include_cur_dir`
b".." => Some(Component::ParentDir),
b"" => None,
_ => Some(Component::Normal(unsafe { OsStr::from_os_str_bytes_unchecked(comp) })),
_ => Some(Component::Normal(unsafe { OsStr::from_encoded_bytes_unchecked(comp) })),
}
}

Expand Down Expand Up @@ -896,7 +896,7 @@ impl<'a> Iterator for Components<'a> {
let raw = &self.path[..self.prefix_len()];
self.path = &self.path[self.prefix_len()..];
return Some(Component::Prefix(PrefixComponent {
raw: unsafe { OsStr::from_os_str_bytes_unchecked(raw) },
raw: unsafe { OsStr::from_encoded_bytes_unchecked(raw) },
parsed: self.prefix.unwrap(),
}));
}
Expand Down Expand Up @@ -968,7 +968,7 @@ impl<'a> DoubleEndedIterator for Components<'a> {
State::Prefix if self.prefix_len() > 0 => {
self.back = State::Done;
return Some(Component::Prefix(PrefixComponent {
raw: unsafe { OsStr::from_os_str_bytes_unchecked(self.path) },
raw: unsafe { OsStr::from_encoded_bytes_unchecked(self.path) },
parsed: self.prefix.unwrap(),
}));
}
Expand Down Expand Up @@ -1477,17 +1477,17 @@ impl PathBuf {
fn _set_extension(&mut self, extension: &OsStr) -> bool {
let file_stem = match self.file_stem() {
None => return false,
Some(f) => f.as_os_str_bytes(),
Some(f) => f.as_encoded_bytes(),
};

// truncate until right after the file stem
let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
let start = self.inner.as_os_str_bytes().as_ptr().addr();
let start = self.inner.as_encoded_bytes().as_ptr().addr();
let v = self.as_mut_vec();
v.truncate(end_file_stem.wrapping_sub(start));

// add the new extension, if any
let new = extension.as_os_str_bytes();
let new = extension.as_encoded_bytes();
if !new.is_empty() {
v.reserve_exact(new.len() + 1);
v.push(b'.');
Expand Down Expand Up @@ -2007,11 +2007,11 @@ impl Path {
// The following (private!) function allows construction of a path from a u8
// slice, which is only safe when it is known to follow the OsStr encoding.
unsafe fn from_u8_slice(s: &[u8]) -> &Path {
unsafe { Path::new(OsStr::from_os_str_bytes_unchecked(s)) }
unsafe { Path::new(OsStr::from_encoded_bytes_unchecked(s)) }
}
// The following (private!) function reveals the byte encoding used for OsStr.
fn as_u8_slice(&self) -> &[u8] {
self.inner.as_os_str_bytes()
self.inner.as_encoded_bytes()
}

/// Directly wraps a string slice as a `Path` slice.
Expand Down Expand Up @@ -2609,7 +2609,7 @@ impl Path {

fn _with_extension(&self, extension: &OsStr) -> PathBuf {
let self_len = self.as_os_str().len();
let self_bytes = self.as_os_str().as_os_str_bytes();
let self_bytes = self.as_os_str().as_encoded_bytes();

let (new_capacity, slice_to_copy) = match self.extension() {
None => {
Expand Down
2 changes: 1 addition & 1 deletion library/std/src/sys/common/small_c_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn run_path_with_cstr<T, F>(path: &Path, f: F) -> io::Result<T>
where
F: FnOnce(&CStr) -> io::Result<T>,
{
run_with_cstr(path.as_os_str().as_os_str_bytes(), f)
run_with_cstr(path.as_os_str().as_encoded_bytes(), f)
}

#[inline]
Expand Down
4 changes: 2 additions & 2 deletions library/std/src/sys/common/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use core::iter::repeat;
fn stack_allocation_works() {
let path = Path::new("abc");
let result = run_path_with_cstr(path, |p| {
assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
assert_eq!(p, &*CString::new(path.as_os_str().as_encoded_bytes()).unwrap());
Ok(42)
});
assert_eq!(result.unwrap(), 42);
Expand All @@ -25,7 +25,7 @@ fn heap_allocation_works() {
let path = repeat("a").take(384).collect::<String>();
let path = Path::new(&path);
let result = run_path_with_cstr(path, |p| {
assert_eq!(p, &*CString::new(path.as_os_str().as_os_str_bytes()).unwrap());
assert_eq!(p, &*CString::new(path.as_os_str().as_encoded_bytes()).unwrap());
Ok(42)
});
assert_eq!(result.unwrap(), 42);
Expand Down
10 changes: 5 additions & 5 deletions library/std/src/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ impl AsInner<[u8]> for Buf {

impl Buf {
#[inline]
pub fn into_os_str_bytes(self) -> Vec<u8> {
pub fn into_encoded_bytes(self) -> Vec<u8> {
self.inner
}

#[inline]
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
pub unsafe fn from_encoded_bytes_unchecked(s: Vec<u8>) -> Self {
Self { inner: s }
}

Expand Down Expand Up @@ -203,18 +203,18 @@ impl Buf {

impl Slice {
#[inline]
pub fn as_os_str_bytes(&self) -> &[u8] {
pub fn as_encoded_bytes(&self) -> &[u8] {
&self.inner
}

#[inline]
pub unsafe fn from_os_str_bytes_unchecked(s: &[u8]) -> &Slice {
pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice {
unsafe { mem::transmute(s) }
}

#[inline]
pub fn from_str(s: &str) -> &Slice {
unsafe { Slice::from_os_str_bytes_unchecked(s.as_bytes()) }
unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) }
}

pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {
Expand Down
Loading