Skip to content

Commit e888608

Browse files
committed
Document all unsafe usages in ascii_str module.
Reimplemented `<str as AsMutAsciiStr>::slice_ascii_mut` to use less unsafe code.
1 parent 89ee040 commit e888608

File tree

1 file changed

+79
-29
lines changed

1 file changed

+79
-29
lines changed

src/ascii_str.rs

+79-29
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@ impl AsciiStr {
2727
/// Converts `&self` to a `&str` slice.
2828
#[inline]
2929
pub fn as_str(&self) -> &str {
30+
// SAFETY: All variants of `AsciiChar` are valid bytes for a `str`.
3031
unsafe { &*(self as *const AsciiStr as *const str) }
3132
}
3233

3334
/// Converts `&self` into a byte slice.
3435
#[inline]
3536
pub fn as_bytes(&self) -> &[u8] {
37+
// SAFETY: All variants of `AsciiChar` are valid `u8`, given they're `repr(u8)`.
3638
unsafe { &*(self as *const AsciiStr as *const [u8]) }
3739
}
3840

@@ -95,6 +97,10 @@ impl AsciiStr {
9597
/// Converts anything that can be represented as a byte slice to an `AsciiStr` without checking
9698
/// for non-ASCII characters.
9799
///
100+
/// # Safety
101+
/// If any of the bytes in `bytes` do not represent valid ascii characters, calling
102+
/// this function is undefined behavior.
103+
///
98104
/// # Examples
99105
/// ```
100106
/// # use ascii::AsciiStr;
@@ -103,7 +109,9 @@ impl AsciiStr {
103109
/// ```
104110
#[inline]
105111
pub unsafe fn from_ascii_unchecked(bytes: &[u8]) -> &AsciiStr {
106-
bytes.as_ascii_str_unchecked()
112+
// SAFETY: Caller guarantees all bytes in `bytes` are valid
113+
// ascii characters.
114+
unsafe { bytes.as_ascii_str_unchecked() }
107115
}
108116

109117
/// Returns the number of characters / bytes in this ASCII sequence.
@@ -782,6 +790,10 @@ pub trait AsAsciiStr {
782790
}
783791
/// Convert to an ASCII slice without checking for non-ASCII characters.
784792
///
793+
/// # Safety
794+
/// Calling this function when `self` contains non-ascii characters is
795+
/// undefined behavior.
796+
///
785797
/// # Examples
786798
///
787799
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr;
@@ -793,11 +805,17 @@ pub trait AsMutAsciiStr: AsAsciiStr {
793805
fn slice_ascii_mut<R>(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError>
794806
where
795807
R: SliceIndex<[Self::Inner], Output = [Self::Inner]>;
808+
796809
/// Convert to a mutable ASCII slice.
797810
fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> {
798811
self.slice_ascii_mut(..)
799812
}
813+
800814
/// Convert to a mutable ASCII slice without checking for non-ASCII characters.
815+
///
816+
/// # Safety
817+
/// Calling this function when `self` contains non-ascii characters is
818+
/// undefined behavior.
801819
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr;
802820
}
803821

@@ -813,8 +831,10 @@ where
813831
{
814832
<T as AsAsciiStr>::slice_ascii(*self, range)
815833
}
834+
816835
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
817-
<T as AsAsciiStr>::as_ascii_str_unchecked(*self)
836+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
837+
unsafe { <T as AsAsciiStr>::as_ascii_str_unchecked(*self) }
818838
}
819839
}
820840

@@ -831,7 +851,8 @@ where
831851
}
832852

833853
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
834-
<T as AsAsciiStr>::as_ascii_str_unchecked(*self)
854+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
855+
unsafe { <T as AsAsciiStr>::as_ascii_str_unchecked(*self) }
835856
}
836857
}
837858

@@ -847,26 +868,31 @@ where
847868
}
848869

849870
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr {
850-
<T as AsMutAsciiStr>::as_mut_ascii_str_unchecked(*self)
871+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
872+
unsafe { <T as AsMutAsciiStr>::as_mut_ascii_str_unchecked(*self) }
851873
}
852874
}
853875

854876
impl AsAsciiStr for AsciiStr {
855877
type Inner = AsciiChar;
878+
856879
fn slice_ascii<R>(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError>
857880
where
858881
R: SliceIndex<[AsciiChar], Output = [AsciiChar]>,
859882
{
860883
self.slice.slice_ascii(range)
861884
}
885+
862886
#[inline]
863887
fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> {
864888
Ok(self)
865889
}
890+
866891
#[inline]
867892
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
868893
self
869894
}
895+
870896
#[inline]
871897
fn get_ascii(&self, index: usize) -> Option<AsciiChar> {
872898
self.slice.get_ascii(index)
@@ -879,6 +905,7 @@ impl AsMutAsciiStr for AsciiStr {
879905
{
880906
self.slice.slice_ascii_mut(range)
881907
}
908+
882909
#[inline]
883910
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr {
884911
self
@@ -896,14 +923,17 @@ impl AsAsciiStr for [AsciiChar] {
896923
None => Err(AsAsciiStrError(self.len())),
897924
}
898925
}
926+
899927
#[inline]
900928
fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> {
901929
Ok(self.into())
902930
}
931+
903932
#[inline]
904933
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
905-
self.into()
934+
<&AsciiStr>::from(self)
906935
}
936+
907937
#[inline]
908938
fn get_ascii(&self, index: usize) -> Option<AsciiChar> {
909939
self.get(index).cloned()
@@ -922,12 +952,13 @@ impl AsMutAsciiStr for [AsciiChar] {
922952
}
923953
#[inline]
924954
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr {
925-
self.into()
955+
<&mut AsciiStr>::from(self)
926956
}
927957
}
928958

929959
impl AsAsciiStr for [u8] {
930960
type Inner = u8;
961+
931962
fn slice_ascii<R>(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError>
932963
where
933964
R: SliceIndex<[u8], Output = [u8]>,
@@ -941,20 +972,23 @@ impl AsAsciiStr for [u8] {
941972
Err(AsAsciiStrError(self.len()))
942973
}
943974
}
975+
944976
fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> {
977+
// is_ascii is likely optimized
945978
if self.is_ascii() {
946-
// is_ascii is likely optimized
979+
// SAFETY: `is_ascii` guarantees all bytes are within ascii range.
947980
unsafe { Ok(self.as_ascii_str_unchecked()) }
948981
} else {
949982
Err(AsAsciiStrError(
950983
self.iter().take_while(|&b| b.is_ascii()).count(),
951984
))
952985
}
953986
}
987+
954988
#[inline]
955989
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
956-
let ptr = self as *const [u8] as *const AsciiStr;
957-
&*ptr
990+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
991+
unsafe { &*(self as *const [u8] as *const AsciiStr) }
958992
}
959993
}
960994
impl AsMutAsciiStr for [u8] {
@@ -975,20 +1009,23 @@ impl AsMutAsciiStr for [u8] {
9751009
Err(AsAsciiStrError(len))
9761010
}
9771011
}
1012+
9781013
fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> {
1014+
// is_ascii() is likely optimized
9791015
if self.is_ascii() {
980-
// is_ascii() is likely optimized
1016+
// SAFETY: `is_ascii` guarantees all bytes are within ascii range.
9811017
unsafe { Ok(self.as_mut_ascii_str_unchecked()) }
9821018
} else {
9831019
Err(AsAsciiStrError(
9841020
self.iter().take_while(|&b| b.is_ascii()).count(),
9851021
))
9861022
}
9871023
}
1024+
9881025
#[inline]
9891026
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr {
990-
let ptr = self as *mut [u8] as *mut AsciiStr;
991-
&mut *ptr
1027+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
1028+
unsafe { &mut *(self as *mut [u8] as *mut AsciiStr) }
9921029
}
9931030
}
9941031

@@ -1005,40 +1042,52 @@ impl AsAsciiStr for str {
10051042
}
10061043
#[inline]
10071044
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
1008-
self.as_bytes().as_ascii_str_unchecked()
1045+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
1046+
unsafe { self.as_bytes().as_ascii_str_unchecked() }
10091047
}
10101048
}
10111049
impl AsMutAsciiStr for str {
10121050
fn slice_ascii_mut<R>(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError>
10131051
where
10141052
R: SliceIndex<[u8], Output = [u8]>,
10151053
{
1016-
let (ptr, len) = if let Some(slice) = self.as_bytes().get(range) {
1017-
if !slice.is_ascii() {
1054+
// SAFETY: We don't modify the reference in this function, and the caller may
1055+
// only modify it to include valid ascii characters.
1056+
let bytes = unsafe { self.as_bytes_mut() };
1057+
match bytes.get_mut(range) {
1058+
// Valid ascii slice
1059+
Some(slice) if slice.is_ascii() => {
1060+
// SAFETY: All bytes are ascii, so this cast is valid
1061+
let ptr = slice.as_mut_ptr() as *mut AsciiChar;
1062+
let len = slice.len();
1063+
1064+
// SAFETY: The pointer is valid for `len` elements, as it came
1065+
// from a slice.
1066+
unsafe {
1067+
let slice = core::slice::from_raw_parts_mut(ptr, len);
1068+
Ok(<&mut AsciiStr>::from(slice))
1069+
}
1070+
}
1071+
Some(slice) => {
1072+
let not_ascii_len = slice.iter().copied().take_while(u8::is_ascii).count();
10181073
let offset = slice.as_ptr() as usize - self.as_ptr() as usize;
1019-
let not_ascii = slice.iter().take_while(|&b| b.is_ascii()).count();
1020-
return Err(AsAsciiStrError(offset + not_ascii));
1074+
1075+
Err(AsAsciiStrError(offset + not_ascii_len))
10211076
}
1022-
(slice.as_ptr(), slice.len())
1023-
} else {
1024-
return Err(AsAsciiStrError(self.len()));
1025-
};
1026-
unsafe {
1027-
let ptr = ptr as *const AsciiChar as *mut AsciiChar;
1028-
let slice = core::slice::from_raw_parts_mut(ptr, len);
1029-
Ok(slice.into())
1077+
None => Err(AsAsciiStrError(self.len())),
10301078
}
10311079
}
10321080
fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> {
1033-
match self.bytes().position(|b| b > 127) {
1081+
match self.bytes().position(|b| !b.is_ascii()) {
10341082
Some(index) => Err(AsAsciiStrError(index)),
1083+
// SAFETY: All bytes were iterated, and all were ascii
10351084
None => unsafe { Ok(self.as_mut_ascii_str_unchecked()) },
10361085
}
10371086
}
10381087
#[inline]
10391088
unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr {
1040-
let ptr = self as *mut str as *mut AsciiStr;
1041-
&mut *ptr
1089+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
1090+
&mut *(self as *mut str as *mut AsciiStr)
10421091
}
10431092
}
10441093

@@ -1058,7 +1107,8 @@ impl AsAsciiStr for CStr {
10581107
}
10591108
#[inline]
10601109
unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr {
1061-
self.to_bytes().as_ascii_str_unchecked()
1110+
// SAFETY: Caller guarantees `self` does not contain non-ascii characters
1111+
unsafe { self.to_bytes().as_ascii_str_unchecked() }
10621112
}
10631113
}
10641114

0 commit comments

Comments
 (0)