1
1
// ignore-tidy-filelength
2
- // ignore-tidy-undocumented-unsafe
3
2
4
3
//! String manipulation.
5
4
//!
@@ -337,6 +336,7 @@ impl Utf8Error {
337
336
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
338
337
pub fn from_utf8 ( v : & [ u8 ] ) -> Result < & str , Utf8Error > {
339
338
run_utf8_validation ( v) ?;
339
+ // SAFETY: just ran validation
340
340
Ok ( unsafe { from_utf8_unchecked ( v) } )
341
341
}
342
342
@@ -375,6 +375,7 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
375
375
#[ stable( feature = "str_mut_extras" , since = "1.20.0" ) ]
376
376
pub fn from_utf8_mut ( v : & mut [ u8 ] ) -> Result < & mut str , Utf8Error > {
377
377
run_utf8_validation ( v) ?;
378
+ // SAFETY: just ran validation
378
379
Ok ( unsafe { from_utf8_unchecked_mut ( v) } )
379
380
}
380
381
@@ -567,7 +568,7 @@ impl<'a> Iterator for Chars<'a> {
567
568
#[ inline]
568
569
fn next ( & mut self ) -> Option < char > {
569
570
next_code_point ( & mut self . iter ) . map ( |ch| {
570
- // str invariant says `ch` is a valid Unicode Scalar Value
571
+ // SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
571
572
unsafe {
572
573
char:: from_u32_unchecked ( ch)
573
574
}
@@ -616,7 +617,7 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
616
617
#[ inline]
617
618
fn next_back ( & mut self ) -> Option < char > {
618
619
next_code_point_reverse ( & mut self . iter ) . map ( |ch| {
619
- // str invariant says `ch` is a valid Unicode Scalar Value
620
+ // SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
620
621
unsafe {
621
622
char:: from_u32_unchecked ( ch)
622
623
}
@@ -648,6 +649,7 @@ impl<'a> Chars<'a> {
648
649
#[ stable( feature = "iter_to_slice" , since = "1.4.0" ) ]
649
650
#[ inline]
650
651
pub fn as_str ( & self ) -> & ' a str {
652
+ // SAFETY: Chars is only made from a str, which guarantees the iter is valid utf8
651
653
unsafe { from_utf8_unchecked ( self . iter . as_slice ( ) ) }
652
654
}
653
655
}
@@ -1080,6 +1082,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1080
1082
fn get_end ( & mut self ) -> Option < & ' a str > {
1081
1083
if !self . finished && ( self . allow_trailing_empty || self . end - self . start > 0 ) {
1082
1084
self . finished = true ;
1085
+ // SAFETY: self.start and self.end always lie on unicode boundaries
1083
1086
unsafe {
1084
1087
let string = self . matcher . haystack ( ) . get_unchecked ( self . start ..self . end ) ;
1085
1088
Some ( string)
@@ -1095,6 +1098,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1095
1098
1096
1099
let haystack = self . matcher . haystack ( ) ;
1097
1100
match self . matcher . next_match ( ) {
1101
+ // SAFETY: Searcher guarantees that a and b lie on unicode boundaries
1098
1102
Some ( ( a, b) ) => unsafe {
1099
1103
let elt = haystack. get_unchecked ( self . start ..a) ;
1100
1104
self . start = b;
@@ -1120,11 +1124,13 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1120
1124
1121
1125
let haystack = self . matcher . haystack ( ) ;
1122
1126
match self . matcher . next_match_back ( ) {
1127
+ // SAFETY: Searcher guarantees that a and b lie on unicode boundaries
1123
1128
Some ( ( a, b) ) => unsafe {
1124
1129
let elt = haystack. get_unchecked ( b..self . end ) ;
1125
1130
self . end = a;
1126
1131
Some ( elt)
1127
1132
} ,
1133
+ // SAFETY: self.start and self.end always lie on unicode boundaries
1128
1134
None => unsafe {
1129
1135
self . finished = true ;
1130
1136
Some ( haystack. get_unchecked ( self . start ..self . end ) )
@@ -1253,6 +1259,7 @@ where
1253
1259
impl < ' a , P : Pattern < ' a > > MatchIndicesInternal < ' a , P > {
1254
1260
#[ inline]
1255
1261
fn next ( & mut self ) -> Option < ( usize , & ' a str ) > {
1262
+ // SAFETY: Searcher guarantees that start and end lie on unicode boundaries
1256
1263
self . 0 . next_match ( ) . map ( |( start, end) | unsafe {
1257
1264
( start, self . 0 . haystack ( ) . get_unchecked ( start..end) )
1258
1265
} )
@@ -1262,6 +1269,7 @@ impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1262
1269
fn next_back ( & mut self ) -> Option < ( usize , & ' a str ) >
1263
1270
where P :: Searcher : ReverseSearcher < ' a >
1264
1271
{
1272
+ // SAFETY: Searcher guarantees that start and end lie on unicode boundaries
1265
1273
self . 0 . next_match_back ( ) . map ( |( start, end) | unsafe {
1266
1274
( start, self . 0 . haystack ( ) . get_unchecked ( start..end) )
1267
1275
} )
@@ -1307,6 +1315,7 @@ where
1307
1315
impl < ' a , P : Pattern < ' a > > MatchesInternal < ' a , P > {
1308
1316
#[ inline]
1309
1317
fn next ( & mut self ) -> Option < & ' a str > {
1318
+ // SAFETY: Searcher guarantees that start and end lie on unicode boundaries
1310
1319
self . 0 . next_match ( ) . map ( |( a, b) | unsafe {
1311
1320
// Indices are known to be on utf8 boundaries
1312
1321
self . 0 . haystack ( ) . get_unchecked ( a..b)
@@ -1317,6 +1326,7 @@ impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1317
1326
fn next_back ( & mut self ) -> Option < & ' a str >
1318
1327
where P :: Searcher : ReverseSearcher < ' a >
1319
1328
{
1329
+ // SAFETY: Searcher guarantees that start and end lie on unicode boundaries
1320
1330
self . 0 . next_match_back ( ) . map ( |( a, b) | unsafe {
1321
1331
// Indices are known to be on utf8 boundaries
1322
1332
self . 0 . haystack ( ) . get_unchecked ( a..b)
@@ -1538,6 +1548,9 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1538
1548
if align != usize:: max_value ( ) && align. wrapping_sub ( index) % usize_bytes == 0 {
1539
1549
let ptr = v. as_ptr ( ) ;
1540
1550
while index < blocks_end {
1551
+ // SAFETY: since align - index and ascii_block_size are multiples of
1552
+ // usize_bytes, ptr.add(index) is always aligned with a usize so we may cast
1553
+ // directly to a const pointer.
1541
1554
unsafe {
1542
1555
let block = ptr. add ( index) as * const usize ;
1543
1556
// break if there is a nonascii byte
@@ -1760,6 +1773,7 @@ mod traits {
1760
1773
if self . start <= self . end &&
1761
1774
slice. is_char_boundary ( self . start ) &&
1762
1775
slice. is_char_boundary ( self . end ) {
1776
+ // SAFETY: just checked that start and end are on a char boundary
1763
1777
Some ( unsafe { self . get_unchecked ( slice) } )
1764
1778
} else {
1765
1779
None
@@ -1770,6 +1784,7 @@ mod traits {
1770
1784
if self . start <= self . end &&
1771
1785
slice. is_char_boundary ( self . start ) &&
1772
1786
slice. is_char_boundary ( self . end ) {
1787
+ // SAFETY: just checked that start and end are on a char boundary
1773
1788
Some ( unsafe { self . get_unchecked_mut ( slice) } )
1774
1789
} else {
1775
1790
None
@@ -1799,6 +1814,7 @@ mod traits {
1799
1814
if self . start <= self . end &&
1800
1815
slice. is_char_boundary ( self . start ) &&
1801
1816
slice. is_char_boundary ( self . end ) {
1817
+ // SAFETY: just checked that start and end are on a char boundary
1802
1818
unsafe { self . get_unchecked_mut ( slice) }
1803
1819
} else {
1804
1820
super :: slice_error_fail ( slice, self . start , self . end )
@@ -1827,6 +1843,7 @@ mod traits {
1827
1843
#[ inline]
1828
1844
fn get ( self , slice : & str ) -> Option < & Self :: Output > {
1829
1845
if slice. is_char_boundary ( self . end ) {
1846
+ // SAFETY: just checked that end is on a char boundary
1830
1847
Some ( unsafe { self . get_unchecked ( slice) } )
1831
1848
} else {
1832
1849
None
@@ -1835,6 +1852,7 @@ mod traits {
1835
1852
#[ inline]
1836
1853
fn get_mut ( self , slice : & mut str ) -> Option < & mut Self :: Output > {
1837
1854
if slice. is_char_boundary ( self . end ) {
1855
+ // SAFETY: just checked that end is on a char boundary
1838
1856
Some ( unsafe { self . get_unchecked_mut ( slice) } )
1839
1857
} else {
1840
1858
None
@@ -1857,8 +1875,8 @@ mod traits {
1857
1875
}
1858
1876
#[ inline]
1859
1877
fn index_mut ( self , slice : & mut str ) -> & mut Self :: Output {
1860
- // is_char_boundary checks that the index is in [0, .len()]
1861
1878
if slice. is_char_boundary ( self . end ) {
1879
+ // SAFETY: just checked that end is on a char boundary
1862
1880
unsafe { self . get_unchecked_mut ( slice) }
1863
1881
} else {
1864
1882
super :: slice_error_fail ( slice, 0 , self . end )
@@ -1888,6 +1906,7 @@ mod traits {
1888
1906
#[ inline]
1889
1907
fn get ( self , slice : & str ) -> Option < & Self :: Output > {
1890
1908
if slice. is_char_boundary ( self . start ) {
1909
+ // SAFETY: just checked that start is on a char boundary
1891
1910
Some ( unsafe { self . get_unchecked ( slice) } )
1892
1911
} else {
1893
1912
None
@@ -1896,6 +1915,7 @@ mod traits {
1896
1915
#[ inline]
1897
1916
fn get_mut ( self , slice : & mut str ) -> Option < & mut Self :: Output > {
1898
1917
if slice. is_char_boundary ( self . start ) {
1918
+ // SAFETY: just checked that start is on a char boundary
1899
1919
Some ( unsafe { self . get_unchecked_mut ( slice) } )
1900
1920
} else {
1901
1921
None
@@ -1920,8 +1940,8 @@ mod traits {
1920
1940
}
1921
1941
#[ inline]
1922
1942
fn index_mut ( self , slice : & mut str ) -> & mut Self :: Output {
1923
- // is_char_boundary checks that the index is in [0, .len()]
1924
1943
if slice. is_char_boundary ( self . start ) {
1944
+ // SAFETY: just checked that start is on a char boundary
1925
1945
unsafe { self . get_unchecked_mut ( slice) }
1926
1946
} else {
1927
1947
super :: slice_error_fail ( slice, self . start , slice. len ( ) )
@@ -2167,7 +2187,6 @@ impl str {
2167
2187
/// ```
2168
2188
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
2169
2189
#[ inline( always) ]
2170
- // SAFETY: const sound because we transmute two types with the same layout
2171
2190
#[ allow( unused_attributes) ]
2172
2191
#[ allow_internal_unstable( const_fn_union) ]
2173
2192
pub const fn as_bytes ( & self ) -> & [ u8 ] {
@@ -2176,6 +2195,7 @@ impl str {
2176
2195
str : & ' a str ,
2177
2196
slice : & ' a [ u8 ] ,
2178
2197
}
2198
+ // SAFETY: const sound because we transmute two types with the same layout
2179
2199
unsafe { Slices { str : self } . slice }
2180
2200
}
2181
2201
@@ -2501,6 +2521,7 @@ impl str {
2501
2521
pub fn split_at ( & self , mid : usize ) -> ( & str , & str ) {
2502
2522
// is_char_boundary checks that the index is in [0, .len()]
2503
2523
if self . is_char_boundary ( mid) {
2524
+ // SAFETY: just checked that mid is on a char boundary
2504
2525
unsafe {
2505
2526
( self . get_unchecked ( 0 ..mid) ,
2506
2527
self . get_unchecked ( mid..self . len ( ) ) )
@@ -2548,6 +2569,7 @@ impl str {
2548
2569
if self . is_char_boundary ( mid) {
2549
2570
let len = self . len ( ) ;
2550
2571
let ptr = self . as_mut_ptr ( ) ;
2572
+ // SAFETY: just checked that mid is on a char boundary
2551
2573
unsafe {
2552
2574
( from_utf8_unchecked_mut ( slice:: from_raw_parts_mut ( ptr, mid) ) ,
2553
2575
from_utf8_unchecked_mut ( slice:: from_raw_parts_mut (
@@ -3746,8 +3768,8 @@ impl str {
3746
3768
if let Some ( ( _, b) ) = matcher. next_reject_back ( ) {
3747
3769
j = b;
3748
3770
}
3771
+ // SAFETY: Searcher is known to return valid indices
3749
3772
unsafe {
3750
- // Searcher is known to return valid indices
3751
3773
self . get_unchecked ( i..j)
3752
3774
}
3753
3775
}
@@ -3785,8 +3807,8 @@ impl str {
3785
3807
if let Some ( ( a, _) ) = matcher. next_reject ( ) {
3786
3808
i = a;
3787
3809
}
3810
+ // SAFETY: Searcher is known to return valid indices
3788
3811
unsafe {
3789
- // Searcher is known to return valid indices
3790
3812
self . get_unchecked ( i..self . len ( ) )
3791
3813
}
3792
3814
}
@@ -3833,8 +3855,8 @@ impl str {
3833
3855
if let Some ( ( _, b) ) = matcher. next_reject_back ( ) {
3834
3856
j = b;
3835
3857
}
3858
+ // SAFETY: Searcher is known to return valid indices
3836
3859
unsafe {
3837
- // Searcher is known to return valid indices
3838
3860
self . get_unchecked ( 0 ..j)
3839
3861
}
3840
3862
}
@@ -4029,6 +4051,7 @@ impl str {
4029
4051
/// ```
4030
4052
#[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
4031
4053
pub fn make_ascii_uppercase ( & mut self ) {
4054
+ // SAFETY: safe because we transmute two types with the same layout, and changing only ASCII letters does not invalidate UTF-8
4032
4055
let me = unsafe { self . as_bytes_mut ( ) } ;
4033
4056
me. make_ascii_uppercase ( )
4034
4057
}
@@ -4054,6 +4077,7 @@ impl str {
4054
4077
/// ```
4055
4078
#[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
4056
4079
pub fn make_ascii_lowercase ( & mut self ) {
4080
+ // SAFETY: safe because we transmute two types with the same layout, and changing only ASCII letters does not invalidate UTF-8
4057
4081
let me = unsafe { self . as_bytes_mut ( ) } ;
4058
4082
me. make_ascii_lowercase ( )
4059
4083
}
@@ -4216,6 +4240,7 @@ impl Default for &str {
4216
4240
#[ stable( feature = "default_mut_str" , since = "1.28.0" ) ]
4217
4241
impl Default for & mut str {
4218
4242
/// Creates an empty mutable str
4243
+ // SAFETY: str is guaranteed to be utf8
4219
4244
fn default ( ) -> Self { unsafe { from_utf8_unchecked_mut ( & mut [ ] ) } }
4220
4245
}
4221
4246
@@ -4270,6 +4295,7 @@ impl_fn_for_zst! {
4270
4295
4271
4296
#[ derive( Clone ) ]
4272
4297
struct UnsafeBytesToStr impl <' a> Fn = |bytes: & ' a [ u8 ] | -> & ' a str {
4298
+ // SAFETY: not safe
4273
4299
unsafe { from_utf8_unchecked( bytes) }
4274
4300
} ;
4275
4301
}
0 commit comments