@@ -52,10 +52,8 @@ pub enum EscapeError {
52
52
53
53
/// Unicode escape code in byte literal.
54
54
UnicodeEscapeInByte ,
55
- /// Non-ascii character in byte literal.
55
+ /// Non-ascii character in byte literal, byte string literal, or raw byte string literal .
56
56
NonAsciiCharInByte ,
57
- /// Non-ascii character in byte string literal.
58
- NonAsciiCharInByteString ,
59
57
60
58
/// After a line ending with '\', the next line contains whitespace
61
59
/// characters that are not skipped.
@@ -78,54 +76,33 @@ impl EscapeError {
78
76
/// Takes the contents of a literal (without quotes) and produces a
79
77
/// sequence of escaped characters or errors.
80
78
/// Values are returned by invoking the provided callback.
81
- pub fn unescape_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
79
+ pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
82
80
where
83
81
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
84
82
{
85
83
match mode {
86
84
Mode :: Char | Mode :: Byte => {
87
- let mut chars = literal_text. chars ( ) ;
88
- let result = unescape_char_or_byte ( & mut chars, mode) ;
89
- // The Chars iterator moved forward.
90
- callback ( 0 ..( literal_text. len ( ) - chars. as_str ( ) . len ( ) ) , result) ;
85
+ let mut chars = src. chars ( ) ;
86
+ let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
87
+ callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
91
88
}
92
- Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( literal_text, mode, callback) ,
93
- // NOTE: Raw strings do not perform any explicit character escaping, here we
94
- // only translate CRLF to LF and produce errors on bare CR.
89
+ Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( src, mode == Mode :: ByteStr , callback) ,
95
90
Mode :: RawStr | Mode :: RawByteStr => {
96
- unescape_raw_str_or_raw_byte_str ( literal_text , mode, callback)
91
+ unescape_raw_str_or_raw_byte_str ( src , mode == Mode :: RawByteStr , callback)
97
92
}
98
93
}
99
94
}
100
95
101
- /// Takes a contents of a byte, byte string or raw byte string (without quotes)
102
- /// and produces a sequence of bytes or errors.
103
- /// Values are returned through invoking of the provided callback.
104
- pub fn unescape_byte_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
105
- where
106
- F : FnMut ( Range < usize > , Result < u8 , EscapeError > ) ,
107
- {
108
- debug_assert ! ( mode. is_bytes( ) ) ;
109
- unescape_literal ( literal_text, mode, & mut |range, result| {
110
- callback ( range, result. map ( byte_from_char) ) ;
111
- } )
112
- }
113
-
114
96
/// Takes the contents of a char literal (without quotes), and returns an
115
- /// unescaped char or an error
116
- pub fn unescape_char ( literal_text : & str ) -> Result < char , ( usize , EscapeError ) > {
117
- let mut chars = literal_text. chars ( ) ;
118
- unescape_char_or_byte ( & mut chars, Mode :: Char )
119
- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
97
+ /// unescaped char or an error.
98
+ pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
99
+ unescape_char_or_byte ( & mut src. chars ( ) , false )
120
100
}
121
101
122
102
/// Takes the contents of a byte literal (without quotes), and returns an
123
103
/// unescaped byte or an error.
124
- pub fn unescape_byte ( literal_text : & str ) -> Result < u8 , ( usize , EscapeError ) > {
125
- let mut chars = literal_text. chars ( ) ;
126
- unescape_char_or_byte ( & mut chars, Mode :: Byte )
127
- . map ( byte_from_char)
128
- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
104
+ pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
105
+ unescape_char_or_byte ( & mut src. chars ( ) , true ) . map ( byte_from_char)
129
106
}
130
107
131
108
/// What kind of literal do we parse.
@@ -147,20 +124,17 @@ impl Mode {
147
124
}
148
125
}
149
126
150
- pub fn is_bytes ( self ) -> bool {
127
+ pub fn is_byte ( self ) -> bool {
151
128
match self {
152
129
Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
153
130
Mode :: Char | Mode :: Str | Mode :: RawStr => false ,
154
131
}
155
132
}
156
133
}
157
134
158
- fn scan_escape ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
135
+ fn scan_escape ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
159
136
// Previous character was '\\', unescape what follows.
160
-
161
- let second_char = chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ?;
162
-
163
- let res = match second_char {
137
+ let res = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
164
138
'"' => '"' ,
165
139
'n' => '\n' ,
166
140
'r' => '\r' ,
@@ -181,7 +155,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
181
155
let value = hi * 16 + lo;
182
156
183
157
// For a non-byte literal verify that it is within ASCII range.
184
- if !mode . is_bytes ( ) && !is_ascii ( value) {
158
+ if !is_byte && !is_ascii ( value) {
185
159
return Err ( EscapeError :: OutOfRangeHexEscape ) ;
186
160
}
187
161
let value = value as u8 ;
@@ -217,7 +191,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
217
191
218
192
// Incorrect syntax has higher priority for error reporting
219
193
// than unallowed value for a literal.
220
- if mode . is_bytes ( ) {
194
+ if is_byte {
221
195
return Err ( EscapeError :: UnicodeEscapeInByte ) ;
222
196
}
223
197
@@ -249,23 +223,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
249
223
}
250
224
251
225
#[ inline]
252
- fn ascii_check ( first_char : char , mode : Mode ) -> Result < char , EscapeError > {
253
- if mode . is_bytes ( ) && !first_char . is_ascii ( ) {
226
+ fn ascii_check ( c : char , is_byte : bool ) -> Result < char , EscapeError > {
227
+ if is_byte && !c . is_ascii ( ) {
254
228
// Byte literal can't be a non-ascii character.
255
229
Err ( EscapeError :: NonAsciiCharInByte )
256
230
} else {
257
- Ok ( first_char )
231
+ Ok ( c )
258
232
}
259
233
}
260
234
261
- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
262
- debug_assert ! ( mode == Mode :: Char || mode == Mode :: Byte ) ;
263
- let first_char = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
264
- let res = match first_char {
265
- '\\' => scan_escape ( chars, mode) ,
235
+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
236
+ let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
237
+ let res = match c {
238
+ '\\' => scan_escape ( chars, is_byte) ,
266
239
'\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
267
240
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
268
- _ => ascii_check ( first_char , mode ) ,
241
+ _ => ascii_check ( c , is_byte ) ,
269
242
} ?;
270
243
if chars. next ( ) . is_some ( ) {
271
244
return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -275,20 +248,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
275
248
276
249
/// Takes the contents of a string literal (without quotes) and produces a
277
250
/// sequence of escaped characters or errors.
278
- fn unescape_str_or_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
251
+ fn unescape_str_or_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
279
252
where
280
253
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
281
254
{
282
- debug_assert ! ( mode == Mode :: Str || mode == Mode :: ByteStr ) ;
283
- let initial_len = src. len ( ) ;
284
255
let mut chars = src. chars ( ) ;
285
- while let Some ( first_char) = chars. next ( ) {
286
- let start = initial_len - chars. as_str ( ) . len ( ) - first_char. len_utf8 ( ) ;
287
256
288
- let unescaped_char = match first_char {
257
+ // The `start` and `end` computation here is complicated because
258
+ // `skip_ascii_whitespace` makes us skip over chars without counting
259
+ // them in the range computation.
260
+ while let Some ( c) = chars. next ( ) {
261
+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
262
+ let res = match c {
289
263
'\\' => {
290
- let second_char = chars. clone ( ) . next ( ) ;
291
- match second_char {
264
+ match chars. clone ( ) . next ( ) {
292
265
Some ( '\n' ) => {
293
266
// Rust language specification requires us to skip whitespaces
294
267
// if unescaped '\' character is followed by '\n'.
@@ -297,17 +270,17 @@ where
297
270
skip_ascii_whitespace ( & mut chars, start, callback) ;
298
271
continue ;
299
272
}
300
- _ => scan_escape ( & mut chars, mode ) ,
273
+ _ => scan_escape ( & mut chars, is_byte ) ,
301
274
}
302
275
}
303
276
'\n' => Ok ( '\n' ) ,
304
277
'\t' => Ok ( '\t' ) ,
305
278
'"' => Err ( EscapeError :: EscapeOnlyChar ) ,
306
279
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
307
- _ => ascii_check ( first_char , mode ) ,
280
+ _ => ascii_check ( c , is_byte ) ,
308
281
} ;
309
- let end = initial_len - chars. as_str ( ) . len ( ) ;
310
- callback ( start..end, unescaped_char ) ;
282
+ let end = src . len ( ) - chars. as_str ( ) . len ( ) ;
283
+ callback ( start..end, res ) ;
311
284
}
312
285
313
286
fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
@@ -340,30 +313,29 @@ where
340
313
/// Takes the contents of a string literal (without quotes) and produces a
341
314
/// sequence of characters or errors.
342
315
/// NOTE: Raw strings do not perform any explicit character escaping, here we
343
- /// only translate CRLF to LF and produce errors on bare CR.
344
- fn unescape_raw_str_or_raw_byte_str < F > ( literal_text : & str , mode : Mode , callback : & mut F )
316
+ /// only produce errors on bare CR.
317
+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
345
318
where
346
319
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
347
320
{
348
- debug_assert ! ( mode == Mode :: RawStr || mode == Mode :: RawByteStr ) ;
349
- let initial_len = literal_text. len ( ) ;
350
-
351
- let mut chars = literal_text. chars ( ) ;
352
- while let Some ( curr) = chars. next ( ) {
353
- let start = initial_len - chars. as_str ( ) . len ( ) - curr. len_utf8 ( ) ;
321
+ let mut chars = src. chars ( ) ;
354
322
355
- let result = match curr {
323
+ // The `start` and `end` computation here matches the one in
324
+ // `unescape_str_or_byte_str` for consistency, even though this function
325
+ // doesn't have to worry about skipping any chars.
326
+ while let Some ( c) = chars. next ( ) {
327
+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
328
+ let res = match c {
356
329
'\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
357
- c if mode. is_bytes ( ) && !c. is_ascii ( ) => Err ( EscapeError :: NonAsciiCharInByteString ) ,
358
- c => Ok ( c) ,
330
+ _ => ascii_check ( c, is_byte) ,
359
331
} ;
360
- let end = initial_len - chars. as_str ( ) . len ( ) ;
361
-
362
- callback ( start..end, result) ;
332
+ let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
333
+ callback ( start..end, res) ;
363
334
}
364
335
}
365
336
366
- fn byte_from_char ( c : char ) -> u8 {
337
+ #[ inline]
338
+ pub fn byte_from_char ( c : char ) -> u8 {
367
339
let res = c as u32 ;
368
340
debug_assert ! ( res <= u8 :: MAX as u32 , "guaranteed because of Mode::ByteStr" ) ;
369
341
res as u8
0 commit comments