Skip to content

Commit 735ac05

Browse files
committed Jun 9, 2019
Actually translate CRLF in raw byte strings and unify unescape impl
1 parent 3c1d352 commit 735ac05

File tree

3 files changed

+69
-25
lines changed

3 files changed

+69
-25
lines changed
 

‎src/libsyntax/parse/lexer/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1348,7 +1348,7 @@ impl<'a> StringReader<'a> {
13481348

13491349
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
13501350
self.with_str_from_to(content_start, content_end, |lit: &str| {
1351-
unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| {
1351+
unescape::unescape_raw_str(lit, &mut |range, c| {
13521352
if let Err(err) = c {
13531353
emit_unescape_error(
13541354
&self.sess.span_diagnostic,
@@ -1365,7 +1365,7 @@ impl<'a> StringReader<'a> {
13651365

13661366
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
13671367
self.with_str_from_to(content_start, content_end, |lit: &str| {
1368-
unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| {
1368+
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
13691369
if let Err(err) = c {
13701370
emit_unescape_error(
13711371
&self.sess.span_diagnostic,

‎src/libsyntax/parse/literal.rs

+23-3
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ use crate::ast::{self, Lit, LitKind};
44
use crate::parse::parser::Parser;
55
use crate::parse::PResult;
66
use crate::parse::token::{self, Token, TokenKind};
7-
use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str};
87
use crate::parse::unescape::{unescape_char, unescape_byte};
8+
use crate::parse::unescape::{unescape_str, unescape_byte_str};
9+
use crate::parse::unescape::{unescape_raw_str, unescape_raw_byte_str};
910
use crate::print::pprust;
1011
use crate::symbol::{kw, sym, Symbol};
1112
use crate::tokenstream::{TokenStream, TokenTree};
@@ -144,7 +145,7 @@ impl LitKind {
144145
let symbol = if s.contains('\r') {
145146
let mut buf = String::with_capacity(s.len());
146147
let mut error = Ok(());
147-
unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| {
148+
unescape_raw_str(&s, &mut |_, unescaped_char| {
148149
match unescaped_char {
149150
Ok(c) => buf.push(c),
150151
Err(_) => error = Err(LitError::LexerError),
@@ -172,7 +173,26 @@ impl LitKind {
172173
buf.shrink_to_fit();
173174
LitKind::ByteStr(Lrc::new(buf))
174175
}
175-
token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())),
176+
token::ByteStrRaw(_) => {
177+
let s = symbol.as_str();
178+
let bytes = if s.contains('\r') {
179+
let mut buf = Vec::with_capacity(s.len());
180+
let mut error = Ok(());
181+
unescape_raw_byte_str(&s, &mut |_, unescaped_byte| {
182+
match unescaped_byte {
183+
Ok(c) => buf.push(c),
184+
Err(_) => error = Err(LitError::LexerError),
185+
}
186+
});
187+
error?;
188+
buf.shrink_to_fit();
189+
buf
190+
} else {
191+
symbol.to_string().into_bytes()
192+
};
193+
194+
LitKind::ByteStr(Lrc::new(bytes))
195+
},
176196
token::Err => LitKind::Err(symbol),
177197
})
178198
}

‎src/libsyntax/parse/unescape.rs

+44-20
Original file line numberDiff line numberDiff line change
@@ -71,29 +71,24 @@ where
7171
/// sequence of characters or errors.
7272
/// NOTE: Raw strings do not perform any explicit character escaping, here we
7373
/// only translate CRLF to LF and produce errors on bare CR.
74-
pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
74+
pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
7575
where
7676
F: FnMut(Range<usize>, Result<char, EscapeError>),
7777
{
78-
let mut byte_offset: usize = 0;
78+
unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
79+
}
7980

80-
let mut chars = literal_text.chars().peekable();
81-
while let Some(curr) = chars.next() {
82-
let (result, scanned) = match (curr, chars.peek()) {
83-
('\r', Some('\n')) => {
84-
chars.next();
85-
(Ok('\n'), [Some('\r'), Some('\n')])
86-
},
87-
('\r', _) =>
88-
(Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
89-
(c, _) if mode.is_bytes() && c > '\x7F' =>
90-
(Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
91-
(c, _) => (Ok(c), [Some(c), None]),
92-
};
93-
let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
94-
callback(byte_offset..(byte_offset + len_utf8), result);
95-
byte_offset += len_utf8;
96-
}
81+
/// Takes the contents of a raw byte string literal (without quotes) and produces a
82+
/// sequence of bytes or errors.
83+
/// NOTE: Raw strings do not perform any explicit character escaping, here we
84+
/// only translate CRLF to LF and produce errors on bare CR.
85+
pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
86+
where
87+
F: FnMut(Range<usize>, Result<u8, EscapeError>),
88+
{
89+
unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
90+
callback(range, char.map(byte_from_char))
91+
})
9792
}
9893

9994
#[derive(Debug, Clone, Copy)]
@@ -284,9 +279,38 @@ where
284279
}
285280
}
286281

282+
/// Takes the contents of a raw string or raw byte string literal (without quotes) and produces a
283+
/// sequence of characters or errors.
284+
/// NOTE: Raw strings do not perform any explicit character escaping, here we
285+
/// only translate CRLF to LF and produce errors on bare CR.
286+
fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
287+
where
288+
F: FnMut(Range<usize>, Result<char, EscapeError>),
289+
{
290+
let mut byte_offset: usize = 0;
291+
292+
let mut chars = literal_text.chars().peekable();
293+
while let Some(curr) = chars.next() {
294+
let (result, scanned) = match (curr, chars.peek()) {
295+
('\r', Some('\n')) => {
296+
chars.next();
297+
(Ok('\n'), [Some('\r'), Some('\n')])
298+
},
299+
('\r', _) =>
300+
(Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
301+
(c, _) if mode.is_bytes() && !c.is_ascii() =>
302+
(Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
303+
(c, _) => (Ok(c), [Some(c), None]),
304+
};
305+
let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
306+
callback(byte_offset..(byte_offset + len_utf8), result);
307+
byte_offset += len_utf8;
308+
}
309+
}
310+
287311
fn byte_from_char(c: char) -> u8 {
288312
let res = c as u32;
289-
assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte");
313+
assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte(Str)");
290314
res as u8
291315
}
292316

0 commit comments

Comments
 (0)