Skip to content

Commit 6262b60

Browse files
committed
Auto merge of #12136 - jonas-schievink:lexedstr-converter, r=jonas-schievink
internal: Add a `Converter` type for token conversion. Makes it easier to produce multiple tokens from a single rustc token, if that's how we want to approach #1109.
2 parents f83dccf + 1f50e19 commit 6262b60

File tree

1 file changed

+168
-143
lines changed

1 file changed

+168
-143
lines changed

crates/parser/src/lexed_str.rs

+168-143
Original file line number | Diff line number | Diff line change
@@ -29,29 +29,19 @@ struct LexError {
2929

3030
impl<'a> LexedStr<'a> {
3131
pub fn new(text: &'a str) -> LexedStr<'a> {
32-
let mut res = LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() };
33-
34-
let mut offset = 0;
32+
let mut conv = Converter::new(text);
3533
if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
36-
res.push(SHEBANG, offset);
37-
offset = shebang_len
34+
conv.res.push(SHEBANG, conv.offset);
35+
conv.offset = shebang_len;
3836
};
39-
for token in rustc_lexer::tokenize(&text[offset..]) {
40-
let token_text = &text[offset..][..token.len];
4137

42-
let (kind, err) = from_rustc(&token.kind, token_text);
43-
res.push(kind, offset);
44-
offset += token.len;
38+
for token in rustc_lexer::tokenize(&text[conv.offset..]) {
39+
let token_text = &text[conv.offset..][..token.len];
4540

46-
if let Some(err) = err {
47-
let token = res.len() as u32;
48-
let msg = err.to_string();
49-
res.error.push(LexError { msg, token });
50-
}
41+
conv.extend_token(&token.kind, token_text);
5142
}
52-
res.push(EOF, offset);
5343

54-
res
44+
conv.finalize_with_eof()
5545
}
5646

5747
pub fn single_token(text: &'a str) -> Option<(SyntaxKind, Option<String>)> {
@@ -64,8 +54,12 @@ impl<'a> LexedStr<'a> {
6454
return None;
6555
}
6656

67-
let (kind, err) = from_rustc(&token.kind, text);
68-
Some((kind, err.map(|it| it.to_owned())))
57+
let mut conv = Converter::new(text);
58+
conv.extend_token(&token.kind, text);
59+
match &*conv.res.kind {
60+
[kind] => Some((*kind, conv.res.error.pop().map(|it| it.msg.clone()))),
61+
_ => None,
62+
}
6963
}
7064

7165
pub fn as_str(&self) -> &str {
@@ -128,148 +122,179 @@ impl<'a> LexedStr<'a> {
128122
}
129123
}
130124

131-
/// Returns `SyntaxKind` and an optional tokenize error message.
132-
fn from_rustc(
133-
kind: &rustc_lexer::TokenKind,
134-
token_text: &str,
135-
) -> (SyntaxKind, Option<&'static str>) {
136-
// A note on an intended tradeoff:
137-
// We drop some useful information here (see patterns with double dots `..`)
138-
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
139-
// being `u16` that come from `rowan::SyntaxKind`.
140-
let mut err = "";
141-
142-
let syntax_kind = {
143-
match kind {
144-
rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,
145-
rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => {
146-
if !terminated {
147-
err = "Missing trailing `*/` symbols to terminate the block comment";
125+
struct Converter<'a> {
126+
res: LexedStr<'a>,
127+
offset: usize,
128+
}
129+
130+
impl<'a> Converter<'a> {
131+
fn new(text: &'a str) -> Self {
132+
Self {
133+
res: LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() },
134+
offset: 0,
135+
}
136+
}
137+
138+
fn finalize_with_eof(mut self) -> LexedStr<'a> {
139+
self.res.push(EOF, self.offset);
140+
self.res
141+
}
142+
143+
fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
144+
self.res.push(kind, self.offset);
145+
self.offset += len;
146+
147+
if let Some(err) = err {
148+
let token = self.res.len() as u32;
149+
let msg = err.to_string();
150+
self.res.error.push(LexError { msg, token });
151+
}
152+
}
153+
154+
fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) {
155+
// A note on an intended tradeoff:
156+
// We drop some useful information here (see patterns with double dots `..`)
157+
// Storing that info in `SyntaxKind` is not possible due to its layout requirements of
158+
// being `u16` that come from `rowan::SyntaxKind`.
159+
let mut err = "";
160+
161+
let syntax_kind = {
162+
match kind {
163+
rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT,
164+
rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => {
165+
if !terminated {
166+
err = "Missing trailing `*/` symbols to terminate the block comment";
167+
}
168+
COMMENT
148169
}
149-
COMMENT
150-
}
151170

152-
rustc_lexer::TokenKind::Whitespace => WHITESPACE,
171+
rustc_lexer::TokenKind::Whitespace => WHITESPACE,
153172

154-
rustc_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE,
155-
rustc_lexer::TokenKind::Ident => SyntaxKind::from_keyword(token_text).unwrap_or(IDENT),
173+
rustc_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE,
174+
rustc_lexer::TokenKind::Ident => {
175+
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
176+
}
156177

157-
rustc_lexer::TokenKind::RawIdent => IDENT,
158-
rustc_lexer::TokenKind::Literal { kind, .. } => return from_rustc_literal(kind),
178+
rustc_lexer::TokenKind::RawIdent => IDENT,
179+
rustc_lexer::TokenKind::Literal { kind, .. } => {
180+
self.extend_literal(token_text.len(), kind);
181+
return;
182+
}
159183

160-
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
161-
if *starts_with_number {
162-
err = "Lifetime name cannot start with a number";
184+
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
185+
if *starts_with_number {
186+
err = "Lifetime name cannot start with a number";
187+
}
188+
LIFETIME_IDENT
163189
}
164-
LIFETIME_IDENT
165-
}
166190

167-
rustc_lexer::TokenKind::Semi => T![;],
168-
rustc_lexer::TokenKind::Comma => T![,],
169-
rustc_lexer::TokenKind::Dot => T![.],
170-
rustc_lexer::TokenKind::OpenParen => T!['('],
171-
rustc_lexer::TokenKind::CloseParen => T![')'],
172-
rustc_lexer::TokenKind::OpenBrace => T!['{'],
173-
rustc_lexer::TokenKind::CloseBrace => T!['}'],
174-
rustc_lexer::TokenKind::OpenBracket => T!['['],
175-
rustc_lexer::TokenKind::CloseBracket => T![']'],
176-
rustc_lexer::TokenKind::At => T![@],
177-
rustc_lexer::TokenKind::Pound => T![#],
178-
rustc_lexer::TokenKind::Tilde => T![~],
179-
rustc_lexer::TokenKind::Question => T![?],
180-
rustc_lexer::TokenKind::Colon => T![:],
181-
rustc_lexer::TokenKind::Dollar => T![$],
182-
rustc_lexer::TokenKind::Eq => T![=],
183-
rustc_lexer::TokenKind::Bang => T![!],
184-
rustc_lexer::TokenKind::Lt => T![<],
185-
rustc_lexer::TokenKind::Gt => T![>],
186-
rustc_lexer::TokenKind::Minus => T![-],
187-
rustc_lexer::TokenKind::And => T![&],
188-
rustc_lexer::TokenKind::Or => T![|],
189-
rustc_lexer::TokenKind::Plus => T![+],
190-
rustc_lexer::TokenKind::Star => T![*],
191-
rustc_lexer::TokenKind::Slash => T![/],
192-
rustc_lexer::TokenKind::Caret => T![^],
193-
rustc_lexer::TokenKind::Percent => T![%],
194-
rustc_lexer::TokenKind::Unknown => ERROR,
195-
}
196-
};
191+
rustc_lexer::TokenKind::Semi => T![;],
192+
rustc_lexer::TokenKind::Comma => T![,],
193+
rustc_lexer::TokenKind::Dot => T![.],
194+
rustc_lexer::TokenKind::OpenParen => T!['('],
195+
rustc_lexer::TokenKind::CloseParen => T![')'],
196+
rustc_lexer::TokenKind::OpenBrace => T!['{'],
197+
rustc_lexer::TokenKind::CloseBrace => T!['}'],
198+
rustc_lexer::TokenKind::OpenBracket => T!['['],
199+
rustc_lexer::TokenKind::CloseBracket => T![']'],
200+
rustc_lexer::TokenKind::At => T![@],
201+
rustc_lexer::TokenKind::Pound => T![#],
202+
rustc_lexer::TokenKind::Tilde => T![~],
203+
rustc_lexer::TokenKind::Question => T![?],
204+
rustc_lexer::TokenKind::Colon => T![:],
205+
rustc_lexer::TokenKind::Dollar => T![$],
206+
rustc_lexer::TokenKind::Eq => T![=],
207+
rustc_lexer::TokenKind::Bang => T![!],
208+
rustc_lexer::TokenKind::Lt => T![<],
209+
rustc_lexer::TokenKind::Gt => T![>],
210+
rustc_lexer::TokenKind::Minus => T![-],
211+
rustc_lexer::TokenKind::And => T![&],
212+
rustc_lexer::TokenKind::Or => T![|],
213+
rustc_lexer::TokenKind::Plus => T![+],
214+
rustc_lexer::TokenKind::Star => T![*],
215+
rustc_lexer::TokenKind::Slash => T![/],
216+
rustc_lexer::TokenKind::Caret => T![^],
217+
rustc_lexer::TokenKind::Percent => T![%],
218+
rustc_lexer::TokenKind::Unknown => ERROR,
219+
}
220+
};
197221

198-
let err = if err.is_empty() { None } else { Some(err) };
199-
(syntax_kind, err)
200-
}
222+
let err = if err.is_empty() { None } else { Some(err) };
223+
self.push(syntax_kind, token_text.len(), err);
224+
}
201225

202-
fn from_rustc_literal(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
203-
let mut err = "";
226+
fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
227+
let mut err = "";
204228

205-
let syntax_kind = match *kind {
206-
rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
207-
if empty_int {
208-
err = "Missing digits after the integer base prefix";
229+
let syntax_kind = match *kind {
230+
rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
231+
if empty_int {
232+
err = "Missing digits after the integer base prefix";
233+
}
234+
INT_NUMBER
209235
}
210-
INT_NUMBER
211-
}
212-
rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
213-
if empty_exponent {
214-
err = "Missing digits after the exponent symbol";
236+
rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
237+
if empty_exponent {
238+
err = "Missing digits after the exponent symbol";
239+
}
240+
FLOAT_NUMBER
215241
}
216-
FLOAT_NUMBER
217-
}
218-
rustc_lexer::LiteralKind::Char { terminated } => {
219-
if !terminated {
220-
err = "Missing trailing `'` symbol to terminate the character literal";
242+
rustc_lexer::LiteralKind::Char { terminated } => {
243+
if !terminated {
244+
err = "Missing trailing `'` symbol to terminate the character literal";
245+
}
246+
CHAR
221247
}
222-
CHAR
223-
}
224-
rustc_lexer::LiteralKind::Byte { terminated } => {
225-
if !terminated {
226-
err = "Missing trailing `'` symbol to terminate the byte literal";
248+
rustc_lexer::LiteralKind::Byte { terminated } => {
249+
if !terminated {
250+
err = "Missing trailing `'` symbol to terminate the byte literal";
251+
}
252+
BYTE
227253
}
228-
BYTE
229-
}
230-
rustc_lexer::LiteralKind::Str { terminated } => {
231-
if !terminated {
232-
err = "Missing trailing `\"` symbol to terminate the string literal";
254+
rustc_lexer::LiteralKind::Str { terminated } => {
255+
if !terminated {
256+
err = "Missing trailing `\"` symbol to terminate the string literal";
257+
}
258+
STRING
233259
}
234-
STRING
235-
}
236-
rustc_lexer::LiteralKind::ByteStr { terminated } => {
237-
if !terminated {
238-
err = "Missing trailing `\"` symbol to terminate the byte string literal";
260+
rustc_lexer::LiteralKind::ByteStr { terminated } => {
261+
if !terminated {
262+
err = "Missing trailing `\"` symbol to terminate the byte string literal";
263+
}
264+
BYTE_STRING
239265
}
240-
BYTE_STRING
241-
}
242-
rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => {
243-
if let Some(raw_str_err) = raw_str_err {
244-
err = match raw_str_err {
245-
rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal",
246-
rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
247-
"Missing trailing `\"` to terminate the raw string literal"
248-
} else {
249-
"Missing trailing `\"` with `#` symbols to terminate the raw string literal"
250-
},
251-
rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols",
266+
rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => {
267+
if let Some(raw_str_err) = raw_str_err {
268+
err = match raw_str_err {
269+
rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal",
270+
rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
271+
"Missing trailing `\"` to terminate the raw string literal"
272+
} else {
273+
"Missing trailing `\"` with `#` symbols to terminate the raw string literal"
274+
},
275+
rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols",
276+
};
252277
};
253-
};
254-
STRING
255-
}
256-
rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => {
257-
if let Some(raw_str_err) = raw_str_err {
258-
err = match raw_str_err {
259-
rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal",
260-
rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
261-
"Missing trailing `\"` to terminate the raw byte string literal"
262-
} else {
263-
"Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
264-
},
265-
rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols",
278+
STRING
279+
}
280+
rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => {
281+
if let Some(raw_str_err) = raw_str_err {
282+
err = match raw_str_err {
283+
rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal",
284+
rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
285+
"Missing trailing `\"` to terminate the raw byte string literal"
286+
} else {
287+
"Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
288+
},
289+
rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols",
290+
};
266291
};
267-
};
268292

269-
BYTE_STRING
270-
}
271-
};
293+
BYTE_STRING
294+
}
295+
};
272296

273-
let err = if err.is_empty() { None } else { Some(err) };
274-
(syntax_kind, err)
297+
let err = if err.is_empty() { None } else { Some(err) };
298+
self.push(syntax_kind, len, err);
299+
}
275300
}

0 commit comments

Comments
 (0)