Skip to content

Commit dbaf3e6

Browse files
committed
Auto merge of #102508 - nnethercote:even-more-lexer-improvements, r=matklad
Even more lexer improvements These are just about code clarity, rather than performance. r? `@matklad`
2 parents 607b829 + 4e5ddf1 commit dbaf3e6

File tree

6 files changed

+69
-79
lines changed

6 files changed

+69
-79
lines changed

compiler/rustc_ast/src/token.rs

+8-11
Original file line numberDiff line numberDiff line change
@@ -345,17 +345,14 @@ impl Token {
345345
}
346346

347347
pub fn is_op(&self) -> bool {
348-
!matches!(
349-
self.kind,
350-
OpenDelim(..)
351-
| CloseDelim(..)
352-
| Literal(..)
353-
| DocComment(..)
354-
| Ident(..)
355-
| Lifetime(..)
356-
| Interpolated(..)
357-
| Eof
358-
)
348+
match self.kind {
349+
Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Not | Tilde | BinOp(_)
350+
| BinOpEq(_) | At | Dot | DotDot | DotDotDot | DotDotEq | Comma | Semi | Colon
351+
| ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | SingleQuote => true,
352+
353+
OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..)
354+
| Lifetime(..) | Interpolated(..) | Eof => false,
355+
}
359356
}
360357

361358
pub fn is_like_plus(&self) -> bool {

compiler/rustc_ast/src/tokenstream.rs

+11
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,20 @@ pub struct AttributesData {
304304
#[derive(Clone, Debug, Default, Encodable, Decodable)]
305305
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);
306306

307+
/// Similar to `proc_macro::Spacing`, but for tokens.
308+
///
309+
/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
310+
/// we convert to `proc_macro::TokenTree` for proc macros only `Punct`
311+
/// `TokenTree`s have a `proc_macro::Spacing`.
307312
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
308313
pub enum Spacing {
314+
/// The token is not immediately followed by an operator token (as
315+
/// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
316+
/// `+/*foo*/=`, `+ident`, and `+()`.
309317
Alone,
318+
319+
/// The token is immediately followed by an operator token. E.g. a `+`
320+
/// token is `Joint` in `+=` and `++`.
310321
Joint,
311322
}
312323

compiler/rustc_expand/src/proc_macro_server.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
110110
tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint),
111111
};
112112

113+
// Split the operator into one or more `Punct`s, one per character.
114+
// The final one inherits the jointness of the original token. Any
115+
// before that get `joint = true`.
113116
let mut op = |s: &str| {
114117
assert!(s.is_ascii());
115-
trees.extend(s.as_bytes().iter().enumerate().map(|(idx, &ch)| {
116-
TokenTree::Punct(Punct { ch, joint: joint || idx != s.len() - 1, span })
118+
trees.extend(s.bytes().enumerate().map(|(idx, ch)| {
119+
let is_final = idx == s.len() - 1;
120+
TokenTree::Punct(Punct { ch, joint: if is_final { joint } else { true }, span })
117121
}));
118122
};
119123

compiler/rustc_parse/src/lexer/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ pub(crate) fn parse_token_trees<'a>(
5252
let cursor = Cursor::new(src);
5353
let string_reader =
5454
StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
55-
tokentrees::TokenTreesReader::parse_token_trees(string_reader)
55+
tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
5656
}
5757

5858
struct StringReader<'a> {

compiler/rustc_parse/src/lexer/tokentrees.rs

+38-64
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub(super) struct TokenTreesReader<'a> {
2727
}
2828

2929
impl<'a> TokenTreesReader<'a> {
30-
pub(super) fn parse_token_trees(
30+
pub(super) fn parse_all_token_trees(
3131
string_reader: StringReader<'a>,
3232
) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
3333
let mut tt_reader = TokenTreesReader {
@@ -40,36 +40,51 @@ impl<'a> TokenTreesReader<'a> {
4040
last_delim_empty_block_spans: FxHashMap::default(),
4141
matching_block_spans: Vec::new(),
4242
};
43-
let res = tt_reader.parse_all_token_trees();
43+
let res = tt_reader.parse_token_trees(/* is_delimited */ false);
4444
(res, tt_reader.unmatched_braces)
4545
}
4646

47-
// Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
48-
fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
47+
// Parse a stream of tokens into a list of `TokenTree`s.
48+
fn parse_token_trees(&mut self, is_delimited: bool) -> PResult<'a, TokenStream> {
4949
self.token = self.string_reader.next_token().0;
50-
let mut buf = TokenStreamBuilder::default();
50+
let mut buf = Vec::new();
5151
loop {
5252
match self.token.kind {
5353
token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
54-
token::CloseDelim(delim) => return Err(self.close_delim_err(delim)),
55-
token::Eof => return Ok(buf.into_token_stream()),
56-
_ => buf.push(self.parse_token_tree_non_delim_non_eof()),
57-
}
58-
}
59-
}
60-
61-
// Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
62-
fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
63-
let mut buf = TokenStreamBuilder::default();
64-
loop {
65-
match self.token.kind {
66-
token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
67-
token::CloseDelim(..) => return buf.into_token_stream(),
54+
token::CloseDelim(delim) => {
55+
return if is_delimited {
56+
Ok(TokenStream::new(buf))
57+
} else {
58+
Err(self.close_delim_err(delim))
59+
};
60+
}
6861
token::Eof => {
69-
self.eof_err().emit();
70-
return buf.into_token_stream();
62+
if is_delimited {
63+
self.eof_err().emit();
64+
}
65+
return Ok(TokenStream::new(buf));
66+
}
67+
_ => {
68+
// Get the next normal token. This might require getting multiple adjacent
69+
// single-char tokens and joining them together.
70+
let (this_spacing, next_tok) = loop {
71+
let (next_tok, is_next_tok_preceded_by_whitespace) =
72+
self.string_reader.next_token();
73+
if !is_next_tok_preceded_by_whitespace {
74+
if let Some(glued) = self.token.glue(&next_tok) {
75+
self.token = glued;
76+
} else {
77+
let this_spacing =
78+
if next_tok.is_op() { Spacing::Joint } else { Spacing::Alone };
79+
break (this_spacing, next_tok);
80+
}
81+
} else {
82+
break (Spacing::Alone, next_tok);
83+
}
84+
};
85+
let this_tok = std::mem::replace(&mut self.token, next_tok);
86+
buf.push(TokenTree::Token(this_tok, this_spacing));
7187
}
72-
_ => buf.push(self.parse_token_tree_non_delim_non_eof()),
7388
}
7489
}
7590
}
@@ -113,14 +128,12 @@ impl<'a> TokenTreesReader<'a> {
113128
// The span for beginning of the delimited section
114129
let pre_span = self.token.span;
115130

116-
// Move past the open delimiter.
117131
self.open_braces.push((open_delim, self.token.span));
118-
self.token = self.string_reader.next_token().0;
119132

120133
// Parse the token trees within the delimiters.
121134
// We stop at any delimiter so we can try to recover if the user
122135
// uses an incorrect delimiter.
123-
let tts = self.parse_token_trees_until_close_delim();
136+
let tts = self.parse_token_trees(/* is_delimited */ true).unwrap();
124137

125138
// Expand to cover the entire delimited token tree
126139
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
@@ -242,43 +255,4 @@ impl<'a> TokenTreesReader<'a> {
242255
err.span_label(self.token.span, "unexpected closing delimiter");
243256
err
244257
}
245-
246-
#[inline]
247-
fn parse_token_tree_non_delim_non_eof(&mut self) -> TokenTree {
248-
// `this_spacing` for the returned token refers to whether the token is
249-
// immediately followed by another op token. It is determined by the
250-
// next token: its kind and its `preceded_by_whitespace` status.
251-
let (next_tok, is_next_tok_preceded_by_whitespace) = self.string_reader.next_token();
252-
let this_spacing = if is_next_tok_preceded_by_whitespace || !next_tok.is_op() {
253-
Spacing::Alone
254-
} else {
255-
Spacing::Joint
256-
};
257-
let this_tok = std::mem::replace(&mut self.token, next_tok);
258-
TokenTree::Token(this_tok, this_spacing)
259-
}
260-
}
261-
262-
#[derive(Default)]
263-
struct TokenStreamBuilder {
264-
buf: Vec<TokenTree>,
265-
}
266-
267-
impl TokenStreamBuilder {
268-
#[inline(always)]
269-
fn push(&mut self, tree: TokenTree) {
270-
if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last()
271-
&& let TokenTree::Token(token, joint) = &tree
272-
&& let Some(glued) = prev_token.glue(token)
273-
{
274-
self.buf.pop();
275-
self.buf.push(TokenTree::Token(glued, *joint));
276-
} else {
277-
self.buf.push(tree)
278-
}
279-
}
280-
281-
fn into_token_stream(self) -> TokenStream {
282-
TokenStream::new(self.buf)
283-
}
284258
}

compiler/rustc_parse/src/parser/mod.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,10 @@ impl TokenCursor {
302302

303303
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
304304
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
305-
// required to wrap the text.
305+
// required to wrap the text. E.g.
306+
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
307+
// - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
308+
// - `abc "##d##"` is wrapped as `r###"abc "d""###` (num_of_hashes = 3)
306309
let mut num_of_hashes = 0;
307310
let mut count = 0;
308311
for ch in data.as_str().chars() {
@@ -314,6 +317,7 @@ impl TokenCursor {
314317
num_of_hashes = cmp::max(num_of_hashes, count);
315318
}
316319

320+
// `/// foo` becomes `doc = r"foo"`.
317321
let delim_span = DelimSpan::from_single(span);
318322
let body = TokenTree::Delimited(
319323
delim_span,

0 commit comments

Comments
 (0)