Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: Keep current and previous tokens precisely #69006

Merged
merged 1 commit into from
Feb 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 51 additions & 23 deletions src/librustc_parse/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,32 @@ enum PrevTokenKind {
Other,
}

// NOTE: `Ident`s are handled by `common.rs`.

#[derive(Clone)]
pub struct Parser<'a> {
pub sess: &'a ParseSess,
/// The current normalized token.
/// "Normalized" means that some interpolated tokens
/// (`$i: ident` and `$l: lifetime` meta-variables) are replaced
/// with non-interpolated identifier and lifetime tokens they refer to.
/// Perhaps the normalized / non-normalized setup can be simplified somehow.
/// Use span from this token if you need an isolated span.
pub token: Token,
/// The span of the current non-normalized token.
meta_var_span: Option<Span>,
/// The span of the previous non-normalized token.
pub prev_span: Span,
/// The kind of the previous normalized token (in simplified form).
/// The current non-normalized token if it's different from `token`.
/// Preferable use is through the `unnormalized_token()` getter.
/// Use span from this token if you need to concatenate it with some neighbouring spans.
unnormalized_token: Option<Token>,
/// The previous normalized token.
/// Use span from this token if you need an isolated span.
prev_token: Token,
/// The previous non-normalized token if it's different from `prev_token`.
/// Preferable use is through the `unnormalized_prev_token()` getter.
/// Use span from this token if you need to concatenate it with some neighbouring spans.
unnormalized_prev_token: Option<Token>,
/// Equivalent to `prev_token.kind` in simplified form.
/// FIXME: Remove in favor of `(unnormalized_)prev_token().kind`.
prev_token_kind: PrevTokenKind,
/// Equivalent to `unnormalized_prev_token().span`.
/// FIXME: Remove in favor of `(unnormalized_)prev_token().span`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is used so frequently that we might want `self.prev_span()` (unless those occurrences are meant to be replaced with `self.prev_token.span`).

pub prev_span: Span,
restrictions: Restrictions,
/// Used to determine the path to externally loaded source files.
pub(super) directory: Directory<'a>,
Expand Down Expand Up @@ -384,9 +393,11 @@ impl<'a> Parser<'a> {
let mut parser = Parser {
sess,
token: Token::dummy(),
prev_span: DUMMY_SP,
meta_var_span: None,
unnormalized_token: None,
prev_token: Token::dummy(),
unnormalized_prev_token: None,
prev_token_kind: PrevTokenKind::Other,
prev_span: DUMMY_SP,
restrictions: Restrictions::empty(),
recurse_into_file_modules,
directory: Directory {
Expand Down Expand Up @@ -427,6 +438,14 @@ impl<'a> Parser<'a> {
parser
}

/// Returns the current token in its non-normalized form.
///
/// Yields the stored `unnormalized_token` when one is present and falls back
/// to `token` otherwise, so callers always get a token reference without
/// having to deal with the `Option` themselves.
fn unnormalized_token(&self) -> &Token {
self.unnormalized_token.as_ref().unwrap_or(&self.token)
}

/// Returns the previous token in its non-normalized form.
///
/// Yields the stored `unnormalized_prev_token` when one is present and falls
/// back to `prev_token` otherwise, so callers always get a token reference
/// without having to deal with the `Option` themselves.
fn unnormalized_prev_token(&self) -> &Token {
self.unnormalized_prev_token.as_ref().unwrap_or(&self.prev_token)
}

fn next_tok(&mut self) -> Token {
let mut next = if self.desugar_doc_comments {
self.token_cursor.next_desugared()
Expand All @@ -435,7 +454,7 @@ impl<'a> Parser<'a> {
};
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = self.prev_span.with_ctxt(next.span.ctxt());
next.span = self.unnormalized_token().span.with_ctxt(next.span.ctxt());
}
next
}
Expand Down Expand Up @@ -895,10 +914,13 @@ impl<'a> Parser<'a> {
self.span_bug(self.token.span, msg);
}

self.prev_span = self.meta_var_span.take().unwrap_or(self.token.span);
// Update the current and previous tokens.
let next_token = self.next_tok();
self.prev_token = mem::replace(&mut self.token, next_token);
self.unnormalized_prev_token = self.unnormalized_token.take();

// Record last token kind for possible error recovery.
self.prev_token_kind = match self.token.kind {
// Update fields derived from the previous token.
self.prev_token_kind = match self.prev_token.kind {
token::DocComment(..) => PrevTokenKind::DocComment,
token::Comma => PrevTokenKind::Comma,
token::BinOp(token::Plus) => PrevTokenKind::Plus,
Expand All @@ -908,22 +930,28 @@ impl<'a> Parser<'a> {
token::Ident(..) => PrevTokenKind::Ident,
_ => PrevTokenKind::Other,
};
self.prev_span = self.unnormalized_prev_token().span;

self.token = self.next_tok();
self.expected_tokens.clear();
// Check after each token.
self.process_potential_macro_variable();
}

/// Advances the parser using provided token as a next one. Use this when
/// consuming a part of a token. For example a single `<` from `<<`.
/// FIXME: this function sets the previous token data to some semi-nonsensical values
/// which kind of work because they are currently used in very limited ways in practice.
/// Correct token kinds and spans need to be calculated instead.
fn bump_with(&mut self, next: TokenKind, span: Span) {
self.prev_span = self.token.span.with_hi(span.lo());
// It would be incorrect to record the kind of the current token, but
// fortunately for tokens currently using `bump_with`, the
// `prev_token_kind` will be of no use anyway.
// Update the current and previous tokens.
let next_token = Token::new(next, span);
self.prev_token = mem::replace(&mut self.token, next_token);
self.unnormalized_prev_token = self.unnormalized_token.take();

// Update fields derived from the previous token.
self.prev_token_kind = PrevTokenKind::Other;
self.token = Token::new(next, span);
self.prev_span = self.unnormalized_prev_token().span.with_hi(span.lo());

self.expected_tokens.clear();
}

Expand Down Expand Up @@ -1054,7 +1082,7 @@ impl<'a> Parser<'a> {
}

pub fn process_potential_macro_variable(&mut self) {
self.token = match self.token.kind {
let normalized_token = match self.token.kind {
token::Dollar
if self.token.span.from_expansion() && self.look_ahead(1, |t| t.is_ident()) =>
{
Expand All @@ -1071,7 +1099,6 @@ impl<'a> Parser<'a> {
return;
}
token::Interpolated(ref nt) => {
self.meta_var_span = Some(self.token.span);
// Interpolated identifier and lifetime tokens are replaced with usual identifier
// and lifetime tokens, so the former are never encountered during normal parsing.
match **nt {
Expand All @@ -1084,6 +1111,7 @@ impl<'a> Parser<'a> {
}
_ => return,
};
self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token));
}

/// Parses a single token tree from the input.
Expand All @@ -1100,7 +1128,7 @@ impl<'a> Parser<'a> {
}
token::CloseDelim(_) | token::Eof => unreachable!(),
_ => {
let token = self.token.take();
let token = self.token.clone();
self.bump();
TokenTree::Token(token)
}
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_parse/parser/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl<'a> Parser<'a> {
path
});

let lo = self.meta_var_span.unwrap_or(self.token.span);
let lo = self.unnormalized_token().span;
let mut segments = Vec::new();
let mod_sep_ctxt = self.token.span.ctxt();
if self.eat(&token::ModSep) {
Expand Down
4 changes: 2 additions & 2 deletions src/test/ui/parser/mbe_missing_right_paren.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ LL | macro_rules! abc(ؼ;
| ^

error: unexpected end of macro invocation
--> $DIR/mbe_missing_right_paren.rs:3:1
--> $DIR/mbe_missing_right_paren.rs:3:19
|
LL | macro_rules! abc(ؼ
| ^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
| ^ missing tokens in macro arguments

error: aborting due to 3 previous errors