Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up the macro parser #50855

Merged
merged 3 commits into from
May 20, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 71 additions & 23 deletions src/libsyntax/ext/tt/macro_parser.rs
Original file line number Diff line number Diff line change
@@ -82,7 +82,7 @@

pub use self::NamedMatch::*;
pub use self::ParseResult::*;
use self::TokenTreeOrTokenTreeVec::*;
use self::TokenTreeOrTokenTreeSlice::*;

use ast::Ident;
use syntax_pos::{self, BytePos, Span};
@@ -97,6 +97,7 @@ use tokenstream::TokenStream;
use util::small_vector::SmallVector;

use std::mem;
use std::ops::{Deref, DerefMut};
use std::rc::Rc;
use std::collections::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};
@@ -106,12 +107,12 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
/// Either a sequence of token trees or a single one. This is used as the representation of the
/// sequence of tokens that make up a matcher.
#[derive(Clone)]
enum TokenTreeOrTokenTreeVec {
enum TokenTreeOrTokenTreeSlice<'a> {
Tt(TokenTree),
TtSeq(Vec<TokenTree>),
TtSeq(&'a [TokenTree]),
}

impl TokenTreeOrTokenTreeVec {
impl<'a> TokenTreeOrTokenTreeSlice<'a> {
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
/// will not recursively descend into subtrees).
fn len(&self) -> usize {
@@ -135,19 +136,19 @@ impl TokenTreeOrTokenTreeVec {
/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
/// descended into.
#[derive(Clone)]
struct MatcherTtFrame {
struct MatcherTtFrame<'a> {
/// The "parent" matcher that we are descending into.
elts: TokenTreeOrTokenTreeVec,
elts: TokenTreeOrTokenTreeSlice<'a>,
/// The position of the "dot" in `elts` at the time we descended.
idx: usize,
}

/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module
/// documentation.
#[derive(Clone)]
struct MatcherPos {
struct MatcherPos<'a> {
/// The token or sequence of tokens that make up the matcher
top_elts: TokenTreeOrTokenTreeVec,
top_elts: TokenTreeOrTokenTreeSlice<'a>,
/// The position of the "dot" in this matcher
idx: usize,
/// The beginning position in the source that the beginning of this matcher corresponds to. In
@@ -186,7 +187,7 @@ struct MatcherPos {
sep: Option<Token>,
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
/// before we enter the sequence.
up: Option<Box<MatcherPos>>,
up: Option<MatcherPosHandle<'a>>,

// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc
@@ -195,17 +196,60 @@ struct MatcherPos {
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
/// that where the bottom of the stack is the outermost matcher.
// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
stack: Vec<MatcherTtFrame>,
stack: Vec<MatcherTtFrame<'a>>,
}

impl MatcherPos {
impl<'a> MatcherPos<'a> {
/// Add `m` as a named match for the `idx`-th metavar.
fn push_match(&mut self, idx: usize, m: NamedMatch) {
let matches = Rc::make_mut(&mut self.matches[idx]);
matches.push(m);
}
}

// Lots of MatcherPos instances are created at runtime. Allocating them on the
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
// on the stack is also slow, because MatcherPos is quite a large type and
// instances get moved around a lot between vectors, which requires lots of
// slow memcpy calls.
//
// Therefore, the initial MatcherPos is always allocated on the stack,
// subsequent ones (of which there aren't that many) are allocated on the heap,
// and this type is used to encapsulate both cases.
enum MatcherPosHandle<'a> {
    /// A borrow of the initial, stack-allocated `MatcherPos`.
    Ref(&'a mut MatcherPos<'a>),
    /// An owned, heap-allocated `MatcherPos` created during parsing.
    Box(Box<MatcherPos<'a>>),
}

impl<'a> Clone for MatcherPosHandle<'a> {
    /// Cloning always yields an owned, heap-allocated handle: we cannot
    /// duplicate the `Ref` variant's unique `&mut` borrow, so both variants
    /// deep-copy the underlying `MatcherPos` into a fresh `Box`.
    fn clone(&self) -> Self {
        let boxed = match *self {
            MatcherPosHandle::Ref(ref pos) => Box::new((**pos).clone()),
            MatcherPosHandle::Box(ref owned) => owned.clone(),
        };
        MatcherPosHandle::Box(boxed)
    }
}

impl<'a> Deref for MatcherPosHandle<'a> {
    type Target = MatcherPos<'a>;

    /// Borrow the underlying `MatcherPos`, regardless of whether it lives
    /// on the stack (`Ref`) or on the heap (`Box`).
    fn deref(&self) -> &Self::Target {
        match *self {
            MatcherPosHandle::Box(ref owned) => owned,
            MatcherPosHandle::Ref(ref pos) => pos,
        }
    }
}

impl<'a> DerefMut for MatcherPosHandle<'a> {
    /// Mutably borrow the underlying `MatcherPos`, regardless of whether it
    /// lives on the stack (`Ref`) or on the heap (`Box`).
    fn deref_mut(&mut self) -> &mut MatcherPos<'a> {
        match *self {
            MatcherPosHandle::Box(ref mut owned) => owned,
            MatcherPosHandle::Ref(ref mut pos) => pos,
        }
    }
}

/// Represents the possible results of an attempted parse.
pub enum ParseResult<T> {
/// Parsed successfully.
@@ -241,10 +285,10 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {

/// Generate the top-level matcher position in which the "dot" is before the first token of the
/// matcher `ms` and we are going to start matching at position `lo` in the source.
fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
let match_idx_hi = count_names(&ms[..]);
fn initial_matcher_pos(ms: &[TokenTree], lo: BytePos) -> MatcherPos {
let match_idx_hi = count_names(ms);
let matches = create_matches(match_idx_hi);
Box::new(MatcherPos {
MatcherPos {
// Start with the top level matcher given to us
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
// The "dot" is before the first token of the matcher
@@ -267,7 +311,7 @@ fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
seq_op: None,
sep: None,
up: None,
})
}
}

/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
@@ -394,12 +438,12 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
/// # Returns
///
/// A `ParseResult`. Note that matches are kept track of through the items generated.
fn inner_parse_loop(
fn inner_parse_loop<'a>(
sess: &ParseSess,
cur_items: &mut SmallVector<Box<MatcherPos>>,
next_items: &mut Vec<Box<MatcherPos>>,
eof_items: &mut SmallVector<Box<MatcherPos>>,
bb_items: &mut SmallVector<Box<MatcherPos>>,
cur_items: &mut SmallVector<MatcherPosHandle<'a>>,
next_items: &mut Vec<MatcherPosHandle<'a>>,
eof_items: &mut SmallVector<MatcherPosHandle<'a>>,
bb_items: &mut SmallVector<MatcherPosHandle<'a>>,
token: &Token,
span: syntax_pos::Span,
) -> ParseResult<()> {
@@ -502,7 +546,7 @@ fn inner_parse_loop(
}

let matches = create_matches(item.matches.len());
cur_items.push(Box::new(MatcherPos {
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos {
stack: vec![],
sep: seq.separator.clone(),
seq_op: Some(seq.op),
@@ -514,7 +558,7 @@ fn inner_parse_loop(
up: Some(item),
sp_lo: sp.lo(),
top_elts: Tt(TokenTree::Sequence(sp, seq)),
}));
})));
}

// We need to match a metavar (but the identifier is invalid)... this is an error
@@ -596,7 +640,11 @@ pub fn parse(
// processes all of these possible matcher positions and produces possible next positions into
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
// and we start over again.
let mut cur_items = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo()));
//
// This MatcherPos instance is allocated on the stack. All others -- and
// there are frequently *no* others! -- are allocated on the heap.
let mut initial = initial_matcher_pos(ms, parser.span.lo());
let mut cur_items = SmallVector::one(MatcherPosHandle::Ref(&mut initial));
let mut next_items = Vec::new();

loop {
3 changes: 2 additions & 1 deletion src/libsyntax/ext/tt/macro_rules.rs
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@ use parse::token::Token::*;
use symbol::Symbol;
use tokenstream::{TokenStream, TokenTree};

use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::hash_map::Entry;

@@ -141,7 +142,7 @@ fn generic_extension<'cx>(cx: &'cx mut ExtCtxt,
}

let directory = Directory {
path: cx.current_expansion.module.directory.clone(),
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
};
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false);
5 changes: 3 additions & 2 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ use symbol::Symbol;
use tokenstream::{TokenStream, TokenTree};
use diagnostics::plugin::ErrorMap;

use std::borrow::Cow;
use std::collections::HashSet;
use std::iter;
use std::path::{Path, PathBuf};
@@ -89,8 +90,8 @@ impl ParseSess {
}

#[derive(Clone)]
pub struct Directory {
pub path: PathBuf,
pub struct Directory<'a> {
pub path: Cow<'a, Path>,
pub ownership: DirectoryOwnership,
}

17 changes: 9 additions & 8 deletions src/libsyntax/parse/parser.rs
Original file line number Diff line number Diff line change
@@ -57,6 +57,7 @@ use tokenstream::{self, Delimited, ThinTokenStream, TokenTree, TokenStream};
use symbol::{Symbol, keywords};
use util::ThinVec;

use std::borrow::Cow;
use std::cmp;
use std::mem;
use std::path::{self, Path, PathBuf};
@@ -228,7 +229,7 @@ pub struct Parser<'a> {
prev_token_kind: PrevTokenKind,
pub restrictions: Restrictions,
/// Used to determine the path to externally loaded source files
pub directory: Directory,
pub directory: Directory<'a>,
/// Whether to parse sub-modules in other files.
pub recurse_into_file_modules: bool,
/// Name of the root module this parser originated from. If `None`, then the
@@ -535,7 +536,7 @@ enum TokenExpectType {
impl<'a> Parser<'a> {
pub fn new(sess: &'a ParseSess,
tokens: TokenStream,
directory: Option<Directory>,
directory: Option<Directory<'a>>,
recurse_into_file_modules: bool,
desugar_doc_comments: bool)
-> Self {
@@ -549,7 +550,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
recurse_into_file_modules,
directory: Directory {
path: PathBuf::new(),
path: Cow::from(PathBuf::new()),
ownership: DirectoryOwnership::Owned { relative: None }
},
root_module_name: None,
@@ -572,9 +573,9 @@ impl<'a> Parser<'a> {
if let Some(directory) = directory {
parser.directory = directory;
} else if !parser.span.source_equal(&DUMMY_SP) {
if let FileName::Real(path) = sess.codemap().span_to_unmapped_path(parser.span) {
parser.directory.path = path;
parser.directory.path.pop();
if let FileName::Real(mut path) = sess.codemap().span_to_unmapped_path(parser.span) {
path.pop();
parser.directory.path = Cow::from(path);
}
}

@@ -6000,10 +6001,10 @@ impl<'a> Parser<'a> {

fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") {
self.directory.path.push(&path.as_str());
self.directory.path.to_mut().push(&path.as_str());
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
} else {
self.directory.path.push(&id.name.as_str());
self.directory.path.to_mut().push(&id.name.as_str());
}
}

3 changes: 2 additions & 1 deletion src/libsyntax/tokenstream.rs
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ use print::pprust;
use serialize::{Decoder, Decodable, Encoder, Encodable};
use util::RcSlice;

use std::borrow::Cow;
use std::{fmt, iter, mem};
use std::hash::{self, Hash};

@@ -106,7 +107,7 @@ impl TokenTree {
-> macro_parser::NamedParseResult {
// `None` is because we're not interpolating
let directory = Directory {
path: cx.current_expansion.module.directory.clone(),
path: Cow::from(cx.current_expansion.module.directory.as_path()),
ownership: cx.current_expansion.directory_ownership,
};
macro_parser::parse(cx.parse_sess(), tts, mtch, Some(directory), true)