diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 94df7a5a6c2d9..f94d5a5e4b5e3 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1764,7 +1764,9 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn slice(&self, begin: uint, end: uint) -> &'a str { - assert!(self.is_char_boundary(begin) && self.is_char_boundary(end)); + assert!(self.is_char_boundary(begin) && self.is_char_boundary(end), + "index {} and/or {} in `{}` do not lie on character boundary", begin, + end, *self); unsafe { raw::slice_bytes(*self, begin, end) } } @@ -1775,7 +1777,8 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn slice_to(&self, end: uint) -> &'a str { - assert!(self.is_char_boundary(end)); + assert!(self.is_char_boundary(end), "index {} in `{}` does not lie on \ + a character boundary", end, *self); unsafe { raw::slice_bytes(*self, 0, end) } } diff --git a/src/librustc/lint/builtin.rs b/src/librustc/lint/builtin.rs index 98a6f7d5ed38d..ae401b9d6f15c 100644 --- a/src/librustc/lint/builtin.rs +++ b/src/librustc/lint/builtin.rs @@ -1114,7 +1114,7 @@ impl UnusedMut { match mode { ast::BindByValue(ast::MutMutable) => { if !token::get_ident(ident).get().starts_with("_") { - mutables.insert_or_update_with(ident.name as uint, + mutables.insert_or_update_with(ident.name.uint(), vec!(id), |_, old| { old.push(id); }); } } diff --git a/src/librustc/metadata/decoder.rs b/src/librustc/metadata/decoder.rs index 8a2b95ae463b4..cc41223688ee0 100644 --- a/src/librustc/metadata/decoder.rs +++ b/src/librustc/metadata/decoder.rs @@ -323,7 +323,7 @@ fn item_name(intr: &IdentInterner, item: ebml::Doc) -> ast::Ident { let string = name.as_str_slice(); match intr.find_equiv(&string) { None => token::str_to_ident(string), - Some(val) => ast::Ident::new(val as ast::Name), + Some(val) => ast::Ident::new(val), } } diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs index 11b1687dc5599..fb2b4951ea3d6 100644 --- a/src/librustc/middle/astencode.rs +++ b/src/librustc/middle/astencode.rs @@ -1523,7 +1523,7 @@ fn test_basic() { fn foo() {} )); } - +/* NOTE: When there's a snapshot, update this (yay quasiquoter!) #[test] fn test_smalltalk() { let cx = mk_ctxt(); @@ -1531,6 +1531,7 @@ fn test_smalltalk() { fn foo() -> int { 3 + 4 } // first smalltalk program ever executed. 
)); } +*/ #[test] fn test_more() { diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs index c35767f99a835..11a8207f8c43e 100644 --- a/src/librustc/middle/trans/consts.rs +++ b/src/librustc/middle/trans/consts.rs @@ -42,6 +42,7 @@ use syntax::{ast, ast_util}; pub fn const_lit(cx: &CrateContext, e: &ast::Expr, lit: ast::Lit) -> ValueRef { let _icx = push_ctxt("trans_lit"); + debug!("const_lit: {}", lit); match lit.node { ast::LitByte(b) => C_integral(Type::uint_from_ty(cx, ast::TyU8), b as u64, false), ast::LitChar(i) => C_integral(Type::char(cx), i as u64, false), diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 82bb1bd58a6d3..3cb5cdc043962 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -18,7 +18,6 @@ use std::io; use syntax::parse; use syntax::parse::lexer; -use syntax::codemap::{BytePos, Span}; use html::escape::Escape; @@ -59,38 +58,30 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, None => {} } try!(write!(out, "class='rust {}'>\n", class.unwrap_or(""))); - let mut last = BytePos(0); let mut is_attribute = false; let mut is_macro = false; let mut is_macro_nonterminal = false; loop { let next = lexer.next_token(); - let test = if next.tok == t::EOF {lexer.pos} else {next.sp.lo}; - - // The lexer consumes all whitespace and non-doc-comments when iterating - // between tokens. If this token isn't directly adjacent to our last - // token, then we need to emit the whitespace/comment. - // - // If the gap has any '/' characters then we consider the whole thing a - // comment. This will classify some whitespace as a comment, but that - // doesn't matter too much for syntax highlighting purposes. - if test > last { - let snip = sess.span_diagnostic.cm.span_to_snippet(Span { - lo: last, - hi: test, - expn_info: None, - }).unwrap(); - if snip.as_slice().contains("/") { - try!(write!(out, "{}", - Escape(snip.as_slice()))); - } else { - try!(write!(out, "{}", Escape(snip.as_slice()))); - } - } - last = next.sp.hi; + + let snip = |sp| sess.span_diagnostic.cm.span_to_snippet(sp).unwrap(); + if next.tok == t::EOF { break } let klass = match next.tok { + t::WS => { + try!(write!(out, "{}", Escape(snip(next.sp).as_slice()))); + continue + }, + t::COMMENT => { + try!(write!(out, "{}", + Escape(snip(next.sp).as_slice()))); + continue + }, + t::SHEBANG(s) => { + try!(write!(out, "{}", Escape(s.as_str()))); + continue + }, // If this '&' token is directly adjacent to another token, assume // that it's the address-of operator instead of the and-operator. // This allows us to give all pointers their own class (`Box` and @@ -144,8 +135,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string", // number literals - t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) | - t::LIT_FLOAT(..) | t::LIT_FLOAT_UNSUFFIXED(..) => "number", + t::LIT_INTEGER(..) | t::LIT_FLOAT(..) 
=> "number", // keywords are also included in the identifier set t::IDENT(ident, _is_mod_sep) => { diff --git a/src/libsyntax/abi.rs b/src/libsyntax/abi.rs index 9771bc9386b16..5aaf7ed3dba5d 100644 --- a/src/libsyntax/abi.rs +++ b/src/libsyntax/abi.rs @@ -60,9 +60,12 @@ pub struct AbiData { } pub enum AbiArchitecture { - RustArch, // Not a real ABI (e.g., intrinsic) - AllArch, // An ABI that specifies cross-platform defaults (e.g., "C") - Archs(u32) // Multiple architectures (bitset) + /// Not a real ABI (e.g., intrinsic) + RustArch, + /// An ABI that specifies cross-platform defaults (e.g., "C") + AllArch, + /// Multiple architectures (bitset) + Archs(u32) } static AbiDatas: &'static [AbiData] = &[ @@ -84,21 +87,13 @@ static AbiDatas: &'static [AbiData] = &[ AbiData {abi: RustIntrinsic, name: "rust-intrinsic", abi_arch: RustArch}, ]; +/// Iterates through each of the defined ABIs. fn each_abi(op: |abi: Abi| -> bool) -> bool { - /*! - * - * Iterates through each of the defined ABIs. - */ - AbiDatas.iter().advance(|abi_data| op(abi_data.abi)) } +/// Returns the ABI with the given name (if any). pub fn lookup(name: &str) -> Option { - /*! - * - * Returns the ABI with the given name (if any). - */ - let mut res = None; each_abi(|abi| { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 5f3adbdb54df4..ebfc45d22cee9 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -24,7 +24,8 @@ use std::rc::Rc; use std::gc::{Gc, GC}; use serialize::{Encodable, Decodable, Encoder, Decoder}; -/// A pointer abstraction. FIXME(eddyb) #10676 use Rc in the future. +/// A pointer abstraction. +// FIXME(eddyb) #10676 use Rc in the future. pub type P = Gc; #[allow(non_snake_case_functions)] @@ -36,11 +37,11 @@ pub fn P(value: T) -> P { // FIXME #6993: in librustc, uses of "ident" should be replaced // by just "Name". -// an identifier contains a Name (index into the interner -// table) and a SyntaxContext to track renaming and -// macro expansion per Flatt et al., "Macros -// That Work Together" -#[deriving(Clone, Hash, PartialOrd, Eq, Ord, Show)] +/// An identifier contains a Name (index into the interner +/// table) and a SyntaxContext to track renaming and +/// macro expansion per Flatt et al., "Macros +/// That Work Together" +#[deriving(Clone, Hash, PartialOrd, Eq, Ord)] pub struct Ident { pub name: Name, pub ctxt: SyntaxContext @@ -49,6 +50,16 @@ pub struct Ident { impl Ident { /// Construct an identifier with the given name and an empty context: pub fn new(name: Name) -> Ident { Ident {name: name, ctxt: EMPTY_CTXT}} + + pub fn as_str<'a>(&'a self) -> &'a str { + self.name.as_str() + } +} + +impl Show for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "\"{}\"#{}", token::get_ident(*self).get(), self.ctxt) + } } impl PartialEq for Ident { @@ -95,7 +106,26 @@ pub static ILLEGAL_CTXT : SyntaxContext = 1; /// A name is a part of an identifier, representing a string or gensym. It's /// the result of interning. 
-pub type Name = u32; +#[deriving(Eq, Ord, PartialEq, PartialOrd, Hash, Encodable, Decodable, Clone, Show)] +pub struct Name(pub u32); + +impl Name { + pub fn as_str<'a>(&'a self) -> &'a str { + unsafe { + // FIXME #12938: can't use copy_lifetime since &str isn't a &T + ::std::mem::transmute(token::get_name(*self).get()) + } + } + + pub fn uint(&self) -> uint { + let Name(nm) = *self; + nm as uint + } + + pub fn ident(&self) -> Ident { + Ident { name: *self, ctxt: 0 } + } +} /// A mark represents a unique id associated with a macro expansion pub type Mrk = u32; @@ -122,10 +152,9 @@ pub struct Lifetime { pub name: Name } -// a "Path" is essentially Rust's notion of a name; -// for instance: std::cmp::PartialEq . It's represented -// as a sequence of identifiers, along with a bunch -// of supporting information. +/// A "Path" is essentially Rust's notion of a name; for instance: +/// std::cmp::PartialEq . It's represented as a sequence of identifiers, +/// along with a bunch of supporting information. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Path { pub span: Span, @@ -163,15 +192,15 @@ pub struct DefId { pub static LOCAL_CRATE: CrateNum = 0; pub static CRATE_NODE_ID: NodeId = 0; -// When parsing and doing expansions, we initially give all AST nodes this AST -// node value. Then later, in the renumber pass, we renumber them to have -// small, positive ids. +/// When parsing and doing expansions, we initially give all AST nodes this AST +/// node value. Then later, in the renumber pass, we renumber them to have +/// small, positive ids. pub static DUMMY_NODE_ID: NodeId = -1; -// The AST represents all type param bounds as types. -// typeck::collect::compute_bounds matches these against -// the "special" built-in traits (see middle::lang_items) and -// detects Copy, Send and Share. +/// The AST represents all type param bounds as types. +/// typeck::collect::compute_bounds matches these against +/// the "special" built-in traits (see middle::lang_items) and +/// detects Copy, Send and Share. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum TyParamBound { TraitTyParamBound(TraitRef), @@ -210,9 +239,9 @@ impl Generics { } } -// The set of MetaItems that define the compilation environment of the crate, -// used to drive conditional compilation -pub type CrateConfig = Vec>; +/// The set of MetaItems that define the compilation environment of the crate, +/// used to drive conditional compilation +pub type CrateConfig = Vec> ; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Crate { @@ -289,13 +318,13 @@ pub enum BindingMode { pub enum Pat_ { PatWild, PatWildMulti, - // A PatIdent may either be a new bound variable, - // or a nullary enum (in which case the third field - // is None). - // In the nullary enum case, the parser can't determine - // which it is. The resolver determines this, and - // records this pattern's NodeId in an auxiliary - // set (of "PatIdents that refer to nullary enums") + /// A PatIdent may either be a new bound variable, + /// or a nullary enum (in which case the third field + /// is None). + /// In the nullary enum case, the parser can't determine + /// which it is. 
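To make the new `Name` newtype concrete, a minimal sketch of its accessors (assuming `ast::Name` as defined above; the value 42 is invented for illustration):

```rust
fn main() {
    let n = Name(42);
    let Name(raw) = n;             // the raw u32 index is still reachable
    assert_eq!(raw, 42u32);
    assert_eq!(n.uint(), 42u);     // replaces the old `name as uint` casts
    assert_eq!(n.ident().name, n); // Ident wrapper with the empty context
}
```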
The resolver determines this, and + /// records this pattern's NodeId in an auxiliary + /// set (of "PatIdents that refer to nullary enums") PatIdent(BindingMode, SpannedIdent, Option>), PatEnum(Path, Option>>), /* "none" means a * pattern where * we don't bind the fields to names */ @@ -305,8 +334,8 @@ pub enum Pat_ { PatRegion(Gc), // reference pattern PatLit(Gc), PatRange(Gc, Gc), - // [a, b, ..i, y, z] is represented as - // PatVec(~[a, b], Some(i), ~[y, z]) + /// [a, b, ..i, y, z] is represented as: + /// PatVec(~[a, b], Some(i), ~[y, z]) PatVec(Vec>, Option>, Vec>), PatMac(Mac), } @@ -319,9 +348,12 @@ pub enum Mutability { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum ExprVstore { - ExprVstoreUniq, // ~[1,2,3,4] - ExprVstoreSlice, // &[1,2,3,4] - ExprVstoreMutSlice, // &mut [1,2,3,4] + /// ~[1, 2, 3, 4] + ExprVstoreUniq, + /// &[1, 2, 3, 4] + ExprVstoreSlice, + /// &mut [1, 2, 3, 4] + ExprVstoreMutSlice, } #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] @@ -359,16 +391,16 @@ pub type Stmt = Spanned; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Stmt_ { - // could be an item or a local (let) binding: + /// Could be an item or a local (let) binding: StmtDecl(Gc, NodeId), - // expr without trailing semi-colon (must have unit type): + /// Expr without trailing semi-colon (must have unit type): StmtExpr(Gc, NodeId), - // expr with trailing semi-colon (may have any type): + /// Expr with trailing semi-colon (may have any type): StmtSemi(Gc, NodeId), - // bool: is there a trailing sem-colon? + /// bool: is there a trailing semi-colon? StmtMac(Mac, bool), } @@ -397,9 +429,9 @@ pub type Decl = Spanned; #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Decl_ { - // a local (let) binding: + /// A local (let) binding: DeclLocal(Gc), - // an item binding: + /// An item binding: DeclItem(Gc), } @@ -443,7 +475,7 @@ pub struct Expr { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Expr_ { ExprVstore(Gc, ExprVstore), - // First expr is the place; second expr is the value. + /// First expr is the place; second expr is the value. ExprBox(Gc, Gc), ExprVec(Vec>), ExprCall(Gc, Vec>), @@ -483,130 +515,127 @@ pub enum Expr_ { ExprMac(Mac), - // A struct literal expression. + /// A struct literal expression. ExprStruct(Path, Vec , Option> /* base */), - // A vector literal constructed from one repeated element. + /// A vector literal constructed from one repeated element. ExprRepeat(Gc /* element */, Gc /* count */), - // No-op: used solely so we can pretty-print faithfully + /// No-op: used solely so we can pretty-print faithfully ExprParen(Gc) } -// When the main rust parser encounters a syntax-extension invocation, it -// parses the arguments to the invocation as a token-tree. This is a very -// loose structure, such that all sorts of different AST-fragments can -// be passed to syntax extensions using a uniform type. -// -// If the syntax extension is an MBE macro, it will attempt to match its -// LHS "matchers" against the provided token tree, and if it finds a -// match, will transcribe the RHS token tree, splicing in any captured -// macro_parser::matched_nonterminals into the TTNonterminals it finds. -// -// The RHS of an MBE macro is the only place a TTNonterminal or TTSeq -// makes any real sense. You could write them elsewhere but nothing -// else knows what to do with them, so you'll probably get a syntax -// error. 
-// +/// When the main rust parser encounters a syntax-extension invocation, it +/// parses the arguments to the invocation as a token-tree. This is a very +/// loose structure, such that all sorts of different AST-fragments can +/// be passed to syntax extensions using a uniform type. +/// +/// If the syntax extension is an MBE macro, it will attempt to match its +/// LHS "matchers" against the provided token tree, and if it finds a +/// match, will transcribe the RHS token tree, splicing in any captured +/// macro_parser::matched_nonterminals into the TTNonterminals it finds. +/// +/// The RHS of an MBE macro is the only place a TTNonterminal or TTSeq +/// makes any real sense. You could write them elsewhere but nothing +/// else knows what to do with them, so you'll probably get a syntax +/// error. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] #[doc="For macro invocations; parsing is delegated to the macro"] pub enum TokenTree { - // a single token + /// A single token TTTok(Span, ::parse::token::Token), - // a delimited sequence (the delimiters appear as the first - // and last elements of the vector) + /// A delimited sequence (the delimiters appear as the first + /// and last elements of the vector) // FIXME(eddyb) #6308 Use Rc<[TokenTree]> after DST. TTDelim(Rc>), // These only make sense for right-hand-sides of MBE macros: - // a kleene-style repetition sequence with a span, a TTForest, - // an optional separator, and a boolean where true indicates - // zero or more (..), and false indicates one or more (+). + /// A kleene-style repetition sequence with a span, a TTForest, + /// an optional separator, and a boolean where true indicates + /// zero or more (..), and false indicates one or more (+). // FIXME(eddyb) #6308 Use Rc<[TokenTree]> after DST. TTSeq(Span, Rc>, Option<::parse::token::Token>, bool), - // a syntactic variable that will be filled in by macro expansion. + /// A syntactic variable that will be filled in by macro expansion. TTNonterminal(Span, Ident) } -// -// Matchers are nodes defined-by and recognized-by the main rust parser and -// language, but they're only ever found inside syntax-extension invocations; -// indeed, the only thing that ever _activates_ the rules in the rust parser -// for parsing a matcher is a matcher looking for the 'matchers' nonterminal -// itself. Matchers represent a small sub-language for pattern-matching -// token-trees, and are thus primarily used by the macro-defining extension -// itself. -// -// MatchTok -// -------- -// -// A matcher that matches a single token, denoted by the token itself. So -// long as there's no $ involved. -// -// -// MatchSeq -// -------- -// -// A matcher that matches a sequence of sub-matchers, denoted various -// possible ways: -// -// $(M)* zero or more Ms -// $(M)+ one or more Ms -// $(M),+ one or more comma-separated Ms -// $(A B C);* zero or more semi-separated 'A B C' seqs -// -// -// MatchNonterminal -// ----------------- -// -// A matcher that matches one of a few interesting named rust -// nonterminals, such as types, expressions, items, or raw token-trees. A -// black-box matcher on expr, for example, binds an expr to a given ident, -// and that ident can re-occur as an interpolation in the RHS of a -// macro-by-example rule. For example: -// -// $foo:expr => 1 + $foo // interpolate an expr -// $foo:tt => $foo // interpolate a token-tree -// $foo:tt => bar! 
$foo // only other valid interpolation -// // is in arg position for another -// // macro -// -// As a final, horrifying aside, note that macro-by-example's input is -// also matched by one of these matchers. Holy self-referential! It is matched -// by a MatchSeq, specifically this one: -// -// $( $lhs:matchers => $rhs:tt );+ -// -// If you understand that, you have closed to loop and understand the whole -// macro system. Congratulations. -// +/// Matchers are nodes defined-by and recognized-by the main rust parser and +/// language, but they're only ever found inside syntax-extension invocations; +/// indeed, the only thing that ever _activates_ the rules in the rust parser +/// for parsing a matcher is a matcher looking for the 'matchers' nonterminal +/// itself. Matchers represent a small sub-language for pattern-matching +/// token-trees, and are thus primarily used by the macro-defining extension +/// itself. +/// +/// MatchTok +/// -------- +/// +/// A matcher that matches a single token, denoted by the token itself. So +/// long as there's no $ involved. +/// +/// +/// MatchSeq +/// -------- +/// +/// A matcher that matches a sequence of sub-matchers, denoted various +/// possible ways: +/// +/// $(M)* zero or more Ms +/// $(M)+ one or more Ms +/// $(M),+ one or more comma-separated Ms +/// $(A B C);* zero or more semi-separated 'A B C' seqs +/// +/// +/// MatchNonterminal +/// ----------------- +/// +/// A matcher that matches one of a few interesting named rust +/// nonterminals, such as types, expressions, items, or raw token-trees. A +/// black-box matcher on expr, for example, binds an expr to a given ident, +/// and that ident can re-occur as an interpolation in the RHS of a +/// macro-by-example rule. For example: +/// +/// $foo:expr => 1 + $foo // interpolate an expr +/// $foo:tt => $foo // interpolate a token-tree +/// $foo:tt => bar! $foo // only other valid interpolation +/// // is in arg position for another +/// // macro +/// +/// As a final, horrifying aside, note that macro-by-example's input is +/// also matched by one of these matchers. Holy self-referential! It is matched +/// by a MatchSeq, specifically this one: +/// +/// $( $lhs:matchers => $rhs:tt );+ +/// +/// If you understand that, you have closed the loop and understand the whole +/// macro system. Congratulations. pub type Matcher = Spanned; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Matcher_ { - // match one token + /// Match one token MatchTok(::parse::token::Token), - // match repetitions of a sequence: body, separator, zero ok?, - // lo, hi position-in-match-array used: + /// Match repetitions of a sequence: body, separator, zero ok?, + /// lo, hi position-in-match-array used: MatchSeq(Vec , Option<::parse::token::Token>, bool, uint, uint), - // parse a Rust NT: name to bind, name of NT, position in match array: + /// Parse a Rust NT: name to bind, name of NT, position in match array: MatchNonterminal(Ident, Ident, uint) } pub type Mac = Spanned; -// represents a macro invocation. The Path indicates which macro -// is being invoked, and the vector of token-trees contains the source -// of the macro invocation. -// There's only one flavor, now, so this could presumably be simplified. +/// Represents a macro invocation. The Path indicates which macro +/// is being invoked, and the vector of token-trees contains the source +/// of the macro invocation. +/// There's only one flavor, now, so this could presumably be simplified. 
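One concrete instance of the matcher forms listed above, in ordinary macro-by-example syntax (a sketch in the 2014 dialect, which still required the `macro_rules` feature gate):

```rust
#![feature(macro_rules)]

// $($e:expr),+ is a MatchSeq with a comma separator and "one or more";
// the RHS transcribes the matched expressions back out.
macro_rules! sum(
    ($($e:expr),+) => ($($e +)+ 0)
)

fn main() {
    assert_eq!(sum!(1, 2, 3), 6); // expands to 1 + 2 + 3 + 0
}
```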
#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Mac_ { MacInvocTT(Path, Vec , SyntaxContext), // new macro-invocation } -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub enum StrStyle { CookedStr, RawStr(uint) @@ -614,7 +643,7 @@ pub enum StrStyle { pub type Lit = Spanned; -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub enum Lit_ { LitStr(InternedString, StrStyle), LitBinary(Rc >), @@ -659,11 +688,10 @@ pub struct TypeMethod { pub vis: Visibility, } -/// Represents a method declaration in a trait declaration, possibly -/// including a default implementation -// A trait method is either required (meaning it doesn't have an -// implementation, just a signature) or provided (meaning it has a default -// implementation). +/// Represents a method declaration in a trait declaration, possibly including +/// a default implementation A trait method is either required (meaning it +/// doesn't have an implementation, just a signature) or provided (meaning it +/// has a default implementation). #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum TraitMethod { Required(TypeMethod), @@ -685,6 +713,16 @@ impl fmt::Show for IntTy { } } +impl IntTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyI => 1, + TyI8 => 2, + TyI16 | TyI32 | TyI64 => 3, + } + } +} + #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum UintTy { TyU, @@ -694,6 +732,16 @@ pub enum UintTy { TyU64, } +impl UintTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyU => 1, + TyU8 => 2, + TyU16 | TyU32 | TyU64 => 3, + } + } +} + impl fmt::Show for UintTy { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", ast_util::uint_ty_to_string(*self, None)) @@ -712,6 +760,14 @@ impl fmt::Show for FloatTy { } } +impl FloatTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyF32 | TyF64 => 3, // add F128 handling here + } + } +} + // NB PartialEq method appears below. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Ty { @@ -720,7 +776,7 @@ pub struct Ty { pub span: Span, } -// Not represented directly in the AST, referred to by name through a ty_path. +/// Not represented directly in the AST, referred to by name through a ty_path. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum PrimTy { TyInt(IntTy), @@ -753,10 +809,10 @@ pub struct ClosureTy { pub fn_style: FnStyle, pub onceness: Onceness, pub decl: P, - // Optional optvec distinguishes between "fn()" and "fn:()" so we can - // implement issue #7264. None means "fn()", which means infer a default - // bound based on pointer sigil during typeck. Some(Empty) means "fn:()", - // which means use no bounds (e.g., not even Owned on a ~fn()). + /// Optional optvec distinguishes between "fn()" and "fn:()" so we can + /// implement issue #7264. None means "fn()", which means infer a default + /// bound based on pointer sigil during typeck. Some(Empty) means "fn:()", + /// which means use no bounds (e.g., not even Owned on a ~fn()). 
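The new `suffix_len` helpers are easiest to read against literal tokens; a sketch (assuming the `TyU8`/`TyU16`/`TyI64`/`TyF32` variants above are in scope, and using `slice_to`, the 2014 `StrSlice` method patched at the top of this diff):

```rust
fn main() {
    assert_eq!(TyU8.suffix_len(), 2);  // the "u8" in `1u8`
    assert_eq!(TyI64.suffix_len(), 3); // the "i64" in `1i64`
    assert_eq!(TyF32.suffix_len(), 3); // the "f32" in `1.0f32`
    // Splitting a literal token into its digits and its suffix:
    let lit = "10u16";
    let split = lit.len() - TyU16.suffix_len();
    assert_eq!(lit.slice_to(split), "10");
}
```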
pub bounds: Option>, } @@ -789,11 +845,11 @@ pub enum Ty_ { TyUnboxedFn(Gc), TyTup(Vec> ), TyPath(Path, Option>, NodeId), // for #7264; see above - // No-op; kept solely so that we can pretty-print faithfully + /// No-op; kept solely so that we can pretty-print faithfully TyParen(P), TyTypeof(Gc), - // TyInfer means the type should be inferred instead of it having been - // specified. This can appear anywhere in a type. + /// TyInfer means the type should be inferred instead of it having been + /// specified. This can appear anywhere in a type. TyInfer, } @@ -854,8 +910,10 @@ pub struct FnDecl { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum FnStyle { - UnsafeFn, // declared with "unsafe fn" - NormalFn, // declared with "fn" + /// Declared with "unsafe fn" + UnsafeFn, + /// Declared with "fn" + NormalFn, } impl fmt::Show for FnStyle { @@ -869,18 +927,24 @@ impl fmt::Show for FnStyle { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum RetStyle { - NoReturn, // functions with return type _|_ that always - // raise an error or exit (i.e. never return to the caller) - Return, // everything else + /// Functions with return type ! that always + /// raise an error or exit (i.e. never return to the caller) + NoReturn, + /// Everything else + Return, } /// Represents the kind of 'self' associated with a method #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum ExplicitSelf_ { - SelfStatic, // no self - SelfValue(Ident), // `self` - SelfRegion(Option, Mutability, Ident), // `&'lt self`, `&'lt mut self` - SelfUniq(Ident), // `~self` + /// No self + SelfStatic, + /// `self` + SelfValue(Ident), + /// `&'lt self`, `&'lt mut self` + SelfRegion(Option, Mutability, Ident), + /// `~self` + SelfUniq(Ident) } pub type ExplicitSelf = Spanned; @@ -959,17 +1023,17 @@ pub type ViewPath = Spanned; #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub enum ViewPath_ { - // quux = foo::bar::baz - // - // or just - // - // foo::bar::baz (with 'baz =' implicitly on the left) + /// `quux = foo::bar::baz` + /// + /// or just + /// + /// `foo::bar::baz` (with 'baz =' implicitly on the left) ViewPathSimple(Ident, Path, NodeId), - // foo::bar::* + /// `foo::bar::*` ViewPathGlob(Path, NodeId), - // foo::bar::{a,b,c} + /// `foo::bar::{a,b,c}` ViewPathList(Path, Vec , NodeId) } @@ -983,20 +1047,20 @@ pub struct ViewItem { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum ViewItem_ { - // ident: name used to refer to this crate in the code - // optional (InternedString,StrStyle): if present, this is a location - // (containing arbitrary characters) from which to fetch the crate sources - // For example, extern crate whatever = "github.com/rust-lang/rust" + /// Ident: name used to refer to this crate in the code + /// optional (InternedString,StrStyle): if present, this is a location + /// (containing arbitrary characters) from which to fetch the crate sources + /// For example, extern crate whatever = "github.com/rust-lang/rust" ViewItemExternCrate(Ident, Option<(InternedString,StrStyle)>, NodeId), ViewItemUse(Gc), } -// Meta-data associated with an item +/// Meta-data associated with an item pub type Attribute = Spanned; -// Distinguishes between Attributes that decorate items and Attributes that -// are contained as statements within items. These two cases need to be -// distinguished for pretty-printing. +/// Distinguishes between Attributes that decorate items and Attributes that +/// are contained as statements within items. 
These two cases need to be +/// distinguished for pretty-printing. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum AttrStyle { AttrOuter, @@ -1006,7 +1070,7 @@ pub enum AttrStyle { #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct AttrId(pub uint); -// doc-comments are promoted to attributes that have is_sugared_doc = true +/// Doc-comments are promoted to attributes that have is_sugared_doc = true #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Attribute_ { pub id: AttrId, @@ -1015,13 +1079,12 @@ pub struct Attribute_ { pub is_sugared_doc: bool, } -/* - TraitRef's appear in impls. - resolve maps each TraitRef's ref_id to its defining trait; that's all - that the ref_id is for. The impl_id maps to the "self type" of this impl. - If this impl is an ItemImpl, the impl_id is redundant (it could be the - same as the impl's node id). - */ + +/// TraitRef's appear in impls. +/// resolve maps each TraitRef's ref_id to its defining trait; that's all +/// that the ref_id is for. The impl_id maps to the "self type" of this impl. +/// If this impl is an ItemImpl, the impl_id is redundant (it could be the +/// same as the impl's node id). #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct TraitRef { pub path: Path, @@ -1065,7 +1128,8 @@ pub type StructField = Spanned; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum StructFieldKind { NamedField(Ident, Visibility), - UnnamedField(Visibility), // element of a tuple-like struct + /// Element of a tuple-like struct + UnnamedField(Visibility), } impl StructFieldKind { @@ -1079,12 +1143,15 @@ impl StructFieldKind { #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub struct StructDef { - pub fields: Vec, /* fields, not including ctor */ - /* ID of the constructor. This is only used for tuple- or enum-like - * structs. */ + /// Fields, not including ctor + pub fields: Vec, + /// ID of the constructor. This is only used for tuple- or enum-like + /// structs. pub ctor_id: Option, - pub super_struct: Option>, // Super struct, if specified. - pub is_virtual: bool, // True iff the struct may be inherited from. + /// Super struct, if specified. + pub super_struct: Option>, + /// True iff the struct may be inherited from. + pub is_virtual: bool, } /* @@ -1120,7 +1187,7 @@ pub enum Item_ { Option, // (optional) trait this impl implements P, // self Vec>), - // a macro invocation (which includes macro definition) + /// A macro invocation (which includes macro definition) ItemMac(Mac), } @@ -1140,9 +1207,9 @@ pub enum ForeignItem_ { ForeignItemStatic(P, /* is_mutbl */ bool), } -// The data we save and restore about an inlined item or method. This is not -// part of the AST that we parse from a file, but it becomes part of the tree -// that we trans. +/// The data we save and restore about an inlined item or method. This is not +/// part of the AST that we parse from a file, but it becomes part of the tree +/// that we trans. #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub enum InlinedItem { IIItem(Gc), diff --git a/src/libsyntax/ast_map.rs b/src/libsyntax/ast_map.rs index c95ea4a24aadb..25c8e81bdbc91 100644 --- a/src/libsyntax/ast_map.rs +++ b/src/libsyntax/ast_map.rs @@ -112,13 +112,13 @@ pub enum Node { NodeLifetime(Gc), } -// The odd layout is to bring down the total size. +/// The odd layout is to bring down the total size. #[deriving(Clone)] enum MapEntry { - // Placeholder for holes in the map. 
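The two `AttrStyle` values correspond to surface syntax like this (a sketch; the item names are invented):

```rust
#[deriving(Clone)]        // AttrOuter: decorates the item that follows
pub struct Wrapper;

pub mod inner_demo {
    #![allow(dead_code)]  // AttrInner: contained within the enclosing item
}
```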
+ /// Placeholder for holes in the map. NotPresent, - // All the node types, with a parent ID. + /// All the node types, with a parent ID. EntryItem(NodeId, Gc), EntryForeignItem(NodeId, Gc), EntryTraitMethod(NodeId, Gc), @@ -133,14 +133,14 @@ enum MapEntry { EntryStructCtor(NodeId, Gc), EntryLifetime(NodeId, Gc), - // Roots for node trees. + /// Roots for node trees. RootCrate, RootInlinedParent(P) } struct InlinedParent { path: Vec , - // Required by NodeTraitMethod and NodeMethod. + /// Required by NodeTraitMethod and NodeMethod. def_id: DefId } @@ -243,7 +243,7 @@ impl Map { ItemForeignMod(ref nm) => Some(nm.abi), _ => None }, - // Wrong but OK, because the only inlined foreign items are intrinsics. + /// Wrong but OK, because the only inlined foreign items are intrinsics. Some(RootInlinedParent(_)) => Some(abi::RustIntrinsic), _ => None }; @@ -432,8 +432,8 @@ pub trait FoldOps { pub struct Ctx<'a, F> { map: &'a Map, - // The node in which we are currently mapping (an item or a method). - // When equal to DUMMY_NODE_ID, the next mapped node becomes the parent. + /// The node in which we are currently mapping (an item or a method). + /// When equal to DUMMY_NODE_ID, the next mapped node becomes the parent. parent: NodeId, fold_ops: F } @@ -618,9 +618,9 @@ pub fn map_crate(krate: Crate, fold_ops: F) -> (Crate, Map) { (krate, map) } -// Used for items loaded from external crate that are being inlined into this -// crate. The `path` should be the path to the item but should not include -// the item itself. +/// Used for items loaded from external crate that are being inlined into this +/// crate. The `path` should be the path to the item but should not include +/// the item itself. pub fn map_decoded_item(map: &Map, path: Vec , fold_ops: F, diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs index 57c60b4a94903..13fe8a1506459 100644 --- a/src/libsyntax/ast_util.rs +++ b/src/libsyntax/ast_util.rs @@ -101,8 +101,8 @@ pub fn is_path(e: Gc) -> bool { return match e.node { ExprPath(_) => true, _ => false }; } -// Get a string representation of a signed int type, with its value. -// We want to avoid "45int" and "-3int" in favor of "45" and "-3" +/// Get a string representation of a signed int type, with its value. +/// We want to avoid "45int" and "-3int" in favor of "45" and "-3" pub fn int_ty_to_string(t: IntTy, val: Option) -> String { let s = match t { TyI if val.is_some() => "i", @@ -131,8 +131,8 @@ pub fn int_ty_max(t: IntTy) -> u64 { } } -// Get a string representation of an unsigned int type, with its value. -// We want to avoid "42uint" in favor of "42u" +/// Get a string representation of an unsigned int type, with its value. +/// We want to avoid "42uint" in favor of "42u" pub fn uint_ty_to_string(t: UintTy, val: Option) -> String { let s = match t { TyU if val.is_some() => "u", @@ -249,8 +249,8 @@ pub fn public_methods(ms: Vec> ) -> Vec> { }).collect() } -// extract a TypeMethod from a TraitMethod. if the TraitMethod is -// a default, pull out the useful fields to make a TypeMethod +/// extract a TypeMethod from a TraitMethod. if the TraitMethod is +/// a default, pull out the useful fields to make a TypeMethod pub fn trait_method_to_ty_method(method: &TraitMethod) -> TypeMethod { match *method { Required(ref m) => (*m).clone(), @@ -705,7 +705,7 @@ pub fn segments_name_eq(a : &[ast::PathSegment], b : &[ast::PathSegment]) -> boo } } -// Returns true if this literal is a string and false otherwise. +/// Returns true if this literal is a string and false otherwise. 
pub fn lit_is_str(lit: Gc) -> bool { match lit.node { LitStr(..) => true, _ => false, } } @@ -754,14 +754,14 @@ mod test { #[test] fn idents_name_eq_test() { assert!(segments_name_eq( - [Ident{name:3,ctxt:4}, Ident{name:78,ctxt:82}] + [Ident{name:Name(3),ctxt:4}, Ident{name:Name(78),ctxt:82}] .iter().map(ident_to_segment).collect::>().as_slice(), - [Ident{name:3,ctxt:104}, Ident{name:78,ctxt:182}] + [Ident{name:Name(3),ctxt:104}, Ident{name:Name(78),ctxt:182}] .iter().map(ident_to_segment).collect::>().as_slice())); assert!(!segments_name_eq( - [Ident{name:3,ctxt:4}, Ident{name:78,ctxt:82}] + [Ident{name:Name(3),ctxt:4}, Ident{name:Name(78),ctxt:82}] .iter().map(ident_to_segment).collect::>().as_slice(), - [Ident{name:3,ctxt:104}, Ident{name:77,ctxt:182}] + [Ident{name:Name(3),ctxt:104}, Ident{name:Name(77),ctxt:182}] .iter().map(ident_to_segment).collect::>().as_slice())); } } diff --git a/src/libsyntax/attr.rs b/src/libsyntax/attr.rs index 3b2ee4e2a6134..e8b9ec9628f7d 100644 --- a/src/libsyntax/attr.rs +++ b/src/libsyntax/attr.rs @@ -46,10 +46,8 @@ pub trait AttrMetaMethods { /// #[foo="bar"] and #[foo(bar)] fn name(&self) -> InternedString; - /** - * Gets the string value if self is a MetaNameValue variant - * containing a string, otherwise None. - */ + /// Gets the string value if self is a MetaNameValue variant + /// containing a string, otherwise None. fn value_str(&self) -> Option; /// Gets a list of inner meta items from a list MetaItem type. fn meta_item_list<'a>(&'a self) -> Option<&'a [Gc]>; @@ -420,18 +418,16 @@ pub fn require_unique_names(diagnostic: &SpanHandler, metas: &[Gc]) { } -/** - * Fold this over attributes to parse #[repr(...)] forms. - * - * Valid repr contents: any of the primitive integral type names (see - * `int_type_of_word`, below) to specify the discriminant type; and `C`, to use - * the same discriminant size that the corresponding C enum would. These are - * not allowed on univariant or zero-variant enums, which have no discriminant. - * - * If a discriminant type is so specified, then the discriminant will be - * present (before fields, if any) with that type; reprensentation - * optimizations which would remove it will not be done. - */ +/// Fold this over attributes to parse #[repr(...)] forms. +/// +/// Valid repr contents: any of the primitive integral type names (see +/// `int_type_of_word`, below) to specify the discriminant type; and `C`, to use +/// the same discriminant size that the corresponding C enum would. These are +/// not allowed on univariant or zero-variant enums, which have no discriminant. +/// +/// If a discriminant type is so specified, then the discriminant will be +/// present (before fields, if any) with that type; representation +/// optimizations which would remove it will not be done. pub fn find_repr_attr(diagnostic: &SpanHandler, attr: &Attribute, acc: ReprAttr) -> ReprAttr { let mut acc = acc; diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index b3adf1daf418c..ef4024a8f83fe 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -96,7 +96,7 @@ pub struct Span { pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None }; -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub struct Spanned { pub node: T, pub span: Span, @@ -252,15 +252,15 @@ pub struct FileMap { } impl FileMap { - // EFFECT: register a start-of-line offset in the - // table of line-beginnings. 
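The `#[repr(...)]` forms that `find_repr_attr` folds over look like this in source (a sketch; the enum names are invented, and both enums are multi-variant so a discriminant exists):

```rust
#[repr(u8)]   // explicit discriminant type, via int_type_of_word
enum Flag { Off = 0, On = 1 }

#[repr(C)]    // use the size the corresponding C enum would have
enum CLike { A, B, C }
```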
- // UNCHECKED INVARIANT: these offsets must be added in the right - // order and must be in the right places; there is shared knowledge - // about what ends a line between this file and parse.rs - // WARNING: pos param here is the offset relative to start of CodeMap, - // and CodeMap will append a newline when adding a filemap without a newline at the end, - // so the safe way to call this is with value calculated as - // filemap.start_pos + newline_offset_relative_to_the_start_of_filemap. + /// EFFECT: register a start-of-line offset in the + /// table of line-beginnings. + /// UNCHECKED INVARIANT: these offsets must be added in the right + /// order and must be in the right places; there is shared knowledge + /// about what ends a line between this file and parse.rs + /// WARNING: pos param here is the offset relative to start of CodeMap, + /// and CodeMap will append a newline when adding a filemap without a newline at the end, + /// so the safe way to call this is with value calculated as + /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap. pub fn next_line(&self, pos: BytePos) { // the new charpos must be > the last one (or it's the first one). let mut lines = self.lines.borrow_mut();; @@ -269,7 +269,7 @@ impl FileMap { lines.push(pos); } - // get a line from the list of pre-computed line-beginnings + /// get a line from the list of pre-computed line-beginnings pub fn get_line(&self, line: int) -> String { let mut lines = self.lines.borrow_mut(); let begin: BytePos = *lines.get(line as uint) - self.start_pos; @@ -428,9 +428,8 @@ impl CodeMap { FileMapAndBytePos {fm: fm, pos: offset} } - // Converts an absolute BytePos to a CharPos relative to the filemap and above. + /// Converts an absolute BytePos to a CharPos relative to the filemap and above. pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { - debug!("codemap: converting {:?} to char pos", bpos); let idx = self.lookup_filemap_idx(bpos); let files = self.files.borrow(); let map = files.get(idx); @@ -439,7 +438,7 @@ impl CodeMap { let mut total_extra_bytes = 0; for mbc in map.multibyte_chars.borrow().iter() { - debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos); + debug!("{}-byte char at {}", mbc.bytes, mbc.pos); if mbc.pos < bpos { // every character is at least one byte, so we only // count the actual extra bytes. @@ -514,11 +513,11 @@ impl CodeMap { let chpos = self.bytepos_to_file_charpos(pos); let linebpos = *f.lines.borrow().get(a); let linechpos = self.bytepos_to_file_charpos(linebpos); - debug!("codemap: byte pos {:?} is on the line at byte pos {:?}", + debug!("byte pos {} is on the line at byte pos {}", pos, linebpos); - debug!("codemap: char pos {:?} is on the line at char pos {:?}", + debug!("char pos {} is on the line at char pos {}", chpos, linechpos); - debug!("codemap: byte is on line: {:?}", line); + debug!("byte is on line: {}", line); assert!(chpos >= linechpos); Loc { file: f, diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs index 3805390776e8d..e469f327ae8ba 100644 --- a/src/libsyntax/diagnostic.rs +++ b/src/libsyntax/diagnostic.rs @@ -21,7 +21,7 @@ use std::string::String; use term::WriterWrapper; use term; -// maximum number of lines we will print for each error; arbitrary. +/// maximum number of lines we will print for each error; arbitrary. static MAX_LINES: uint = 6u; #[deriving(Clone)] @@ -73,9 +73,9 @@ pub struct FatalError; /// or `.span_bug` rather than a failed assertion, etc. 
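The multibyte adjustment in `bytepos_to_file_charpos` is just "subtract the extra bytes seen so far"; a worked sketch of the arithmetic (string and offsets invented for illustration):

```rust
fn main() {
    let s = "aébc";       // 'é' occupies bytes 1..3
    let bpos = 4u;        // byte offset of 'c'
    let extra_bytes = 1u; // one 2-byte char before bpos counts 2 - 1 extra
    let chpos = bpos - extra_bytes;
    assert_eq!(chpos, 3); // 'c' is the fourth char, 0-based index 3
}
```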
pub struct ExplicitBug; -// a span-handler is like a handler but also -// accepts span information for source-location -// reporting. +/// A span-handler is like a handler but also +/// accepts span information for source-location +/// reporting. pub struct SpanHandler { pub handler: Handler, pub cm: codemap::CodeMap, @@ -114,9 +114,9 @@ impl SpanHandler { } } -// a handler deals with errors; certain errors -// (fatal, bug, unimpl) may cause immediate exit, -// others log errors for later reporting. +/// A handler deals with errors; certain errors +/// (fatal, bug, unimpl) may cause immediate exit, +/// others log errors for later reporting. pub struct Handler { err_count: Cell, emit: RefCell>, @@ -442,12 +442,12 @@ fn highlight_lines(err: &mut EmitterWriter, Ok(()) } -// Here are the differences between this and the normal `highlight_lines`: -// `custom_highlight_lines` will always put arrow on the last byte of the -// span (instead of the first byte). Also, when the span is too long (more -// than 6 lines), `custom_highlight_lines` will print the first line, then -// dot dot dot, then last line, whereas `highlight_lines` prints the first -// six lines. +/// Here are the differences between this and the normal `highlight_lines`: +/// `custom_highlight_lines` will always put arrow on the last byte of the +/// span (instead of the first byte). Also, when the span is too long (more +/// than 6 lines), `custom_highlight_lines` will print the first line, then +/// dot dot dot, then last line, whereas `highlight_lines` prints the first +/// six lines. fn custom_highlight_lines(w: &mut EmitterWriter, cm: &codemap::CodeMap, sp: Span, diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index a2a442f8b6aa7..9a5c7e86d21c6 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -278,9 +278,9 @@ pub enum SyntaxExtension { pub type NamedSyntaxExtension = (Name, SyntaxExtension); pub struct BlockInfo { - // should macros escape from this scope? + /// Should macros escape from this scope? pub macros_escape: bool, - // what are the pending renames? + /// What are the pending renames? pub pending_renames: mtwt::RenameList, } @@ -293,8 +293,8 @@ impl BlockInfo { } } -// The base map of methods for expanding syntax extension -// AST nodes into full ASTs +/// The base map of methods for expanding syntax extension +/// AST nodes into full ASTs pub fn syntax_expander_table() -> SyntaxEnv { // utility function to simplify creating NormalTT syntax extensions fn builtin_normal_expander(f: MacroExpanderFn) -> SyntaxExtension { @@ -398,9 +398,9 @@ pub fn syntax_expander_table() -> SyntaxEnv { syntax_expanders } -// One of these is made during expansion and incrementally updated as we go; -// when a macro expansion occurs, the resulting nodes have the backtrace() -// -> expn_info of their expansion context stored into their span. +/// One of these is made during expansion and incrementally updated as we go; +/// when a macro expansion occurs, the resulting nodes have the backtrace() +/// -> expn_info of their expansion context stored into their span. 
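`macros_escape` models the `#[macro_escape]` attribute; a sketch of the scoping it controls (2014 dialect, module and macro names invented):

```rust
#![feature(macro_rules)]

#[macro_escape]
mod macros {
    macro_rules! two( () => (2) )
}

// Because the module lets its macros escape, two! is usable here.
fn main() {
    assert_eq!(two!(), 2);
}
```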
pub struct ExtCtxt<'a> { pub parse_sess: &'a parse::ParseSess, pub cfg: ast::CrateConfig, @@ -535,6 +535,9 @@ impl<'a> ExtCtxt<'a> { pub fn ident_of(&self, st: &str) -> ast::Ident { str_to_ident(st) } + pub fn name_of(&self, st: &str) -> ast::Name { + token::intern(st) + } } /// Extract a string literal from the macro expanded version of `expr`, @@ -579,9 +582,9 @@ pub fn get_single_str_from_tts(cx: &ExtCtxt, cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice()); } else { match tts[0] { - ast::TTTok(_, token::LIT_STR(ident)) - | ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => { - return Some(token::get_ident(ident).get().to_string()) + ast::TTTok(_, token::LIT_STR(ident)) => return Some(parse::str_lit(ident.as_str())), + ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => { + return Some(parse::raw_str_lit(ident.as_str())) } _ => { cx.span_err(sp, @@ -612,11 +615,11 @@ pub fn get_exprs_from_tts(cx: &mut ExtCtxt, Some(es) } -// in order to have some notion of scoping for macros, -// we want to implement the notion of a transformation -// environment. +/// In order to have some notion of scoping for macros, +/// we want to implement the notion of a transformation +/// environment. -// This environment maps Names to SyntaxExtensions. +/// This environment maps Names to SyntaxExtensions. //impl question: how to implement it? Initially, the // env will contain only macros, so it might be painful @@ -633,7 +636,6 @@ struct MapChainFrame { map: HashMap, } -// Only generic to make it easy to test pub struct SyntaxEnv { chain: Vec , } diff --git a/src/libsyntax/ext/deriving/encodable.rs b/src/libsyntax/ext/deriving/encodable.rs index 652d593c0042c..3b34407edfeaa 100644 --- a/src/libsyntax/ext/deriving/encodable.rs +++ b/src/libsyntax/ext/deriving/encodable.rs @@ -8,79 +8,76 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -The compiler code necessary to implement the `#[deriving(Encodable)]` -(and `Decodable`, in decodable.rs) extension. The idea here is that -type-defining items may be tagged with `#[deriving(Encodable, Decodable)]`. - -For example, a type like: - -```ignore -#[deriving(Encodable, Decodable)] -struct Node { id: uint } -``` - -would generate two implementations like: - -```ignore -impl Encodable for Node { - fn encode(&self, s: &S) { - s.emit_struct("Node", 1, || { - s.emit_field("id", 0, || s.emit_uint(self.id)) - }) - } -} - -impl Decodable for node_id { - fn decode(d: &D) -> Node { - d.read_struct("Node", 1, || { - Node { - id: d.read_field("x".to_string(), 0, || decode(d)) - } - }) - } -} -``` - -Other interesting scenarios are whe the item has type parameters or -references other non-built-in types. A type definition like: - -```ignore -#[deriving(Encodable, Decodable)] -struct spanned { node: T, span: Span } -``` - -would yield functions like: - -```ignore - impl< - S: Encoder, - T: Encodable - > spanned: Encodable { - fn encode(s: &S) { - s.emit_rec(|| { - s.emit_field("node", 0, || self.node.encode(s)); - s.emit_field("span", 1, || self.span.encode(s)); - }) - } - } - - impl< - D: Decoder, - T: Decodable - > spanned: Decodable { - fn decode(d: &D) -> spanned { - d.read_rec(|| { - { - node: d.read_field("node".to_string(), 0, || decode(d)), - span: d.read_field("span".to_string(), 1, || decode(d)), - } - }) - } - } -``` -*/ +//! The compiler code necessary to implement the `#[deriving(Encodable)]` +//! (and `Decodable`, in decodable.rs) extension. The idea here is that +//! 
type-defining items may be tagged with `#[deriving(Encodable, Decodable)]`. +//! +//! For example, a type like: +//! +//! ```ignore +//! #[deriving(Encodable, Decodable)] +//! struct Node { id: uint } +//! ``` +//! +//! would generate two implementations like: +//! +//! ```ignore +//! impl Encodable for Node { +//! fn encode(&self, s: &S) { +//! s.emit_struct("Node", 1, || { +//! s.emit_field("id", 0, || s.emit_uint(self.id)) +//! }) +//! } +//! } +//! +//! impl Decodable for node_id { +//! fn decode(d: &D) -> Node { +//! d.read_struct("Node", 1, || { +//! Node { +//! id: d.read_field("x".to_string(), 0, || decode(d)) +//! } +//! }) +//! } +//! } +//! ``` +//! +//! Other interesting scenarios are when the item has type parameters or +//! references other non-built-in types. A type definition like: +//! +//! ```ignore +//! #[deriving(Encodable, Decodable)] +//! struct spanned { node: T, span: Span } +//! ``` +//! +//! would yield functions like: +//! +//! ```ignore +//! impl< +//! S: Encoder, +//! T: Encodable +//! > spanned: Encodable { +//! fn encode(s: &S) { +//! s.emit_rec(|| { +//! s.emit_field("node", 0, || self.node.encode(s)); +//! s.emit_field("span", 1, || self.span.encode(s)); +//! }) +//! } +//! } +//! +//! impl< +//! D: Decoder, +//! T: Decodable +//! > spanned: Decodable { +//! fn decode(d: &D) -> spanned { +//! d.read_rec(|| { +//! { +//! node: d.read_field("node".to_string(), 0, || decode(d)), +//! span: d.read_field("span".to_string(), 1, || decode(d)), +//! } +//! }) +//! } +//! } +//! ``` use ast::{MetaItem, Item, Expr, ExprRet, MutMutable, LitNil}; use codemap::Span; diff --git a/src/libsyntax/ext/deriving/generic/mod.rs b/src/libsyntax/ext/deriving/generic/mod.rs index 764c88cc954ed..c9f5936a9bb05 100644 --- a/src/libsyntax/ext/deriving/generic/mod.rs +++ b/src/libsyntax/ext/deriving/generic/mod.rs @@ -8,174 +8,170 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Some code that abstracts away much of the boilerplate of writing -`deriving` instances for traits. Among other things it manages getting -access to the fields of the 4 different sorts of structs and enum -variants, as well as creating the method and impl ast instances. - -Supported features (fairly exhaustive): - -- Methods taking any number of parameters of any type, and returning - any type, other than vectors, bottom and closures. -- Generating `impl`s for types with type parameters and lifetimes - (e.g. `Option`), the parameters are automatically given the - current trait as a bound. (This includes separate type parameters - and lifetimes for methods.) -- Additional bounds on the type parameters, e.g. the `Ord` instance - requires an explicit `PartialEq` bound at the - moment. (`TraitDef.additional_bounds`) - -Unsupported: FIXME #6257: calling methods on reference fields, -e.g. deriving Eq/Ord/Clone don't work on `struct A(&int)`, -because of how the auto-dereferencing happens. - -The most important thing for implementers is the `Substructure` and -`SubstructureFields` objects. The latter groups 5 possibilities of the -arguments: - -- `Struct`, when `Self` is a struct (including tuple structs, e.g - `struct T(int, char)`). -- `EnumMatching`, when `Self` is an enum and all the arguments are the - same variant of the enum (e.g. `Some(1)`, `Some(3)` and `Some(4)`) -- `EnumNonMatching` when `Self` is an enum and the arguments are not - the same variant (e.g. `None`, `Some(1)` and `None`). 
If - `const_nonmatching` is true, this will contain an empty list. -- `StaticEnum` and `StaticStruct` for static methods, where the type - being derived upon is either an enum or struct respectively. (Any - argument with type Self is just grouped among the non-self - arguments.) - -In the first two cases, the values from the corresponding fields in -all the arguments are grouped together. In the `EnumNonMatching` case -this isn't possible (different variants have different fields), so the -fields are grouped by which argument they come from. There are no -fields with values in the static cases, so these are treated entirely -differently. - -The non-static cases have `Option` in several places associated -with field `expr`s. This represents the name of the field it is -associated with. It is only not `None` when the associated field has -an identifier in the source code. For example, the `x`s in the -following snippet - -```rust -struct A { x : int } - -struct B(int); - -enum C { - C0(int), - C1 { x: int } -} -``` - -The `int`s in `B` and `C0` don't have an identifier, so the -`Option`s would be `None` for them. - -In the static cases, the structure is summarised, either into the just -spans of the fields or a list of spans and the field idents (for tuple -structs and record structs, respectively), or a list of these, for -enums (one for each variant). For empty struct and empty enum -variants, it is represented as a count of 0. - -# Examples - -The following simplified `PartialEq` is used for in-code examples: - -```rust -trait PartialEq { - fn eq(&self, other: &Self); -} -impl PartialEq for int { - fn eq(&self, other: &int) -> bool { - *self == *other - } -} -``` - -Some examples of the values of `SubstructureFields` follow, using the -above `PartialEq`, `A`, `B` and `C`. - -## Structs - -When generating the `expr` for the `A` impl, the `SubstructureFields` is - -~~~text -Struct(~[FieldInfo { - span: - name: Some(), - self_: , - other: ~[, - name: None, - - ~[] - }]) -~~~ - -## Enums - -When generating the `expr` for a call with `self == C0(a)` and `other -== C0(b)`, the SubstructureFields is - -~~~text -EnumMatching(0, , - ~[FieldInfo { - span: - name: None, - self_: , - other: ~[] - }]) -~~~ - -For `C1 {x}` and `C1 {x}`, - -~~~text -EnumMatching(1, , - ~[FieldInfo { - span: - name: Some(), - self_: , - other: ~[] - }]) -~~~ - -For `C0(a)` and `C1 {x}` , - -~~~text -EnumNonMatching(~[(0, , - ~[(, None, )]), - (1, , - ~[(, Some(), - )])]) -~~~ - -(and vice versa, but with the order of the outermost list flipped.) - -## Static - -A static method on the above would result in, - -~~~text -StaticStruct(, Named(~[(, )])) - -StaticStruct(, Unnamed(~[])) - -StaticEnum(, ~[(, , Unnamed(~[])), - (, , - Named(~[(, )]))]) -~~~ - -*/ +//! Some code that abstracts away much of the boilerplate of writing +//! `deriving` instances for traits. Among other things it manages getting +//! access to the fields of the 4 different sorts of structs and enum +//! variants, as well as creating the method and impl ast instances. +//! +//! Supported features (fairly exhaustive): +//! +//! - Methods taking any number of parameters of any type, and returning +//! any type, other than vectors, bottom and closures. +//! - Generating `impl`s for types with type parameters and lifetimes +//! (e.g. `Option`), the parameters are automatically given the +//! current trait as a bound. (This includes separate type parameters +//! and lifetimes for methods.) +//! - Additional bounds on the type parameters, e.g. 
the `Ord` instance +//! requires an explicit `PartialEq` bound at the +//! moment. (`TraitDef.additional_bounds`) +//! +//! Unsupported: FIXME #6257: calling methods on reference fields, +//! e.g. deriving Eq/Ord/Clone don't work on `struct A(&int)`, +//! because of how the auto-dereferencing happens. +//! +//! The most important thing for implementers is the `Substructure` and +//! `SubstructureFields` objects. The latter groups 5 possibilities of the +//! arguments: +//! +//! - `Struct`, when `Self` is a struct (including tuple structs, e.g +//! `struct T(int, char)`). +//! - `EnumMatching`, when `Self` is an enum and all the arguments are the +//! same variant of the enum (e.g. `Some(1)`, `Some(3)` and `Some(4)`) +//! - `EnumNonMatching` when `Self` is an enum and the arguments are not +//! the same variant (e.g. `None`, `Some(1)` and `None`). If +//! `const_nonmatching` is true, this will contain an empty list. +//! - `StaticEnum` and `StaticStruct` for static methods, where the type +//! being derived upon is either an enum or struct respectively. (Any +//! argument with type Self is just grouped among the non-self +//! arguments.) +//! +//! In the first two cases, the values from the corresponding fields in +//! all the arguments are grouped together. In the `EnumNonMatching` case +//! this isn't possible (different variants have different fields), so the +//! fields are grouped by which argument they come from. There are no +//! fields with values in the static cases, so these are treated entirely +//! differently. +//! +//! The non-static cases have `Option` in several places associated +//! with field `expr`s. This represents the name of the field it is +//! associated with. It is only not `None` when the associated field has +//! an identifier in the source code. For example, the `x`s in the +//! following snippet +//! +//! ```rust +//! struct A { x : int } +//! +//! struct B(int); +//! +//! enum C { +//! C0(int), +//! C1 { x: int } +//! } +//! ``` +//! +//! The `int`s in `B` and `C0` don't have an identifier, so the +//! `Option`s would be `None` for them. +//! +//! In the static cases, the structure is summarised, either into the just +//! spans of the fields or a list of spans and the field idents (for tuple +//! structs and record structs, respectively), or a list of these, for +//! enums (one for each variant). For empty struct and empty enum +//! variants, it is represented as a count of 0. +//! +//! # Examples +//! +//! The following simplified `PartialEq` is used for in-code examples: +//! +//! ```rust +//! trait PartialEq { +//! fn eq(&self, other: &Self); +//! } +//! impl PartialEq for int { +//! fn eq(&self, other: &int) -> bool { +//! *self == *other +//! } +//! } +//! ``` +//! +//! Some examples of the values of `SubstructureFields` follow, using the +//! above `PartialEq`, `A`, `B` and `C`. +//! +//! ## Structs +//! +//! When generating the `expr` for the `A` impl, the `SubstructureFields` is +//! +//! ~~~text +//! Struct(~[FieldInfo { +//! span: +//! name: Some(), +//! self_: , +//! other: ~[, +//! name: None, +//! +//! ~[] +//! }]) +//! ~~~ +//! +//! ## Enums +//! +//! When generating the `expr` for a call with `self == C0(a)` and `other +//! == C0(b)`, the SubstructureFields is +//! +//! ~~~text +//! EnumMatching(0, , +//! ~[FieldInfo { +//! span: +//! name: None, +//! self_: , +//! other: ~[] +//! }]) +//! ~~~ +//! +//! For `C1 {x}` and `C1 {x}`, +//! +//! ~~~text +//! EnumMatching(1, , +//! ~[FieldInfo { +//! span: +//! name: Some(), +//! self_: , +//! 
+//!                 other: ~[<expr for other.x>]
+//!               }])
+//! ~~~
+//!
+//! For `C0(a)` and `C1 {x}`,
+//!
+//! ~~~text
+//! EnumNonMatching(~[(0, <ast::Variant for C0>,
+//!                    ~[(<span of int>, None, <expr for a>)]),
+//!                   (1, <ast::Variant for C1>,
+//!                    ~[(<span of x>, Some(<ident of x>),
+//!                       <expr for x>)])])
+//! ~~~
+//!
+//! (and vice versa, but with the order of the outermost list flipped.)
+//!
+//! ## Static
+//!
+//! A static method on the above would result in,
+//!
+//! ~~~text
+//! StaticStruct(<ast::StructDef of A>, Named(~[(<ident of x>, <span of x>)]))
+//!
+//! StaticStruct(<ast::StructDef of B>, Unnamed(~[<span of int>]))
+//!
+//! StaticEnum(<ast::EnumDef of C>, ~[(<ident of C0>, <span of C0>, Unnamed(~[<span of int>])),
+//!                                   (<ident of C1>, <span of C1>,
+//!                                    Named(~[(<ident of x>, <span of x>)]))])
+//! ~~~

 use std::cell::RefCell;
 use std::gc::{Gc, GC};
diff --git a/src/libsyntax/ext/deriving/generic/ty.rs b/src/libsyntax/ext/deriving/generic/ty.rs
index b53281f99633f..f6a39d7b2e6c1 100644
--- a/src/libsyntax/ext/deriving/generic/ty.rs
+++ b/src/libsyntax/ext/deriving/generic/ty.rs
@@ -25,8 +25,10 @@ use std::gc::Gc;
 /// The types of pointers
 pub enum PtrTy<'a> {
-    Send, // ~
-    Borrowed(Option<&'a str>, ast::Mutability), // &['lifetime] [mut]
+    /// ~
+    Send,
+    /// &'lifetime mut
+    Borrowed(Option<&'a str>, ast::Mutability),
 }
 
 /// A path, e.g. `::std::option::Option::<int>` (global). Has support
@@ -83,12 +85,12 @@ impl<'a> Path<'a> {
 /// A type. Supports pointers (except for *), Self, and literals
 pub enum Ty<'a> {
     Self,
-    // &/Box/ Ty
+    /// &/Box/ Ty
     Ptr(Box<Ty<'a>>, PtrTy<'a>),
-    // mod::mod::Type<[lifetime], [Params...]>, including a plain type
-    // parameter, and things like `int`
+    /// mod::mod::Type<[lifetime], [Params...]>, including a plain type
+    /// parameter, and things like `int`
     Literal(Path<'a>),
-    // includes nil
+    /// includes unit
     Tuple(Vec<Ty<'a>> )
 }
diff --git a/src/libsyntax/ext/deriving/show.rs b/src/libsyntax/ext/deriving/show.rs
index 8e673ff246598..05b5131d7e4d3 100644
--- a/src/libsyntax/ext/deriving/show.rs
+++ b/src/libsyntax/ext/deriving/show.rs
@@ -55,8 +55,8 @@ pub fn expand_deriving_show(cx: &mut ExtCtxt,
     trait_def.expand(cx, mitem, item, push)
 }
 
-// we construct a format string and then defer to std::fmt, since that
-// knows what's up with formatting at so on.
+/// We construct a format string and then defer to std::fmt, since that
+/// knows what's up with formatting and so on.
 fn show_substructure(cx: &mut ExtCtxt, span: Span,
                      substr: &Substructure) -> Gc<Expr> {
     // build `<name>`, `<name>({}, {}, ...)` or `<name> { <field>: {},
diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs
index 9fe431cfb6c75..b7d72ae4debc1 100644
--- a/src/libsyntax/ext/expand.rs
+++ b/src/libsyntax/ext/expand.rs
@@ -246,11 +246,11 @@ pub fn expand_expr(e: Gc<ast::Expr>, fld: &mut MacroExpander) -> Gc<ast::Expr> {
         }
     }
 }
 
-// Rename loop label and expand its loop body
-//
-// The renaming procedure for loop is different in the sense that the loop
-// body is in a block enclosed by loop head so the renaming of loop label
-// must be propagated to the enclosed context.
+/// Rename loop label and expand its loop body
+///
+/// The renaming procedure for loop is different in the sense that the loop
+/// body is in a block enclosed by loop head so the renaming of loop label
+/// must be propagated to the enclosed context.
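To make the label-renaming requirement concrete, here is a small illustrative example (not part of this patch; the macro and names are invented). The expansion places a label on the loop head and a `break` referring to that label inside the body, so a rename of the label that did not propagate into the enclosed block would leave the `break` pointing at nothing:

```rust
// Hypothetical macro whose expansion contains a labelled loop. A
// hygienic rename of 'l on the loop head must also reach the
// `break 'l` inside the body -- the propagation expand_loop_block
// describes.
macro_rules! repeat_until {
    ($cond:expr, $body:block) => {
        'l: loop {
            $body
            if $cond { break 'l; }
        }
    }
}

fn main() {
    let mut i = 0;
    repeat_until!(i == 3, { i += 1; });
    assert_eq!(i, 3);
}
```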
fn expand_loop_block(loop_block: P, opt_ident: Option, fld: &mut MacroExpander) -> (P, Option) { @@ -1150,7 +1150,7 @@ mod test { use super::{pattern_bindings, expand_crate, contains_macro_escape}; use super::{PatIdentFinder, IdentRenamer, PatIdentRenamer}; use ast; - use ast::{Attribute_, AttrOuter, MetaWord}; + use ast::{Attribute_, AttrOuter, MetaWord, Name}; use attr; use codemap; use codemap::Spanned; @@ -1665,12 +1665,12 @@ foo_module!() let f_ident = token::str_to_ident("f"); let x_ident = token::str_to_ident("x"); let int_ident = token::str_to_ident("int"); - let renames = vec!((x_ident,16)); + let renames = vec!((x_ident,Name(16))); let mut renamer = IdentRenamer{renames: &renames}; let renamed_crate = renamer.fold_crate(the_crate); let idents = crate_idents(&renamed_crate); let resolved : Vec = idents.iter().map(|id| mtwt::resolve(*id)).collect(); - assert_eq!(resolved,vec!(f_ident.name,16,int_ident.name,16,16,16)); + assert_eq!(resolved,vec!(f_ident.name,Name(16),int_ident.name,Name(16),Name(16),Name(16))); } // test the PatIdentRenamer; only PatIdents get renamed @@ -1680,13 +1680,13 @@ foo_module!() let f_ident = token::str_to_ident("f"); let x_ident = token::str_to_ident("x"); let int_ident = token::str_to_ident("int"); - let renames = vec!((x_ident,16)); + let renames = vec!((x_ident,Name(16))); let mut renamer = PatIdentRenamer{renames: &renames}; let renamed_crate = renamer.fold_crate(the_crate); let idents = crate_idents(&renamed_crate); let resolved : Vec = idents.iter().map(|id| mtwt::resolve(*id)).collect(); let x_name = x_ident.name; - assert_eq!(resolved,vec!(f_ident.name,16,int_ident.name,16,x_name,x_name)); + assert_eq!(resolved,vec!(f_ident.name,Name(16),int_ident.name,Name(16),x_name,x_name)); } diff --git a/src/libsyntax/ext/format.rs b/src/libsyntax/ext/format.rs index f486d2de3398b..786fd953f8901 100644 --- a/src/libsyntax/ext/format.rs +++ b/src/libsyntax/ext/format.rs @@ -37,24 +37,24 @@ struct Context<'a, 'b> { ecx: &'a mut ExtCtxt<'b>, fmtsp: Span, - // Parsed argument expressions and the types that we've found so far for - // them. + /// Parsed argument expressions and the types that we've found so far for + /// them. args: Vec>, arg_types: Vec>, - // Parsed named expressions and the types that we've found for them so far. - // Note that we keep a side-array of the ordering of the named arguments - // found to be sure that we can translate them in the same order that they - // were declared in. + /// Parsed named expressions and the types that we've found for them so far. + /// Note that we keep a side-array of the ordering of the named arguments + /// found to be sure that we can translate them in the same order that they + /// were declared in. names: HashMap>, name_types: HashMap, name_ordering: Vec, - // Collection of the compiled `rt::Piece` structures + /// Collection of the compiled `rt::Piece` structures pieces: Vec>, name_positions: HashMap, method_statics: Vec>, - // Updated as arguments are consumed or methods are entered + /// Updated as arguments are consumed or methods are entered nest_level: uint, next_arg: uint, } diff --git a/src/libsyntax/ext/mtwt.rs b/src/libsyntax/ext/mtwt.rs index 18466e381a58b..2c94db5296750 100644 --- a/src/libsyntax/ext/mtwt.rs +++ b/src/libsyntax/ext/mtwt.rs @@ -21,16 +21,16 @@ use std::cell::RefCell; use std::rc::Rc; use std::collections::HashMap; -// the SCTable contains a table of SyntaxContext_'s. 
It -// represents a flattened tree structure, to avoid having -// managed pointers everywhere (that caused an ICE). -// the mark_memo and rename_memo fields are side-tables -// that ensure that adding the same mark to the same context -// gives you back the same context as before. This shouldn't -// change the semantics--everything here is immutable--but -// it should cut down on memory use *a lot*; applying a mark -// to a tree containing 50 identifiers would otherwise generate -// 50 new contexts +/// The SCTable contains a table of SyntaxContext_'s. It +/// represents a flattened tree structure, to avoid having +/// managed pointers everywhere (that caused an ICE). +/// the mark_memo and rename_memo fields are side-tables +/// that ensure that adding the same mark to the same context +/// gives you back the same context as before. This shouldn't +/// change the semantics--everything here is immutable--but +/// it should cut down on memory use *a lot*; applying a mark +/// to a tree containing 50 identifiers would otherwise generate +/// 50 new contexts pub struct SCTable { table: RefCell>, mark_memo: RefCell>, @@ -41,16 +41,16 @@ pub struct SCTable { pub enum SyntaxContext_ { EmptyCtxt, Mark (Mrk,SyntaxContext), - // flattening the name and syntaxcontext into the rename... - // HIDDEN INVARIANTS: - // 1) the first name in a Rename node - // can only be a programmer-supplied name. - // 2) Every Rename node with a given Name in the - // "to" slot must have the same name and context - // in the "from" slot. In essence, they're all - // pointers to a single "rename" event node. + /// flattening the name and syntaxcontext into the rename... + /// HIDDEN INVARIANTS: + /// 1) the first name in a Rename node + /// can only be a programmer-supplied name. + /// 2) Every Rename node with a given Name in the + /// "to" slot must have the same name and context + /// in the "from" slot. In essence, they're all + /// pointers to a single "rename" event node. Rename (Ident,Name,SyntaxContext), - // actually, IllegalCtxt may not be necessary. + /// actually, IllegalCtxt may not be necessary. 
IllegalCtxt } @@ -62,7 +62,7 @@ pub fn apply_mark(m: Mrk, ctxt: SyntaxContext) -> SyntaxContext { with_sctable(|table| apply_mark_internal(m, ctxt, table)) } -// Extend a syntax context with a given mark and sctable (explicit memoization) +/// Extend a syntax context with a given mark and sctable (explicit memoization) fn apply_mark_internal(m: Mrk, ctxt: SyntaxContext, table: &SCTable) -> SyntaxContext { let key = (ctxt, m); let new_ctxt = |_: &(SyntaxContext, Mrk)| @@ -77,13 +77,13 @@ pub fn apply_rename(id: Ident, to:Name, with_sctable(|table| apply_rename_internal(id, to, ctxt, table)) } -// Extend a syntax context with a given rename and sctable (explicit memoization) +/// Extend a syntax context with a given rename and sctable (explicit memoization) fn apply_rename_internal(id: Ident, to: Name, ctxt: SyntaxContext, table: &SCTable) -> SyntaxContext { - let key = (ctxt,id,to); - let new_ctxt = |_: &(SyntaxContext, Ident, Mrk)| + let key = (ctxt, id, to); + let new_ctxt = |_: &(SyntaxContext, Ident, Name)| idx_push(&mut *table.table.borrow_mut(), Rename(id, to, ctxt)); *table.rename_memo.borrow_mut().find_or_insert_with(key, new_ctxt) @@ -141,8 +141,8 @@ pub fn clear_tables() { with_resolve_table_mut(|table| *table = HashMap::new()); } -// Add a value to the end of a vec, return its index -fn idx_push(vec: &mut Vec , val: T) -> u32 { +/// Add a value to the end of a vec, return its index +fn idx_push(vec: &mut Vec, val: T) -> u32 { vec.push(val); (vec.len() - 1) as u32 } @@ -173,8 +173,8 @@ fn with_resolve_table_mut(op: |&mut ResolveTable| -> T) -> T { } } -// Resolve a syntax object to a name, per MTWT. -// adding memoization to resolve 500+ seconds in resolve for librustc (!) +/// Resolve a syntax object to a name, per MTWT. +/// adding memoization to resolve 500+ seconds in resolve for librustc (!) fn resolve_internal(id: Ident, table: &SCTable, resolve_table: &mut ResolveTable) -> Name { @@ -264,8 +264,8 @@ pub fn outer_mark(ctxt: SyntaxContext) -> Mrk { }) } -// Push a name... unless it matches the one on top, in which -// case pop and discard (so two of the same marks cancel) +/// Push a name... unless it matches the one on top, in which +/// case pop and discard (so two of the same marks cancel) fn xor_push(marks: &mut Vec, mark: Mrk) { if (marks.len() > 0) && (*marks.last().unwrap() == mark) { marks.pop().unwrap(); @@ -301,8 +301,8 @@ mod tests { assert_eq!(s.clone(), vec!(14)); } - fn id(n: Name, s: SyntaxContext) -> Ident { - Ident {name: n, ctxt: s} + fn id(n: u32, s: SyntaxContext) -> Ident { + Ident {name: Name(n), ctxt: s} } // because of the SCTable, I now need a tidy way of @@ -349,12 +349,12 @@ mod tests { fn test_unfold_refold(){ let mut t = new_sctable_internal(); - let test_sc = vec!(M(3),R(id(101,0),14),M(9)); + let test_sc = vec!(M(3),R(id(101,0),Name(14)),M(9)); assert_eq!(unfold_test_sc(test_sc.clone(),EMPTY_CTXT,&mut t),4); { let table = t.table.borrow(); assert!(*table.get(2) == Mark(9,0)); - assert!(*table.get(3) == Rename(id(101,0),14,2)); + assert!(*table.get(3) == Rename(id(101,0),Name(14),2)); assert!(*table.get(4) == Mark(3,3)); } assert_eq!(refold_test_sc(4,&t),test_sc); @@ -381,8 +381,8 @@ mod tests { #[test] fn test_marksof () { - let stopname = 242; - let name1 = 243; + let stopname = Name(242); + let name1 = Name(243); let mut t = new_sctable_internal(); assert_eq!(marksof_internal (EMPTY_CTXT,stopname,&t),Vec::new()); // FIXME #5074: ANF'd to dodge nested calls @@ -396,16 +396,16 @@ mod tests { assert_eq! 
(marksof_internal (ans, stopname,&t), vec!(16));} // rename where stop doesn't match: { let chain = vec!(M(9), - R(id(name1, + R(id(name1.uint() as u32, apply_mark_internal (4, EMPTY_CTXT,&mut t)), - 100101102), + Name(100101102)), M(14)); let ans = unfold_test_sc(chain,EMPTY_CTXT,&mut t); assert_eq! (marksof_internal (ans, stopname, &t), vec!(9,14));} // rename where stop does match { let name1sc = apply_mark_internal(4, EMPTY_CTXT, &mut t); let chain = vec!(M(9), - R(id(name1, name1sc), + R(id(name1.uint() as u32, name1sc), stopname), M(14)); let ans = unfold_test_sc(chain,EMPTY_CTXT,&mut t); @@ -419,55 +419,55 @@ mod tests { let mut t = new_sctable_internal(); let mut rt = HashMap::new(); // - ctxt is MT - assert_eq!(resolve_internal(id(a,EMPTY_CTXT),&mut t, &mut rt),a); + assert_eq!(resolve_internal(id(a,EMPTY_CTXT),&mut t, &mut rt),Name(a)); // - simple ignored marks { let sc = unfold_marks(vec!(1,2,3),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),a);} + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(a));} // - orthogonal rename where names don't match - { let sc = unfold_test_sc(vec!(R(id(50,EMPTY_CTXT),51),M(12)),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),a);} + { let sc = unfold_test_sc(vec!(R(id(50,EMPTY_CTXT),Name(51)),M(12)),EMPTY_CTXT,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(a));} // - rename where names do match, but marks don't { let sc1 = apply_mark_internal(1,EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50), + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50)), M(1), M(2)), EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), a);} + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(a));} // - rename where names and marks match { let sc1 = unfold_test_sc(vec!(M(1),M(2)),EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50),M(1),M(2)),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 50); } + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50)),M(1),M(2)),EMPTY_CTXT,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(50)); } // - rename where names and marks match by literal sharing { let sc1 = unfold_test_sc(vec!(M(1),M(2)),EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50)),sc1,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 50); } + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50))),sc1,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(50)); } // - two renames of the same var.. 
can only happen if you use // local-expand to prevent the inner binding from being renamed // during the rename-pass caused by the first: println!("about to run bad test"); - { let sc = unfold_test_sc(vec!(R(id(a,EMPTY_CTXT),50), - R(id(a,EMPTY_CTXT),51)), + { let sc = unfold_test_sc(vec!(R(id(a,EMPTY_CTXT),Name(50)), + R(id(a,EMPTY_CTXT),Name(51))), EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 51); } + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(51)); } // the simplest double-rename: - { let a_to_a50 = apply_rename_internal(id(a,EMPTY_CTXT),50,EMPTY_CTXT,&mut t); - let a50_to_a51 = apply_rename_internal(id(a,a_to_a50),51,a_to_a50,&mut t); - assert_eq!(resolve_internal(id(a,a50_to_a51),&mut t, &mut rt),51); + { let a_to_a50 = apply_rename_internal(id(a,EMPTY_CTXT),Name(50),EMPTY_CTXT,&mut t); + let a50_to_a51 = apply_rename_internal(id(a,a_to_a50),Name(51),a_to_a50,&mut t); + assert_eq!(resolve_internal(id(a,a50_to_a51),&mut t, &mut rt),Name(51)); // mark on the outside doesn't stop rename: let sc = apply_mark_internal(9,a50_to_a51,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),51); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(51)); // but mark on the inside does: - let a50_to_a51_b = unfold_test_sc(vec!(R(id(a,a_to_a50),51), + let a50_to_a51_b = unfold_test_sc(vec!(R(id(a,a_to_a50),Name(51)), M(9)), a_to_a50, &mut t); - assert_eq!(resolve_internal(id(a,a50_to_a51_b),&mut t, &mut rt),50);} + assert_eq!(resolve_internal(id(a,a50_to_a51_b),&mut t, &mut rt),Name(50));} } #[test] fn mtwt_resolve_test(){ let a = 40; - assert_eq!(resolve(id(a,EMPTY_CTXT)),a); + assert_eq!(resolve(id(a,EMPTY_CTXT)),Name(a)); } @@ -496,10 +496,10 @@ mod tests { #[test] fn new_resolves_test() { - let renames = vec!((Ident{name:23,ctxt:EMPTY_CTXT},24), - (Ident{name:29,ctxt:EMPTY_CTXT},29)); + let renames = vec!((Ident{name:Name(23),ctxt:EMPTY_CTXT},Name(24)), + (Ident{name:Name(29),ctxt:EMPTY_CTXT},Name(29))); let new_ctxt1 = apply_renames(&renames,EMPTY_CTXT); - assert_eq!(resolve(Ident{name:23,ctxt:new_ctxt1}),24); - assert_eq!(resolve(Ident{name:29,ctxt:new_ctxt1}),29); + assert_eq!(resolve(Ident{name:Name(23),ctxt:new_ctxt1}),Name(24)); + assert_eq!(resolve(Ident{name:Name(29),ctxt:new_ctxt1}),Name(29)); } } diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index a3c901904a948..696d62838ba79 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -363,6 +363,15 @@ fn mk_ident(cx: &ExtCtxt, sp: Span, ident: ast::Ident) -> Gc { vec!(e_str)) } +// Lift a name to the expr that evaluates to that name +fn mk_name(cx: &ExtCtxt, sp: Span, ident: ast::Ident) -> Gc { + let e_str = cx.expr_str(sp, token::get_ident(ident)); + cx.expr_method_call(sp, + cx.expr_ident(sp, id_ext("ext_cx")), + id_ext("name_of"), + vec!(e_str)) +} + fn mk_ast_path(cx: &ExtCtxt, sp: Span, name: &str) -> Gc { let idents = vec!(id_ext("syntax"), id_ext("ast"), id_ext(name)); cx.expr_path(cx.path_global(sp, idents)) @@ -401,68 +410,37 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc { } LIT_BYTE(i) => { - let e_byte = cx.expr_lit(sp, ast::LitByte(i)); + let e_byte = mk_name(cx, sp, i.ident()); return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte)); } LIT_CHAR(i) => { - let e_char = cx.expr_lit(sp, ast::LitChar(i)); + let e_char = mk_name(cx, sp, i.ident()); return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char)); } - LIT_INT(i, ity) => { - let s_ity = match ity { - ast::TyI => 
"TyI", - ast::TyI8 => "TyI8", - ast::TyI16 => "TyI16", - ast::TyI32 => "TyI32", - ast::TyI64 => "TyI64" - }; - let e_ity = mk_ast_path(cx, sp, s_ity); - let e_i64 = cx.expr_lit(sp, ast::LitInt(i, ast::TyI64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INT"), vec!(e_i64, e_ity)); + LIT_INTEGER(i) => { + let e_int = mk_name(cx, sp, i.ident()); + return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INTEGER"), vec!(e_int)); } - LIT_UINT(u, uty) => { - let s_uty = match uty { - ast::TyU => "TyU", - ast::TyU8 => "TyU8", - ast::TyU16 => "TyU16", - ast::TyU32 => "TyU32", - ast::TyU64 => "TyU64" - }; - let e_uty = mk_ast_path(cx, sp, s_uty); - let e_u64 = cx.expr_lit(sp, ast::LitUint(u, ast::TyU64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_UINT"), vec!(e_u64, e_uty)); - } - - LIT_INT_UNSUFFIXED(i) => { - let e_i64 = cx.expr_lit(sp, ast::LitInt(i, ast::TyI64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INT_UNSUFFIXED"), vec!(e_i64)); - } - - LIT_FLOAT(fident, fty) => { - let s_fty = match fty { - ast::TyF32 => "TyF32", - ast::TyF64 => "TyF64", - }; - let e_fty = mk_ast_path(cx, sp, s_fty); - let e_fident = mk_ident(cx, sp, fident); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_FLOAT"), vec!(e_fident, e_fty)); + LIT_FLOAT(fident) => { + let e_fident = mk_name(cx, sp, fident.ident()); + return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_FLOAT"), vec!(e_fident)); } LIT_STR(ident) => { return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_STR"), - vec!(mk_ident(cx, sp, ident))); + vec!(mk_name(cx, sp, ident.ident()))); } LIT_STR_RAW(ident, n) => { return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_STR_RAW"), - vec!(mk_ident(cx, sp, ident), cx.expr_uint(sp, n))); + vec!(mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))); } IDENT(ident, b) => { @@ -480,7 +458,7 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc { DOC_COMMENT(ident) => { return cx.expr_call(sp, mk_token_path(cx, sp, "DOC_COMMENT"), - vec!(mk_ident(cx, sp, ident))); + vec!(mk_name(cx, sp, ident.ident()))); } INTERPOLATED(_) => fail!("quote! with interpolated token"), diff --git a/src/libsyntax/ext/source_util.rs b/src/libsyntax/ext/source_util.rs index 8922f423aad31..5ac9dc86fcec2 100644 --- a/src/libsyntax/ext/source_util.rs +++ b/src/libsyntax/ext/source_util.rs @@ -28,7 +28,7 @@ use std::str; // the column/row/filename of the expression, or they include // a given file into the current one. -/* line!(): expands to the current line number */ +/// line!(): expands to the current line number pub fn expand_line(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { base::check_zero_tts(cx, sp, tts, "line!"); @@ -49,9 +49,9 @@ pub fn expand_col(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) base::MacExpr::new(cx.expr_uint(topmost.call_site, loc.col.to_uint())) } -/* file!(): expands to the current filename */ -/* The filemap (`loc.file`) contains a bunch more information we could spit - * out if we wanted. */ +/// file!(): expands to the current filename */ +/// The filemap (`loc.file`) contains a bunch more information we could spit +/// out if we wanted. pub fn expand_file(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { base::check_zero_tts(cx, sp, tts, "file!"); @@ -82,9 +82,9 @@ pub fn expand_mod(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) token::intern_and_get_ident(string.as_slice()))) } -// include! : parse the given file as an expr -// This is generally a bad idea because it's going to behave -// unhygienically. +/// include! 
: parse the given file as an expr +/// This is generally a bad idea because it's going to behave +/// unhygienically. pub fn expand_include(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { let file = match get_single_str_from_tts(cx, sp, tts, "include!") { diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index b30ede70f0e4b..bdf1f6eb6007e 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -8,7 +8,72 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// Earley-like parser for macros. +//! This is an Earley-like parser, without support for in-grammar nonterminals, +//! only by calling out to the main rust parser for named nonterminals (which it +//! commits to fully when it hits one in a grammar). This means that there are no +//! completer or predictor rules, and therefore no need to store one column per +//! token: instead, there's a set of current Earley items and a set of next +//! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in +//! pathological cases, is worse than traditional Earley parsing, but it's an +//! easier fit for Macro-by-Example-style rules, and I think the overhead is +//! lower. (In order to prevent the pathological case, we'd need to lazily +//! construct the resulting `NamedMatch`es at the very end. It'd be a pain, +//! and require more memory to keep around old items, but it would also save +//! overhead) +//! +//! Quick intro to how the parser works: +//! +//! A 'position' is a dot in the middle of a matcher, usually represented as a +//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a character at a time, maintaining a list +//! of items consistent with the current position in the input string: `cur_eis`. +//! +//! As it processes them, it fills up `eof_eis` with items that would be valid if +//! the macro invocation is now over, `bb_eis` with items that are waiting on +//! a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting +//! on the a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. It only advances or calls out to the +//! real Rust parser when no `cur_eis` items remain +//! +//! Example: Start parsing `a a a a b` against [· a $( a )* a b]. +//! +//! Remaining input: `a a a a b` +//! next_eis: [· a $( a )* a b] +//! +//! - - - Advance over an `a`. - - - +//! +//! Remaining input: `a a a b` +//! cur: [a · $( a )* a b] +//! Descend/Skip (first item). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an `a`. - - - +//! +//! Remaining input: `a a b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an `a`. - - - (this looks exactly like the last step) +//! +//! Remaining input: `a b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an `a`. - - - (this looks exactly like the last step) +//! +//! Remaining input: `b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] +//! +//! - - - Advance over a `b`. - - - +//! +//! Remaining input: `` +//! 
eof: [a $( a )* a b ·] + use ast; use ast::{Matcher, MatchTok, MatchSeq, MatchNonterminal, Ident}; @@ -25,75 +90,6 @@ use std::rc::Rc; use std::gc::GC; use std::collections::HashMap; -/* This is an Earley-like parser, without support for in-grammar nonterminals, -only by calling out to the main rust parser for named nonterminals (which it -commits to fully when it hits one in a grammar). This means that there are no -completer or predictor rules, and therefore no need to store one column per -token: instead, there's a set of current Earley items and a set of next -ones. Instead of NTs, we have a special case for Kleene star. The big-O, in -pathological cases, is worse than traditional Earley parsing, but it's an -easier fit for Macro-by-Example-style rules, and I think the overhead is -lower. (In order to prevent the pathological case, we'd need to lazily -construct the resulting `NamedMatch`es at the very end. It'd be a pain, -and require more memory to keep around old items, but it would also save -overhead)*/ - -/* Quick intro to how the parser works: - -A 'position' is a dot in the middle of a matcher, usually represented as a -dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. - -The parser walks through the input a character at a time, maintaining a list -of items consistent with the current position in the input string: `cur_eis`. - -As it processes them, it fills up `eof_eis` with items that would be valid if -the macro invocation is now over, `bb_eis` with items that are waiting on -a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting -on the a particular token. Most of the logic concerns moving the · through the -repetitions indicated by Kleene stars. It only advances or calls out to the -real Rust parser when no `cur_eis` items remain - -Example: Start parsing `a a a a b` against [· a $( a )* a b]. - -Remaining input: `a a a a b` -next_eis: [· a $( a )* a b] - -- - - Advance over an `a`. - - - - -Remaining input: `a a a b` -cur: [a · $( a )* a b] -Descend/Skip (first item). -next: [a $( · a )* a b] [a $( a )* · a b]. - -- - - Advance over an `a`. - - - - -Remaining input: `a a b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] - -- - - Advance over an `a`. - - - (this looks exactly like the last step) - -Remaining input: `a b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] - -- - - Advance over an `a`. - - - (this looks exactly like the last step) - -Remaining input: `b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] - -- - - Advance over a `b`. - - - - -Remaining input: `` -eof: [a $( a )* a b ·] - - */ - - /* to avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. */ @@ -147,24 +143,24 @@ pub fn initial_matcher_pos(ms: Vec , sep: Option, lo: BytePos) } } -// NamedMatch is a pattern-match result for a single ast::MatchNonterminal: -// so it is associated with a single ident in a parse, and all -// MatchedNonterminal's in the NamedMatch have the same nonterminal type -// (expr, item, etc). All the leaves in a single NamedMatch correspond to a -// single matcher_nonterminal in the ast::Matcher that produced it. 
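As a concrete, hypothetical illustration of the shape being described (sketched from the definitions below, not quoted from compiler output): matching a matcher like `$( $e:expr ),*` against the tokens `1, 2+2` would bind `e` to roughly

```text
MatchedSeq(~[MatchedNonterminal(NtExpr(`1`)),
             MatchedNonterminal(NtExpr(`2+2`))],
           <span of `1, 2+2`>)
```

one `MatchedSeq` per `MatchSeq` traversed in the matcher, with the parsed nonterminals at the leaves; nesting the Kleene star one level deeper would add one more `MatchedSeq` layer.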
-// -// It should probably be renamed, it has more or less exact correspondence to -// ast::match nodes, and the in-memory structure of a particular NamedMatch -// represents the match that occurred when a particular subset of an -// ast::match -- those ast::Matcher nodes leading to a single -// MatchNonterminal -- was applied to a particular token tree. -// -// The width of each MatchedSeq in the NamedMatch, and the identity of the -// MatchedNonterminal's, will depend on the token tree it was applied to: each -// MatchedSeq corresponds to a single MatchSeq in the originating -// ast::Matcher. The depth of the NamedMatch structure will therefore depend -// only on the nesting depth of ast::MatchSeq's in the originating -// ast::Matcher it was derived from. +/// NamedMatch is a pattern-match result for a single ast::MatchNonterminal: +/// so it is associated with a single ident in a parse, and all +/// MatchedNonterminal's in the NamedMatch have the same nonterminal type +/// (expr, item, etc). All the leaves in a single NamedMatch correspond to a +/// single matcher_nonterminal in the ast::Matcher that produced it. +/// +/// It should probably be renamed, it has more or less exact correspondence to +/// ast::match nodes, and the in-memory structure of a particular NamedMatch +/// represents the match that occurred when a particular subset of an +/// ast::match -- those ast::Matcher nodes leading to a single +/// MatchNonterminal -- was applied to a particular token tree. +/// +/// The width of each MatchedSeq in the NamedMatch, and the identity of the +/// MatchedNonterminal's, will depend on the token tree it was applied to: each +/// MatchedSeq corresponds to a single MatchSeq in the originating +/// ast::Matcher. The depth of the NamedMatch structure will therefore depend +/// only on the nesting depth of ast::MatchSeq's in the originating +/// ast::Matcher it was derived from. pub enum NamedMatch { MatchedSeq(Vec>, codemap::Span), @@ -224,7 +220,8 @@ pub fn parse_or_else(sess: &ParseSess, } } -// perform a token equality check, ignoring syntax context (that is, an unhygienic comparison) +/// Perform a token equality check, ignoring syntax context (that is, an +/// unhygienic comparison) pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { match (t1,t2) { (&token::IDENT(id1,_),&token::IDENT(id2,_)) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 2b481cb0596e7..249e9305150d6 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -119,7 +119,7 @@ impl MacResult for MacroRulesDefiner { } } -// Given `lhses` and `rhses`, this is the new macro we create +/// Given `lhses` and `rhses`, this is the new macro we create fn generic_extension(cx: &ExtCtxt, sp: Span, name: Ident, @@ -193,9 +193,9 @@ fn generic_extension(cx: &ExtCtxt, cx.span_fatal(best_fail_spot, best_fail_msg.as_slice()); } -// this procedure performs the expansion of the -// macro_rules! macro. It parses the RHS and adds -// an extension to the current context. +/// This procedure performs the expansion of the +/// macro_rules! macro. It parses the RHS and adds +/// an extension to the current context. 
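As a reminder of what feeds this machinery, a definition such as the following (an invented example, not from this patch) hands the expander one matcher/transcriber pair per arm; `generic_extension` then tries each `lhs` in order at every call site and transcribes the corresponding `rhs` on the first match:

```rust
// Two arms: two (lhs, rhs) pairs for generic_extension to try in order.
macro_rules! my_min {
    ($a:expr) => ($a);
    ($a:expr, $($rest:expr),+) => ({
        let a = $a;
        let b = my_min!($($rest),+); // recurse on the remaining arguments
        if a < b { a } else { b }
    });
}

fn main() {
    assert_eq!(my_min!(3, 1, 2), 1);
}
```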
pub fn add_new_extension(cx: &mut ExtCtxt, sp: Span, name: Ident, diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index c0c066fe4668b..726a7315f6991 100644 --- a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -32,7 +32,7 @@ struct TtFrame { #[deriving(Clone)] pub struct TtReader<'a> { pub sp_diag: &'a SpanHandler, - // the unzipped tree: + /// the unzipped tree: stack: Vec, /* for MBE-style macro transcription */ interpolations: HashMap>, @@ -43,9 +43,9 @@ pub struct TtReader<'a> { pub cur_span: Span, } -/** This can do Macro-By-Example transcription. On the other hand, if - * `src` contains no `TTSeq`s and `TTNonterminal`s, `interp` can (and - * should) be none. */ +/// This can do Macro-By-Example transcription. On the other hand, if +/// `src` contains no `TTSeq`s and `TTNonterminal`s, `interp` can (and +/// should) be none. pub fn new_tt_reader<'a>(sp_diag: &'a SpanHandler, interp: Option>>, src: Vec ) @@ -138,8 +138,8 @@ fn lockstep_iter_size(t: &TokenTree, r: &TtReader) -> LockstepIterSize { } } -// return the next token from the TtReader. -// EFFECT: advances the reader's token field +/// Return the next token from the TtReader. +/// EFFECT: advances the reader's token field pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { // FIXME(pcwalton): Bad copy? let ret_val = TokenAndSpan { diff --git a/src/libsyntax/lib.rs b/src/libsyntax/lib.rs index 6df91c66a25e8..53ee991385ae3 100644 --- a/src/libsyntax/lib.rs +++ b/src/libsyntax/lib.rs @@ -8,15 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -The Rust parser and macro expander. - -# Note - -This API is completely unstable and subject to change. - -*/ +//! The Rust parser and macro expander. +//! +//! # Note +//! +//! This API is completely unstable and subject to change. #![crate_id = "syntax#0.11.0"] // NOTE: remove after stage0 #![crate_name = "syntax"] diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index 53489e3283766..55ad1b7712310 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -18,7 +18,7 @@ use parse::token::INTERPOLATED; use std::gc::{Gc, GC}; -// a parser that can parse attributes. +/// A parser that can parse attributes. pub trait ParserAttr { fn parse_outer_attributes(&mut self) -> Vec; fn parse_attribute(&mut self, permit_inner: bool) -> ast::Attribute; @@ -30,11 +30,11 @@ pub trait ParserAttr { } impl<'a> ParserAttr for Parser<'a> { - // Parse attributes that appear before an item + /// Parse attributes that appear before an item fn parse_outer_attributes(&mut self) -> Vec { let mut attrs: Vec = Vec::new(); loop { - debug!("parse_outer_attributes: self.token={:?}", + debug!("parse_outer_attributes: self.token={}", self.token); match self.token { token::POUND => { @@ -43,7 +43,7 @@ impl<'a> ParserAttr for Parser<'a> { token::DOC_COMMENT(s) => { let attr = ::attr::mk_sugared_doc_attr( attr::mk_attr_id(), - self.id_to_interned_str(s), + self.id_to_interned_str(s.ident()), self.span.lo, self.span.hi ); @@ -59,10 +59,10 @@ impl<'a> ParserAttr for Parser<'a> { return attrs; } - // matches attribute = # ! [ meta_item ] - // - // if permit_inner is true, then a leading `!` indicates an inner - // attribute + /// Matches `attribute = # ! 
[ meta_item ]` + /// + /// If permit_inner is true, then a leading `!` indicates an inner + /// attribute fn parse_attribute(&mut self, permit_inner: bool) -> ast::Attribute { debug!("parse_attributes: permit_inner={:?} self.token={:?}", permit_inner, self.token); @@ -114,17 +114,17 @@ impl<'a> ParserAttr for Parser<'a> { }; } - // Parse attributes that appear after the opening of an item. These should - // be preceded by an exclamation mark, but we accept and warn about one - // terminated by a semicolon. In addition to a vector of inner attributes, - // this function also returns a vector that may contain the first outer - // attribute of the next item (since we can't know whether the attribute - // is an inner attribute of the containing item or an outer attribute of - // the first contained item until we see the semi). - - // matches inner_attrs* outer_attr? - // you can make the 'next' field an Option, but the result is going to be - // more useful as a vector. + /// Parse attributes that appear after the opening of an item. These should + /// be preceded by an exclamation mark, but we accept and warn about one + /// terminated by a semicolon. In addition to a vector of inner attributes, + /// this function also returns a vector that may contain the first outer + /// attribute of the next item (since we can't know whether the attribute + /// is an inner attribute of the containing item or an outer attribute of + /// the first contained item until we see the semi). + + /// matches inner_attrs* outer_attr? + /// you can make the 'next' field an Option, but the result is going to be + /// more useful as a vector. fn parse_inner_attrs_and_next(&mut self) -> (Vec , Vec ) { let mut inner_attrs: Vec = Vec::new(); @@ -139,7 +139,7 @@ impl<'a> ParserAttr for Parser<'a> { let Span { lo, hi, .. } = self.span; self.bump(); attr::mk_sugared_doc_attr(attr::mk_attr_id(), - self.id_to_interned_str(s), + self.id_to_interned_str(s.ident()), lo, hi) } @@ -157,9 +157,9 @@ impl<'a> ParserAttr for Parser<'a> { (inner_attrs, next_outer_attrs) } - // matches meta_item = IDENT - // | IDENT = lit - // | IDENT meta_seq + /// matches meta_item = IDENT + /// | IDENT = lit + /// | IDENT meta_seq fn parse_meta_item(&mut self) -> Gc { match self.token { token::INTERPOLATED(token::NtMeta(e)) => { @@ -201,7 +201,7 @@ impl<'a> ParserAttr for Parser<'a> { } } - // matches meta_seq = ( COMMASEP(meta_item) ) + /// matches meta_seq = ( COMMASEP(meta_item) ) fn parse_meta_seq(&mut self) -> Vec> { self.parse_seq(&token::LPAREN, &token::RPAREN, diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs index 8d9cc305c26e8..516f22cdf4d60 100644 --- a/src/libsyntax/parse/classify.rs +++ b/src/libsyntax/parse/classify.rs @@ -15,13 +15,13 @@ use ast; use std::gc::Gc; -// does this expression require a semicolon to be treated -// as a statement? The negation of this: 'can this expression -// be used as a statement without a semicolon' -- is used -// as an early-bail-out in the parser so that, for instance, -// 'if true {...} else {...} -// |x| 5 ' -// isn't parsed as (if true {...} else {...} | x) | 5 +/// Does this expression require a semicolon to be treated +/// as a statement? 
The negation of this: 'can this expression +/// be used as a statement without a semicolon' -- is used +/// as an early-bail-out in the parser so that, for instance, +/// if true {...} else {...} +/// |x| 5 +/// isn't parsed as (if true {...} else {...} | x) | 5 pub fn expr_requires_semi_to_be_stmt(e: Gc) -> bool { match e.node { ast::ExprIf(..) @@ -41,9 +41,9 @@ pub fn expr_is_simple_block(e: Gc) -> bool { } } -// this statement requires a semicolon after it. -// note that in one case (stmt_semi), we've already -// seen the semicolon, and thus don't need another. +/// this statement requires a semicolon after it. +/// note that in one case (stmt_semi), we've already +/// seen the semicolon, and thus don't need another. pub fn stmt_ends_with_semi(stmt: &ast::Stmt) -> bool { return match stmt.node { ast::StmtDecl(d, _) => { diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs index 3c3f0c7a82044..3842170d67777 100644 --- a/src/libsyntax/parse/common.rs +++ b/src/libsyntax/parse/common.rs @@ -12,8 +12,8 @@ use parse::token; -// SeqSep : a sequence separator (token) -// and whether a trailing separator is allowed. +/// SeqSep : a sequence separator (token) +/// and whether a trailing separator is allowed. pub struct SeqSep { pub sep: Option, pub trailing_sep_allowed: bool diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 73e5bb97f51d0..3f3a8a723f10c 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -13,7 +13,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos}; use diagnostic; use parse::lexer::{is_whitespace, Reader}; use parse::lexer::{StringReader, TokenAndSpan}; -use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment}; +use parse::lexer::is_block_doc_comment; use parse::lexer; use parse::token; @@ -24,10 +24,14 @@ use std::uint; #[deriving(Clone, PartialEq)] pub enum CommentStyle { - Isolated, // No code on either side of each line of the comment - Trailing, // Code exists to the left of the comment - Mixed, // Code before /* foo */ and after the comment - BlankLine, // Just a manual blank line "\n\n", for layout + /// No code on either side of each line of the comment + Isolated, + /// Code exists to the left of the comment + Trailing, + /// Code before /* foo */ and after the comment + Mixed, + /// Just a manual blank line "\n\n", for layout + BlankLine, } #[deriving(Clone)] @@ -38,9 +42,9 @@ pub struct Comment { } pub fn is_doc_comment(s: &str) -> bool { - (s.starts_with("///") && !is_line_non_doc_comment(s)) || + (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || - (s.starts_with("/**") && !is_block_non_doc_comment(s)) || + (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") } @@ -198,9 +202,9 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool, } } -// Returns None if the first col chars of s contain a non-whitespace char. -// Otherwise returns Some(k) where k is first char offset after that leading -// whitespace. Note k may be outside bounds of s. +/// Returns None if the first col chars of s contain a non-whitespace char. +/// Otherwise returns Some(k) where k is first char offset after that leading +/// whitespace. Note k may be outside bounds of s. 
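Restating that contract as a free-standing sketch (assumed behaviour in modern Rust, not the compiler's own `CharPos`-based code): scan up to `col` characters, give up on the first non-whitespace one, and keep counting even past the end of the string, which is how `k` can end up outside the bounds of `s`:

```rust
fn all_whitespace(s: &str, col: usize) -> Option<usize> {
    let mut chars = s.chars();
    let mut k = 0;
    for _ in 0..col {
        match chars.next() {
            Some(c) if !c.is_whitespace() => return None,
            _ => k += 1, // whitespace, or already past the end of `s`
        }
    }
    Some(k)
}
```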
fn all_whitespace(s: &str, col: CharPos) -> Option { let len = s.len(); let mut col = col.to_uint(); @@ -256,7 +260,7 @@ fn read_block_comment(rdr: &mut StringReader, rdr.bump(); rdr.bump(); } - if !is_block_non_doc_comment(curr_line.as_slice()) { + if is_block_doc_comment(curr_line.as_slice()) { return } assert!(!curr_line.as_slice().contains_char('\n')); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 1e72b2de20f73..0aaddacfab624 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -18,7 +18,6 @@ use parse::token::{str_to_ident}; use std::char; use std::mem::replace; -use std::num::from_str_radix; use std::rc::Rc; use std::str; @@ -44,13 +43,13 @@ pub struct TokenAndSpan { pub struct StringReader<'a> { pub span_diagnostic: &'a SpanHandler, - // The absolute offset within the codemap of the next character to read + /// The absolute offset within the codemap of the next character to read pub pos: BytePos, - // The absolute offset within the codemap of the last character read(curr) + /// The absolute offset within the codemap of the last character read(curr) pub last_pos: BytePos, - // The column of the next character to read + /// The column of the next character to read pub col: CharPos, - // The last character to be read + /// The last character to be read pub curr: Option, pub filemap: Rc, /* cached: */ @@ -60,7 +59,7 @@ pub struct StringReader<'a> { impl<'a> Reader for StringReader<'a> { fn is_eof(&self) -> bool { self.curr.is_none() } - // return the next token. EFFECT: advances the string_reader. + /// Return the next token. EFFECT: advances the string_reader. fn next_token(&mut self) -> TokenAndSpan { let ret_val = TokenAndSpan { tok: replace(&mut self.peek_tok, token::UNDERSCORE), @@ -90,7 +89,7 @@ impl<'a> Reader for TtReader<'a> { } fn next_token(&mut self) -> TokenAndSpan { let r = tt_next_token(self); - debug!("TtReader: r={:?}", r); + debug!("TtReader: r={}", r); r } fn fatal(&self, m: &str) -> ! { @@ -188,7 +187,7 @@ impl<'a> StringReader<'a> { /// Advance peek_tok and peek_span to refer to the next token, and /// possibly update the interner. fn advance_token(&mut self) { - match self.consume_whitespace_and_comments() { + match self.scan_whitespace_or_comment() { Some(comment) => { self.peek_span = comment.sp; self.peek_tok = comment.tok; @@ -217,6 +216,20 @@ impl<'a> StringReader<'a> { self.with_str_from_to(start, self.last_pos, f) } + /// Create a Name from a given offset to the current offset, each + /// adjusted 1 towards each other (assumes that on either side there is a + /// single-byte delimiter). + pub fn name_from(&self, start: BytePos) -> ast::Name { + debug!("taking an ident from {} to {}", start, self.last_pos); + self.with_str_from(start, token::intern) + } + + /// As name_from, with an explicit endpoint. + pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { + debug!("taking an ident from {} to {}", start, end); + self.with_str_from_to(start, end, token::intern) + } + /// Calls `f` with a string slice of the source text spanning from `start` /// up to but excluding `end`. fn with_str_from_to(&self, start: BytePos, end: BytePos, f: |s: &str| -> T) -> T { @@ -326,8 +339,7 @@ impl<'a> StringReader<'a> { /// PRECONDITION: self.curr is not whitespace /// Eats any kind of comment. 
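For reference, the classification that `is_doc_comment` and `is_block_doc_comment` (used here and in `scan_comment`) imply for the common comment shapes, as far as this patch is concerned:

```text
///  ...       doc comment            ////  ...    ordinary comment (only more `/`s)
//!  ...       doc comment (inner)    //    ...    ordinary comment
/**  ... */    doc comment            /**/         ordinary comment (only `*`s inside)
/*!  ... */    doc comment (inner)
```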
- /// Returns a Some(sugared-doc-attr) if one exists, None otherwise - fn consume_any_line_comment(&mut self) -> Option { + fn scan_comment(&mut self) -> Option { match self.curr { Some(c) => { if c.is_whitespace() { @@ -362,28 +374,32 @@ impl<'a> StringReader<'a> { } self.bump(); } - let ret = self.with_str_from(start_bpos, |string| { + return self.with_str_from(start_bpos, |string| { // but comments with only more "/"s are not - if !is_line_non_doc_comment(string) { - Some(TokenAndSpan{ - tok: token::DOC_COMMENT(str_to_ident(string)), - sp: codemap::mk_sp(start_bpos, self.last_pos) - }) + let tok = if is_doc_comment(string) { + token::DOC_COMMENT(token::intern(string)) } else { - None - } - }); + token::COMMENT + }; - if ret.is_some() { - return ret; - } + return Some(TokenAndSpan{ + tok: tok, + sp: codemap::mk_sp(start_bpos, self.last_pos) + }); + }); } else { + let start_bpos = self.last_pos - BytePos(2); while !self.curr_is('\n') && !self.is_eof() { self.bump(); } + return Some(TokenAndSpan { + tok: token::COMMENT, + sp: codemap::mk_sp(start_bpos, self.last_pos) + }); } - // Restart whitespace munch. - self.consume_whitespace_and_comments() } - Some('*') => { self.bump(); self.bump(); self.consume_block_comment() } + Some('*') => { + self.bump(); self.bump(); + self.scan_block_comment() + } _ => None } } else if self.curr_is('#') { @@ -399,9 +415,15 @@ impl<'a> StringReader<'a> { let cmap = CodeMap::new(); cmap.files.borrow_mut().push(self.filemap.clone()); let loc = cmap.lookup_char_pos_adj(self.last_pos); + debug!("Skipping a shebang"); if loc.line == 1u && loc.col == CharPos(0u) { + // FIXME: Add shebang "token", return it + let start = self.last_pos; while !self.curr_is('\n') && !self.is_eof() { self.bump(); } - return self.consume_whitespace_and_comments(); + return Some(TokenAndSpan { + tok: token::SHEBANG(self.name_from(start)), + sp: codemap::mk_sp(start, self.last_pos) + }); } } None @@ -410,15 +432,33 @@ impl<'a> StringReader<'a> { } } - /// EFFECT: eats whitespace and comments. - /// Returns a Some(sugared-doc-attr) if one exists, None otherwise. - fn consume_whitespace_and_comments(&mut self) -> Option { - while is_whitespace(self.curr) { self.bump(); } - return self.consume_any_line_comment(); + /// If there is whitespace, shebang, or a comment, scan it. Otherwise, + /// return None. + fn scan_whitespace_or_comment(&mut self) -> Option { + match self.curr.unwrap_or('\0') { + // # to handle shebang at start of file -- this is the entry point + // for skipping over all "junk" + '/' | '#' => { + let c = self.scan_comment(); + debug!("scanning a comment {}", c); + c + }, + c if is_whitespace(Some(c)) => { + let start_bpos = self.last_pos; + while is_whitespace(self.curr) { self.bump(); } + let c = Some(TokenAndSpan { + tok: token::WS, + sp: codemap::mk_sp(start_bpos, self.last_pos) + }); + debug!("scanning whitespace: {}", c); + c + }, + _ => None + } } - // might return a sugared-doc-attr - fn consume_block_comment(&mut self) -> Option { + /// Might return a sugared-doc-attr + fn scan_block_comment(&mut self) -> Option { // block comments starting with "/**" or "/*!" 
are doc-comments let is_doc_comment = self.curr_is('*') || self.curr_is('!'); let start_bpos = self.last_pos - BytePos(2); @@ -453,228 +493,132 @@ impl<'a> StringReader<'a> { self.bump(); } - let res = if is_doc_comment { - self.with_str_from(start_bpos, |string| { - // but comments with only "*"s between two "/"s are not - if !is_block_non_doc_comment(string) { - let string = if has_cr { - self.translate_crlf(start_bpos, string, - "bare CR not allowed in block doc-comment") - } else { string.into_maybe_owned() }; - Some(TokenAndSpan{ - tok: token::DOC_COMMENT(str_to_ident(string.as_slice())), - sp: codemap::mk_sp(start_bpos, self.last_pos) - }) - } else { - None - } - }) - } else { - None - }; - - // restart whitespace munch. - if res.is_some() { res } else { self.consume_whitespace_and_comments() } - } - - fn scan_exponent(&mut self, start_bpos: BytePos) -> Option { - // \x00 hits the `return None` case immediately, so this is fine. - let mut c = self.curr.unwrap_or('\x00'); - let mut rslt = String::new(); - if c == 'e' || c == 'E' { - rslt.push_char(c); - self.bump(); - c = self.curr.unwrap_or('\x00'); - if c == '-' || c == '+' { - rslt.push_char(c); - self.bump(); - } - let exponent = self.scan_digits(10u); - if exponent.len() > 0u { - rslt.push_str(exponent.as_slice()); - return Some(rslt); + self.with_str_from(start_bpos, |string| { + // but comments with only "*"s between two "/"s are not + let tok = if is_block_doc_comment(string) { + let string = if has_cr { + self.translate_crlf(start_bpos, string, + "bare CR not allowed in block doc-comment") + } else { string.into_maybe_owned() }; + token::DOC_COMMENT(token::intern(string.as_slice())) } else { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "scan_exponent: bad fp literal"); - rslt.push_str("1"); // arbitrary placeholder exponent - return Some(rslt); - } - } else { - return None::; - } + token::COMMENT + }; + + Some(TokenAndSpan{ + tok: tok, + sp: codemap::mk_sp(start_bpos, self.last_pos) + }) + }) } - fn scan_digits(&mut self, radix: uint) -> String { - let mut rslt = String::new(); + /// Scan through any digits (base `radix`) or underscores, and return how + /// many digits there were. 
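The digit scanner's contract, restated as a self-contained sketch (assumed semantics, written in modern Rust rather than the compiler's internal API): underscores are consumed but not counted, and scanning stops at the first character that is neither `_` nor a digit in the given radix:

```rust
/// Returns (number of digits seen, number of chars consumed).
fn scan_digits(src: &str, radix: u32) -> (usize, usize) {
    let (mut digits, mut consumed) = (0, 0);
    for c in src.chars() {
        if c == '_' {
            consumed += 1;               // skipped, not counted
        } else if c.to_digit(radix).is_some() {
            digits += 1;
            consumed += 1;
        } else {
            break;                       // first non-digit ends the scan
        }
    }
    (digits, consumed)
}

fn main() {
    assert_eq!(scan_digits("1_000;", 10), (4, 5));
    assert_eq!(scan_digits("ff_g", 16), (2, 3));
}
```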
+ fn scan_digits(&mut self, radix: uint) -> uint { + let mut len = 0u; loop { let c = self.curr; - if c == Some('_') { self.bump(); continue; } + if c == Some('_') { debug!("skipping a _"); self.bump(); continue; } match c.and_then(|cc| char::to_digit(cc, radix)) { - Some(_) => { - rslt.push_char(c.unwrap()); - self.bump(); - } - _ => return rslt + Some(_) => { + debug!("{} in scan_digits", c); + len += 1; + self.bump(); + } + _ => return len } }; } - fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: uint) { - match base { - 16u => self.err_span_(start_bpos, last_bpos, - "hexadecimal float literal is not supported"), - 8u => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"), - 2u => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"), - _ => () - } - } - + /// Lex a LIT_INTEGER or a LIT_FLOAT fn scan_number(&mut self, c: char) -> token::Token { - let mut num_str; - let mut base = 10u; - let mut c = c; - let mut n = self.nextch().unwrap_or('\x00'); + let mut num_digits; + let mut base = 10; let start_bpos = self.last_pos; - if c == '0' && n == 'x' { - self.bump(); - self.bump(); - base = 16u; - } else if c == '0' && n == 'o' { - self.bump(); - self.bump(); - base = 8u; - } else if c == '0' && n == 'b' { - self.bump(); - self.bump(); - base = 2u; - } - num_str = self.scan_digits(base); - c = self.curr.unwrap_or('\x00'); - self.nextch(); - if c == 'u' || c == 'i' { - enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) } - let signed = c == 'i'; - let mut tp = { - if signed { Signed(ast::TyI) } - else { Unsigned(ast::TyU) } - }; - self.bump(); - c = self.curr.unwrap_or('\x00'); - if c == '8' { - self.bump(); - tp = if signed { Signed(ast::TyI8) } - else { Unsigned(ast::TyU8) }; - } - n = self.nextch().unwrap_or('\x00'); - if c == '1' && n == '6' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI16) } - else { Unsigned(ast::TyU16) }; - } else if c == '3' && n == '2' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI32) } - else { Unsigned(ast::TyU32) }; - } else if c == '6' && n == '4' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI64) } - else { Unsigned(ast::TyU64) }; - } - if num_str.len() == 0u { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "no valid digits found for number"); - num_str = "1".to_string(); - } - let parsed = match from_str_radix::(num_str.as_slice(), - base as uint) { - Some(p) => p, - None => { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "int literal is too large"); - 1 - } - }; - match tp { - Signed(t) => return token::LIT_INT(parsed as i64, t), - Unsigned(t) => return token::LIT_UINT(parsed, t) + self.bump(); + + if c == '0' { + match self.curr.unwrap_or('\0') { + 'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2); } + 'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8); } + 'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16); } + '0'..'9' | '_' | '.' 
=> { + num_digits = self.scan_digits(10) + 1; + } + 'u' | 'i' => { + self.scan_int_suffix(); + return token::LIT_INTEGER(self.name_from(start_bpos)); + }, + 'f' => { + let last_pos = self.last_pos; + self.scan_float_suffix(); + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.name_from(start_bpos)); + } + _ => { + // just a 0 + return token::LIT_INTEGER(self.name_from(start_bpos)); + } } + } else if c.is_digit_radix(10) { + num_digits = self.scan_digits(10) + 1; + } else { + num_digits = 0; } - let mut is_float = false; - if self.curr_is('.') && !(ident_start(self.nextch()) || self.nextch_is('.')) { - is_float = true; - self.bump(); - let dec_part = self.scan_digits(10u); - num_str.push_char('.'); - num_str.push_str(dec_part.as_slice()); - } - match self.scan_exponent(start_bpos) { - Some(ref s) => { - is_float = true; - num_str.push_str(s.as_slice()); - } - None => () + + if num_digits == 0 { + self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); + // eat any suffix + self.scan_int_suffix(); + return token::LIT_INTEGER(token::intern("0")); } - if self.curr_is('f') { + // might be a float, but don't be greedy if this is actually an + // integer literal followed by field/method access or a range pattern + // (`0..2` and `12.foo()`) + if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0') + .is_XID_start() { + // might have stuff after the ., and if it does, it needs to start + // with a number self.bump(); - c = self.curr.unwrap_or('\x00'); - n = self.nextch().unwrap_or('\x00'); - if c == '3' && n == '2' { - self.bump(); - self.bump(); - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), - ast::TyF32); - } else if c == '6' && n == '4' { - self.bump(); - self.bump(); - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), - ast::TyF64); - /* FIXME (#2252): if this is out of range for either a - 32-bit or 64-bit float, it won't be noticed till the - back-end. 
*/ + if self.curr.unwrap_or('\0').is_digit_radix(10) { + self.scan_digits(10); + self.scan_float_exponent(); + self.scan_float_suffix(); } - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "expected `f32` or `f64` suffix"); - } - if is_float { - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT_UNSUFFIXED(str_to_ident( - num_str.as_slice())); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.name_from(start_bpos)); + } else if self.curr_is('f') { + // or it might be an integer literal suffixed as a float + self.scan_float_suffix(); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.name_from(start_bpos)); } else { - if num_str.len() == 0u { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "no valid digits found for number"); - num_str = "1".to_string(); + // it might be a float if it has an exponent + if self.curr_is('e') || self.curr_is('E') { + self.scan_float_exponent(); + self.scan_float_suffix(); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.name_from(start_bpos)); } - let parsed = match from_str_radix::(num_str.as_slice(), - base as uint) { - Some(p) => p, - None => { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "int literal is too large"); - 1 - } - }; - - debug!("lexing {} as an unsuffixed integer literal", - num_str.as_slice()); - return token::LIT_INT_UNSUFFIXED(parsed as i64); + // but we certainly have an integer! + self.scan_int_suffix(); + return token::LIT_INTEGER(self.name_from(start_bpos)); } } - - fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char { - let mut accum_int = 0u32; + /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an + /// error if too many or too few digits are encountered. + fn scan_hex_digits(&mut self, n_digits: uint, delim: char) -> bool { + debug!("scanning {} digits until {}", n_digits, delim); let start_bpos = self.last_pos; - for _ in range(0, n_hex_digits) { + let mut accum_int = 0; + + for _ in range(0, n_digits) { if self.is_eof() { let last_bpos = self.last_pos; self.fatal_span_(start_bpos, last_bpos, "unterminated numeric character escape"); @@ -695,11 +639,11 @@ impl<'a> StringReader<'a> { } match char::from_u32(accum_int) { - Some(x) => x, + Some(_) => true, None => { let last_bpos = self.last_pos; self.err_span_(start_bpos, last_bpos, "illegal numeric character escape"); - '?' + false } } } @@ -707,8 +651,10 @@ impl<'a> StringReader<'a> { /// Scan for a single (possibly escaped) byte or char /// in a byte, (non-raw) byte string, char, or (non-raw) string literal. /// `start` is the position of `first_source_char`, which is already consumed. + /// + /// Returns true if there was a valid char/byte, false otherwise. fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char, - ascii_only: bool, delim: char) -> Option { + ascii_only: bool, delim: char) -> bool { match first_source_char { '\\' => { // '\X' for some X must be a character constant: @@ -718,24 +664,18 @@ impl<'a> StringReader<'a> { match escaped { None => {}, // EOF here is an error that will be checked later. 
Some(e) => { - return Some(match e { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '"' => '"', - '0' => '\x00', - 'x' => self.scan_numeric_escape(2u, delim), - 'u' if !ascii_only => self.scan_numeric_escape(4u, delim), - 'U' if !ascii_only => self.scan_numeric_escape(8u, delim), + return match e { + 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, + 'x' => self.scan_hex_digits(2u, delim), + 'u' if !ascii_only => self.scan_hex_digits(4u, delim), + 'U' if !ascii_only => self.scan_hex_digits(8u, delim), '\n' if delim == '"' => { self.consume_whitespace(); - return None + true }, '\r' if delim == '"' && self.curr_is('\n') => { self.consume_whitespace(); - return None + true } c => { let last_pos = self.last_pos; @@ -744,9 +684,9 @@ impl<'a> StringReader<'a> { if ascii_only { "unknown byte escape" } else { "unknown character escape" }, c); - c + false } - }) + } } } } @@ -757,14 +697,16 @@ impl<'a> StringReader<'a> { if ascii_only { "byte constant must be escaped" } else { "character constant must be escaped" }, first_source_char); + return false; } '\r' => { if self.curr_is('\n') { self.bump(); - return Some('\n'); + return true; } else { self.err_span_(start, self.last_pos, "bare CR not allowed in string, use \\r instead"); + return false; } } _ => if ascii_only && first_source_char > '\x7F' { @@ -773,9 +715,84 @@ impl<'a> StringReader<'a> { start, last_pos, "byte constant must be ASCII. \ Use a \\xHH escape for a non-ASCII byte", first_source_char); + return false; + } + } + true + } + + /// Scan over an int literal suffix. + fn scan_int_suffix(&mut self) { + match self.curr { + Some('i') | Some('u') => { + self.bump(); + + if self.curr_is('8') { + self.bump(); + } else if self.curr_is('1') { + if !self.nextch_is('6') { + self.err_span_(self.last_pos, self.pos, + "illegal int suffix"); + } else { + self.bump(); self.bump(); + } + } else if self.curr_is('3') { + if !self.nextch_is('2') { + self.err_span_(self.last_pos, self.pos, + "illegal int suffix"); + } else { + self.bump(); self.bump(); + } + } else if self.curr_is('6') { + if !self.nextch_is('4') { + self.err_span_(self.last_pos, self.pos, + "illegal int suffix"); + } else { + self.bump(); self.bump(); + } + } + }, + _ => { } + } + } + + /// Scan over a float literal suffix + fn scan_float_suffix(&mut self) { + if self.curr_is('f') { + if (self.nextch_is('3') && self.nextnextch_is('2')) + || (self.nextch_is('6') && self.nextnextch_is('4')) { + self.bump(); + self.bump(); + self.bump(); + } else { + self.err_span_(self.last_pos, self.pos, "illegal float suffix"); } } - Some(first_source_char) + } + + /// Scan over a float exponent. + fn scan_float_exponent(&mut self) { + if self.curr_is('e') || self.curr_is('E') { + self.bump(); + if self.curr_is('-') || self.curr_is('+') { + self.bump(); + } + if self.scan_digits(10) == 0 { + self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent") + } + } + } + + /// Check that a base is valid for a floating literal, emitting a nice + /// error if it isn't. 
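The three scanners just added (scan_int_suffix, scan_float_suffix, scan_float_exponent) only check that a suffix is well formed; what the suffix *means* is decided later by parse::integer_lit and parse::float_lit in this patch. The recognition step is small enough to sketch in isolation (modern Rust, not the 2014 dialect of the patch; the function name and return shape are mine):

    // Recognise the numeric suffixes accepted above. Longer suffixes are
    // tried first so that e.g. "u8" wins over the bare "u".
    fn split_numeric_suffix(lit: &str) -> (&str, Option<&str>) {
        const SUFFIXES: [&str; 12] = [
            "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64",
            "f32", "f64", "i", "u",
        ];
        for suf in SUFFIXES {
            if let Some(body) = lit.strip_suffix(suf) {
                return (body, Some(suf));
            }
        }
        (lit, None)
    }

For example, split_numeric_suffix("123u8") yields ("123", Some("u8")), while a malformed suffix simply fails to match, which is the case the err_span_ calls above diagnose.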
+ fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: uint) { + match base { + 16u => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \ + supported"), + 8u => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"), + 2u => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"), + _ => () + } } fn binop(&mut self, op: token::BinOp) -> token::Token { @@ -910,7 +927,7 @@ impl<'a> StringReader<'a> { let start = self.last_pos; // the eof will be picked up by the final `'` check below - let mut c2 = self.curr.unwrap_or('\x00'); + let c2 = self.curr.unwrap_or('\x00'); self.bump(); // If the character is an ident start not followed by another single @@ -953,7 +970,7 @@ impl<'a> StringReader<'a> { } // Otherwise it is a character constant: - c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap(); + let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\''); if !self.curr_is('\'') { let last_bpos = self.last_pos; self.fatal_span_verbose( @@ -963,118 +980,23 @@ impl<'a> StringReader<'a> { start - BytePos(1), last_bpos, "unterminated character constant".to_string()); } + let id = if valid { self.name_from(start) } else { token::intern("0") }; self.bump(); // advance curr past token - return token::LIT_CHAR(c2); + return token::LIT_CHAR(id); } 'b' => { self.bump(); return match self.curr { - Some('\'') => parse_byte(self), - Some('"') => parse_byte_string(self), - Some('r') => parse_raw_byte_string(self), + Some('\'') => self.scan_byte(), + Some('"') => self.scan_byte_string(), + Some('r') => self.scan_raw_byte_string(), _ => unreachable!() // Should have been a token::IDENT above. }; - fn parse_byte(self_: &mut StringReader) -> token::Token { - self_.bump(); - let start = self_.last_pos; - - // the eof will be picked up by the final `'` check below - let mut c2 = self_.curr.unwrap_or('\x00'); - self_.bump(); - - c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap(); - if !self_.curr_is('\'') { - // Byte offsetting here is okay because the - // character before position `start` are an - // ascii single quote and ascii 'b'. 
- let last_pos = self_.last_pos; - self_.fatal_span_verbose( - start - BytePos(2), last_pos, - "unterminated byte constant".to_string()); - } - self_.bump(); // advance curr past token - return token::LIT_BYTE(c2 as u8); - } - - fn parse_byte_string(self_: &mut StringReader) -> token::Token { - self_.bump(); - let start = self_.last_pos; - let mut value = Vec::new(); - while !self_.curr_is('"') { - if self_.is_eof() { - let last_pos = self_.last_pos; - self_.fatal_span_(start, last_pos, - "unterminated double quote byte string"); - } - - let ch_start = self_.last_pos; - let ch = self_.curr.unwrap(); - self_.bump(); - self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"') - .map(|ch| value.push(ch as u8)); - } - self_.bump(); - return token::LIT_BINARY(Rc::new(value)); - } - - fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token { - let start_bpos = self_.last_pos; - self_.bump(); - let mut hash_count = 0u; - while self_.curr_is('#') { - self_.bump(); - hash_count += 1; - } - - if self_.is_eof() { - let last_pos = self_.last_pos; - self_.fatal_span_(start_bpos, last_pos, "unterminated raw string"); - } else if !self_.curr_is('"') { - let last_pos = self_.last_pos; - let ch = self_.curr.unwrap(); - self_.fatal_span_char(start_bpos, last_pos, - "only `#` is allowed in raw string delimitation; \ - found illegal character", - ch); - } - self_.bump(); - let content_start_bpos = self_.last_pos; - let mut content_end_bpos; - 'outer: loop { - match self_.curr { - None => { - let last_pos = self_.last_pos; - self_.fatal_span_(start_bpos, last_pos, "unterminated raw string") - }, - Some('"') => { - content_end_bpos = self_.last_pos; - for _ in range(0, hash_count) { - self_.bump(); - if !self_.curr_is('#') { - continue 'outer; - } - } - break; - }, - Some(c) => if c > '\x7F' { - let last_pos = self_.last_pos; - self_.err_span_char( - last_pos, last_pos, "raw byte string must be ASCII", c); - } - } - self_.bump(); - } - self_.bump(); - let bytes = self_.with_str_from_to(content_start_bpos, - content_end_bpos, - |s| s.as_bytes().to_owned()); - return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); - } } '"' => { - let mut accum_str = String::new(); let start_bpos = self.last_pos; + let mut valid = true; self.bump(); while !self.curr_is('"') { if self.is_eof() { @@ -1085,11 +1007,13 @@ impl<'a> StringReader<'a> { let ch_start = self.last_pos; let ch = self.curr.unwrap(); self.bump(); - self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"') - .map(|ch| accum_str.push_char(ch)); + valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"'); } + // adjust for the ACSII " at the start of the literal + let id = if valid { self.name_from(start_bpos + BytePos(1)) } + else { token::intern("??") }; self.bump(); - return token::LIT_STR(str_to_ident(accum_str.as_slice())); + return token::LIT_STR(id); } 'r' => { let start_bpos = self.last_pos; @@ -1114,7 +1038,7 @@ impl<'a> StringReader<'a> { self.bump(); let content_start_bpos = self.last_pos; let mut content_end_bpos; - let mut has_cr = false; + let mut valid = true; 'outer: loop { if self.is_eof() { let last_bpos = self.last_pos; @@ -1137,23 +1061,26 @@ impl<'a> StringReader<'a> { } } break; - } + }, '\r' => { - has_cr = true; + if !self.nextch_is('\n') { + let last_bpos = self.last_pos; + self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \ + string, use \\r instead"); + valid = false; + } } _ => () } self.bump(); } self.bump(); - let str_content = 
self.with_str_from_to(content_start_bpos, content_end_bpos, |string| { - let string = if has_cr { - self.translate_crlf(content_start_bpos, string, - "bare CR not allowed in raw string") - } else { string.into_maybe_owned() }; - str_to_ident(string.as_slice()) - }); - return token::LIT_STR_RAW(str_content, hash_count); + let id = if valid { + self.name_from_to(content_start_bpos, content_end_bpos) + } else { + token::intern("??") + }; + return token::LIT_STR_RAW(id, hash_count); } '-' => { if self.nextch_is('>') { @@ -1221,6 +1148,104 @@ impl<'a> StringReader<'a> { // consider shebangs comments, but not inner attributes || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) } + + fn scan_byte(&mut self) -> token::Token { + self.bump(); + let start = self.last_pos; + + // the eof will be picked up by the final `'` check below + let c2 = self.curr.unwrap_or('\x00'); + self.bump(); + + let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\''); + if !self.curr_is('\'') { + // Byte offsetting here is okay because the + // character before position `start` are an + // ascii single quote and ascii 'b'. + let last_pos = self.last_pos; + self.fatal_span_verbose( + start - BytePos(2), last_pos, + "unterminated byte constant".to_string()); + } + + let id = if valid { self.name_from(start) } else { token::intern("??") }; + self.bump(); // advance curr past token + return token::LIT_BYTE(id); + } + + fn scan_byte_string(&mut self) -> token::Token { + self.bump(); + let start = self.last_pos; + let mut valid = true; + + while !self.curr_is('"') { + if self.is_eof() { + let last_pos = self.last_pos; + self.fatal_span_(start, last_pos, + "unterminated double quote byte string"); + } + + let ch_start = self.last_pos; + let ch = self.curr.unwrap(); + self.bump(); + valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"'); + } + let id = if valid { self.name_from(start) } else { token::intern("??") }; + self.bump(); + return token::LIT_BINARY(id); + } + + fn scan_raw_byte_string(&mut self) -> token::Token { + let start_bpos = self.last_pos; + self.bump(); + let mut hash_count = 0u; + while self.curr_is('#') { + self.bump(); + hash_count += 1; + } + + if self.is_eof() { + let last_pos = self.last_pos; + self.fatal_span_(start_bpos, last_pos, "unterminated raw string"); + } else if !self.curr_is('"') { + let last_pos = self.last_pos; + let ch = self.curr.unwrap(); + self.fatal_span_char(start_bpos, last_pos, + "only `#` is allowed in raw string delimitation; \ + found illegal character", + ch); + } + self.bump(); + let content_start_bpos = self.last_pos; + let mut content_end_bpos; + 'outer: loop { + match self.curr { + None => { + let last_pos = self.last_pos; + self.fatal_span_(start_bpos, last_pos, "unterminated raw string") + }, + Some('"') => { + content_end_bpos = self.last_pos; + for _ in range(0, hash_count) { + self.bump(); + if !self.curr_is('#') { + continue 'outer; + } + } + break; + }, + Some(c) => if c > '\x7F' { + let last_pos = self.last_pos; + self.err_span_char( + last_pos, last_pos, "raw byte string must be ASCII", c); + } + } + self.bump(); + } + self.bump(); + return token::LIT_BINARY_RAW(self.name_from_to(content_start_bpos, content_end_bpos), + hash_count); + } } pub fn is_whitespace(c: Option) -> bool { @@ -1239,12 +1264,18 @@ fn in_range(c: Option, lo: char, hi: char) -> bool { fn is_dec_digit(c: Option) -> bool { return in_range(c, '0', '9'); } -pub fn is_line_non_doc_comment(s: &str) -> bool { - s.starts_with("////") +pub fn 
is_doc_comment(s: &str) -> bool { + let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') + || s.starts_with("//!"); + debug!("is `{}` a doc comment? {}", s, res); + res } -pub fn is_block_non_doc_comment(s: &str) -> bool { - s.starts_with("/***") +pub fn is_block_doc_comment(s: &str) -> bool { + let res = (s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') + || s.starts_with("/*!"); + debug!("is `{}` a doc comment? {}", s, res); + res } fn ident_start(c: Option) -> bool { @@ -1295,11 +1326,14 @@ mod test { "/* my source file */ \ fn main() { println!(\"zebra\"); }\n".to_string()); let id = str_to_ident("fn"); + assert_eq!(string_reader.next_token().tok, token::COMMENT); + assert_eq!(string_reader.next_token().tok, token::WS); let tok1 = string_reader.next_token(); let tok2 = TokenAndSpan{ tok:token::IDENT(id, false), sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}}; assert_eq!(tok1,tok2); + assert_eq!(string_reader.next_token().tok, token::WS); // the 'main' id is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(28)); // read another token: @@ -1328,6 +1362,7 @@ mod test { #[test] fn doublecolonparsing () { check_tokenization(setup(&mk_sh(), "a b".to_string()), vec!(mk_ident("a",false), + token::WS, mk_ident("b",false))); } @@ -1341,6 +1376,7 @@ mod test { #[test] fn dcparsing_3 () { check_tokenization(setup(&mk_sh(), "a ::b".to_string()), vec!(mk_ident("a",false), + token::WS, token::MOD_SEP, mk_ident("b",false))); } @@ -1349,22 +1385,23 @@ mod test { check_tokenization(setup(&mk_sh(), "a:: b".to_string()), vec!(mk_ident("a",true), token::MOD_SEP, + token::WS, mk_ident("b",false))); } #[test] fn character_a() { assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok, - token::LIT_CHAR('a')); + token::LIT_CHAR(token::intern("a"))); } #[test] fn character_space() { assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok, - token::LIT_CHAR(' ')); + token::LIT_CHAR(token::intern(" "))); } #[test] fn character_escaped() { assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok, - token::LIT_CHAR('\n')); + token::LIT_CHAR(token::intern("\\n"))); } #[test] fn lifetime_name() { @@ -1376,19 +1413,23 @@ mod test { assert_eq!(setup(&mk_sh(), "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token() .tok, - token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3)); + token::LIT_STR_RAW(token::intern("\"#a\\b\x00c\""), 3)); } #[test] fn line_doc_comments() { - assert!(!is_line_non_doc_comment("///")); - assert!(!is_line_non_doc_comment("/// blah")); - assert!(is_line_non_doc_comment("////")); + assert!(is_doc_comment("///")); + assert!(is_doc_comment("/// blah")); + assert!(!is_doc_comment("////")); } #[test] fn nested_block_comments() { - assert_eq!(setup(&mk_sh(), - "/* /* */ */'a'".to_string()).next_token().tok, - token::LIT_CHAR('a')); + let sh = mk_sh(); + let mut lexer = setup(&sh, "/* /* */ */'a'".to_string()); + match lexer.next_token().tok { + token::COMMENT => { }, + _ => fail!("expected a comment!") + } + assert_eq!(lexer.next_token().tok, token::LIT_CHAR(token::intern("a"))); } } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index fb4a23cf326ec..37c84c95af654 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -10,7 +10,6 @@ //! 
The main parser interface - use ast; use codemap::{Span, CodeMap, FileMap}; use diagnostic::{SpanHandler, mk_span_handler, default_handler, Auto}; @@ -32,7 +31,7 @@ pub mod common; pub mod classify; pub mod obsolete; -// info about a parsing session. +/// Info about a parsing session. pub struct ParseSess { pub span_diagnostic: SpanHandler, // better be the same as the one in the reader! /// Used to determine and report recursive mod inclusions @@ -241,14 +240,14 @@ pub fn file_to_filemap(sess: &ParseSess, path: &Path, spanopt: Option) unreachable!() } -// given a session and a string, add the string to -// the session's codemap and return the new filemap +/// Given a session and a string, add the string to +/// the session's codemap and return the new filemap pub fn string_to_filemap(sess: &ParseSess, source: String, path: String) -> Rc { sess.span_diagnostic.cm.new_filemap(path, source) } -// given a filemap, produce a sequence of token-trees +/// Given a filemap, produce a sequence of token-trees pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc) -> Vec { // it appears to me that the cfg doesn't matter here... indeed, @@ -259,7 +258,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc) p1.parse_all_token_trees() } -// given tts and cfg, produce a parser +/// Given tts and cfg, produce a parser pub fn tts_to_parser<'a>(sess: &'a ParseSess, tts: Vec, cfg: ast::CrateConfig) -> Parser<'a> { @@ -267,13 +266,354 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess, Parser::new(sess, cfg, box trdr) } -// abort if necessary +/// Abort if necessary pub fn maybe_aborted(result: T, mut p: Parser) -> T { p.abort_if_errors(); result } +/// Parse a string representing a character literal into its final form. +/// Rather than just accepting/rejecting a given literal, unescapes it as +/// well. Can take any slice prefixed by a character escape. Returns the +/// character and the number of characters consumed. +pub fn char_lit(lit: &str) -> (char, int) { + use std::{num, char}; + + let mut chars = lit.chars(); + let c = match (chars.next(), chars.next()) { + (Some(c), None) if c != '\\' => return (c, 1), + (Some('\\'), Some(c)) => match c { + '"' => Some('"'), + 'n' => Some('\n'), + 'r' => Some('\r'), + 't' => Some('\t'), + '\\' => Some('\\'), + '\'' => Some('\''), + '0' => Some('\0'), + _ => { None } + }, + _ => fail!("lexer accepted invalid char escape `{}`", lit) + }; + + match c { + Some(x) => return (x, 2), + None => { } + } + + let msg = format!("lexer should have rejected a bad character escape {}", lit); + let msg2 = msg.as_slice(); + + let esc: |uint| -> Option<(char, int)> = |len| + num::from_str_radix(lit.slice(2, len), 16) + .and_then(char::from_u32) + .map(|x| (x, len as int)); + + // Unicode escapes + return match lit.as_bytes()[1] as char { + 'x' | 'X' => esc(4), + 'u' => esc(6), + 'U' => esc(10), + _ => None, + }.expect(msg2); +} + +/// Parse a string representing a string literal into its final form. Does +/// unescaping. 
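The contract of char_lit above is easiest to see on concrete inputs; a sketch in modern test syntax (the patch itself exercises this only indirectly, through lit_from_token in the parser):

    // Illustrative cases for char_lit above: the pair is the decoded
    // character plus how many characters of the input slice were used.
    #[test]
    fn char_lit_examples() {
        assert_eq!(char_lit("a"), ('a', 1));                     // ordinary char
        assert_eq!(char_lit(r"\n"), ('\n', 2));                  // simple escape
        assert_eq!(char_lit(r"\x41"), ('A', 4));                 // \xHH
        assert_eq!(char_lit(r"\u00e9"), ('\u{e9}', 6));          // \uHHHH
        assert_eq!(char_lit(r"\U0001F600"), ('\u{1F600}', 10));  // \UHHHHHHHH
    }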
+pub fn str_lit(lit: &str) -> String { + debug!("parse_str_lit: given {}", lit.escape_default()); + let mut res = String::with_capacity(lit.len()); + + // FIXME #8372: This could be a for-loop if it didn't borrow the iterator + let error = |i| format!("lexer should have rejected {} at {}", lit, i); + + /// Eat everything up to a non-whitespace + fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) { + loop { + match it.peek().map(|x| x.val1()) { + Some(' ') | Some('\n') | Some('\r') | Some('\t') => { + it.next(); + }, + _ => { break; } + } + } + } + + let mut chars = lit.char_indices().peekable(); + loop { + match chars.next() { + Some((i, c)) => { + let em = error(i); + match c { + '\\' => { + if chars.peek().expect(em.as_slice()).val1() == '\n' { + eat(&mut chars); + } else if chars.peek().expect(em.as_slice()).val1() == '\r' { + chars.next(); + if chars.peek().expect(em.as_slice()).val1() != '\n' { + fail!("lexer accepted bare CR"); + } + eat(&mut chars); + } else { + // otherwise, a normal escape + let (c, n) = char_lit(lit.slice_from(i)); + for _ in range(0, n - 1) { // we don't need to move past the first \ + chars.next(); + } + res.push_char(c); + } + }, + '\r' => { + if chars.peek().expect(em.as_slice()).val1() != '\n' { + fail!("lexer accepted bare CR"); + } + chars.next(); + res.push_char('\n'); + } + c => res.push_char(c), + } + }, + None => break + } + } + + res.shrink_to_fit(); // probably not going to do anything, unless there was an escape. + debug!("parse_str_lit: returning {}", res); + res +} + +/// Parse a string representing a raw string literal into its final form. The +/// only operation this does is convert embedded CRLF into a single LF. +pub fn raw_str_lit(lit: &str) -> String { + debug!("raw_str_lit: given {}", lit.escape_default()); + let mut res = String::with_capacity(lit.len()); + + // FIXME #8372: This could be a for-loop if it didn't borrow the iterator + let mut chars = lit.chars().peekable(); + loop { + match chars.next() { + Some(c) => { + if c == '\r' { + if *chars.peek().unwrap() != '\n' { + fail!("lexer accepted bare CR"); + } + chars.next(); + res.push_char('\n'); + } else { + res.push_char(c); + } + }, + None => break + } + } + + res.shrink_to_fit(); + res +} + +pub fn float_lit(s: &str) -> ast::Lit_ { + debug!("float_lit: {}", s); + // FIXME #2252: bounds checking float literals is deferred until trans + let s2 = s.chars().filter(|&c| c != '_').collect::<String>(); + let s = s2.as_slice(); + + let mut ty = None; + + if s.ends_with("f32") { + ty = Some(ast::TyF32); + } else if s.ends_with("f64") { + ty = Some(ast::TyF64); + } + + + match ty { + Some(t) => { + ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t) + }, + None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s)) + } +} +
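Concretely, the loop above gives str_lit these observable cases (a sketch in modern test syntax, not part of the patch):

    #[test]
    fn str_lit_examples() {
        assert_eq!(str_lit("ab"), "ab");          // no escapes: copied through
        assert_eq!(str_lit("a\\nb"), "a\nb");     // ordinary escape, via char_lit
        assert_eq!(str_lit("a\\\n    b"), "ab");  // \<newline> swallows the indentation
        assert_eq!(str_lit("a\r\nb"), "a\nb");    // CRLF collapses to LF
    }

+/// Parse a string representing a byte literal into its final form.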
Similar to `char_lit` +pub fn byte_lit(lit: &str) -> (u8, uint) { + let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i); + + if lit.len() == 1 { + (lit.as_bytes()[0], 1) + } else { + assert!(lit.as_bytes()[0] == b'\\', err(0i)); + let b = match lit.as_bytes()[1] { + b'"' => b'"', + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'\'' => b'\'', + b'0' => b'\0', + _ => { + match ::std::num::from_str_radix::(lit.slice(2, 4), 16) { + Some(c) => + if c > 0xFF { + fail!(err(2)) + } else { + return (c as u8, 4) + }, + None => fail!(err(3)) + } + } + }; + return (b, 2); + } +} + +pub fn binary_lit(lit: &str) -> Rc> { + let mut res = Vec::with_capacity(lit.len()); + + // FIXME #8372: This could be a for-loop if it didn't borrow the iterator + let error = |i| format!("lexer should have rejected {} at {}", lit, i); + + // binary literals *must* be ASCII, but the escapes don't have to be + let mut chars = lit.as_bytes().iter().enumerate().peekable(); + loop { + match chars.next() { + Some((i, &c)) => { + if c == b'\\' { + if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' { + loop { + // eat everything up to a non-whitespace + match chars.peek().map(|x| *x.val1()) { + Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => { + chars.next(); + }, + _ => { break; } + } + } + } else { + // otherwise, a normal escape + let (c, n) = byte_lit(lit.slice_from(i)); + for _ in range(0, n - 1) { // we don't need to move past the first \ + chars.next(); + } + res.push(c); + } + } else { + res.push(c); + } + }, + None => { break; } + } + } + + Rc::new(res) +} + +pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ { + // s can only be ascii, byte indexing is fine + + let s2 = s.chars().filter(|&c| c != '_').collect::(); + let mut s = s2.as_slice(); + + debug!("parse_integer_lit: {}", s); + + if s.len() == 1 { + return ast::LitIntUnsuffixed((s.char_at(0)).to_digit(10).unwrap() as i64); + } + let mut base = 10; + let orig = s; + + #[deriving(Show)] + enum Result { + Nothing, + Signed(ast::IntTy), + Unsigned(ast::UintTy) + } + + impl Result { + fn suffix_len(&self) -> uint { + match *self { + Nothing => 0, + Signed(s) => s.suffix_len(), + Unsigned(u) => u.suffix_len() + } + } + } + + let mut ty = Nothing; + + + if s.char_at(0) == '0' { + match s.char_at(1) { + 'x' => base = 16, + 'o' => base = 8, + 'b' => base = 2, + _ => { } + } + } + + if base != 10 { + s = s.slice_from(2); + } + + let last = s.len() - 1; + match s.char_at(last) { + 'i' => ty = Signed(ast::TyI), + 'u' => ty = Unsigned(ast::TyU), + '8' => { + if s.len() > 2 { + match s.char_at(last - 1) { + 'i' => ty = Signed(ast::TyI8), + 'u' => ty = Unsigned(ast::TyU8), + _ => { } + } + } + }, + '6' => { + if s.len() > 3 && s.char_at(last - 1) == '1' { + match s.char_at(last - 2) { + 'i' => ty = Signed(ast::TyI16), + 'u' => ty = Unsigned(ast::TyU16), + _ => { } + } + } + }, + '2' => { + if s.len() > 3 && s.char_at(last - 1) == '3' { + match s.char_at(last - 2) { + 'i' => ty = Signed(ast::TyI32), + 'u' => ty = Unsigned(ast::TyU32), + _ => { } + } + } + }, + '4' => { + if s.len() > 3 && s.char_at(last - 1) == '6' { + match s.char_at(last - 2) { + 'i' => ty = Signed(ast::TyI64), + 'u' => ty = Unsigned(ast::TyU64), + _ => { } + } + } + }, + _ => { } + } + + + s = s.slice_to(s.len() - ty.suffix_len()); + + debug!("The suffix is {}, base {}, the new string is {}, the original \ + string was {}", ty, base, s, orig); + + let res: u64 = match ::std::num::from_str_radix(s, base) { + Some(r) => r, + 
None => { sd.span_err(sp, "int literal is too large"); 0 } + }; + + match ty { + Nothing => ast::LitIntUnsuffixed(res as i64), + Signed(t) => ast::LitInt(res as i64, t), + Unsigned(t) => ast::LitUint(res, t) + } +} #[cfg(test)] mod test { diff --git a/src/libsyntax/parse/obsolete.rs b/src/libsyntax/parse/obsolete.rs index 025684ae71e8c..cadae7ef12f80 100644 --- a/src/libsyntax/parse/obsolete.rs +++ b/src/libsyntax/parse/obsolete.rs @@ -38,8 +38,8 @@ pub enum ObsoleteSyntax { pub trait ParserObsoleteMethods { /// Reports an obsolete syntax non-fatal error. fn obsolete(&mut self, sp: Span, kind: ObsoleteSyntax); - // Reports an obsolete syntax non-fatal error, and returns - // a placeholder expression + /// Reports an obsolete syntax non-fatal error, and returns + /// a placeholder expression fn obsolete_expr(&mut self, sp: Span, kind: ObsoleteSyntax) -> Gc; fn report(&mut self, sp: Span, @@ -83,8 +83,8 @@ impl<'a> ParserObsoleteMethods for parser::Parser<'a> { self.report(sp, kind, kind_str, desc); } - // Reports an obsolete syntax non-fatal error, and returns - // a placeholder expression + /// Reports an obsolete syntax non-fatal error, and returns + /// a placeholder expression fn obsolete_expr(&mut self, sp: Span, kind: ObsoleteSyntax) -> Gc { self.obsolete(sp, kind); self.mk_expr(sp.lo, sp.hi, ExprLit(box(GC) respan(sp, LitNil))) diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index ac4cbf3aa8e55..743eeed9da5e2 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -33,8 +33,8 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod}; use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic}; use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl}; use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_}; -use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary}; -use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet}; +use ast::{LitBool, LitChar, LitByte, LitBinary}; +use ast::{LitNil, LitStr, LitUint, Local, LocalLet}; use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal}; use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability}; use ast::{NamedField, UnNeg, NoReturn, UnNot, P, Pat, PatEnum}; @@ -61,6 +61,7 @@ use ast_util::{as_prec, ident_to_path, lit_is_str, operator_prec}; use ast_util; use codemap::{Span, BytePos, Spanned, spanned, mk_sp}; use codemap; +use parse; use parse::attr::ParserAttr; use parse::classify; use parse::common::{SeqSep, seq_sep_none}; @@ -117,8 +118,8 @@ pub struct PathAndBounds { } enum ItemOrViewItem { - // Indicates a failure to parse any kind of item. The attributes are - // returned. + /// Indicates a failure to parse any kind of item. The attributes are + /// returned. IoviNone(Vec), IoviItem(Gc), IoviForeignItem(Gc), @@ -126,12 +127,12 @@ enum ItemOrViewItem { } -// Possibly accept an `INTERPOLATED` expression (a pre-parsed expression -// dropped into the token stream, which happens while parsing the -// result of macro expansion) -/* Placement of these is not as complex as I feared it would be. -The important thing is to make sure that lookahead doesn't balk -at INTERPOLATED tokens */ +/// Possibly accept an `INTERPOLATED` expression (a pre-parsed expression +/// dropped into the token stream, which happens while parsing the +/// result of macro expansion) +/// Placement of these is not as complex as I feared it would be. 
+/// The important thing is to make sure that lookahead doesn't balk +/// at INTERPOLATED tokens macro_rules! maybe_whole_expr ( ($p:expr) => ( { @@ -166,7 +167,7 @@ macro_rules! maybe_whole_expr ( ) ) -// As above, but for things other than expressions +/// As maybe_whole_expr, but for things other than expressions macro_rules! maybe_whole ( ($p:expr, $constructor:ident) => ( { @@ -287,14 +288,14 @@ struct ParsedItemsAndViewItems { pub struct Parser<'a> { pub sess: &'a ParseSess, - // the current token: + /// the current token: pub token: token::Token, - // the span of the current token: + /// the span of the current token: pub span: Span, - // the span of the prior token: + /// the span of the prior token: pub last_span: Span, pub cfg: CrateConfig, - // the previous token or None (only stashed sometimes). + /// the previous token or None (only stashed sometimes). pub last_token: Option>, pub buffer: [TokenAndSpan, ..4], pub buffer_start: int, @@ -324,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool { is_plain_ident(t) || *t == token::UNDERSCORE } +/// Get a token the parser cares about +fn real_token(rdr: &mut Reader) -> TokenAndSpan { + let mut t = rdr.next_token(); + loop { + match t.tok { + token::WS | token::COMMENT | token::SHEBANG(_) => { + t = rdr.next_token(); + }, + _ => break + } + } + t +} + impl<'a> Parser<'a> { pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig, mut rdr: Box) -> Parser<'a> { - let tok0 = rdr.next_token(); + let tok0 = real_token(rdr); let span = tok0.sp; let placeholder = TokenAndSpan { tok: token::UNDERSCORE, @@ -361,12 +376,13 @@ impl<'a> Parser<'a> { root_module_name: None, } } - // convert a token to a string using self's reader + + /// Convert a token to a string using self's reader pub fn token_to_string(token: &token::Token) -> String { token::to_string(token) } - // convert the current token to a string using self's reader + /// Convert the current token to a string using self's reader pub fn this_token_to_string(&mut self) -> String { Parser::token_to_string(&self.token) } @@ -383,8 +399,8 @@ impl<'a> Parser<'a> { self.fatal(format!("unexpected token: `{}`", this_token).as_slice()); } - // expect and consume the token t. Signal an error if - // the next token is not t. + /// Expect and consume the token t. Signal an error if + /// the next token is not t. pub fn expect(&mut self, t: &token::Token) { if self.token == *t { self.bump(); @@ -397,9 +413,9 @@ impl<'a> Parser<'a> { } } - // Expect next token to be edible or inedible token. If edible, - // then consume it; if inedible, then return without consuming - // anything. Signal a fatal error if next token is unexpected. + /// Expect next token to be edible or inedible token. If edible, + /// then consume it; if inedible, then return without consuming + /// anything. Signal a fatal error if next token is unexpected. pub fn expect_one_of(&mut self, edible: &[token::Token], inedible: &[token::Token]) { @@ -437,9 +453,9 @@ impl<'a> Parser<'a> { } } - // Check for erroneous `ident { }`; if matches, signal error and - // recover (without consuming any expected input token). Returns - // true if and only if input was consumed for recovery. + /// Check for erroneous `ident { }`; if matches, signal error and + /// recover (without consuming any expected input token). Returns + /// true if and only if input was consumed for recovery. 
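Since the lexer now hands back WS, COMMENT and SHEBANG tokens instead of silently skipping them, real_token above is the entire cost of that change to the parser: one loop that discards trivia. The same shape in isolation (modern Rust; Tok is a toy type of mine, not the patch's token::Token):

    // Toy real_token: pull from a token source until something that isn't
    // whitespace, a comment, or a shebang appears.
    #[derive(Debug, PartialEq)]
    enum Tok { Ws, Comment, Shebang, Ident(&'static str) }

    fn real_token(mut next_token: impl FnMut() -> Tok) -> Tok {
        loop {
            match next_token() {
                Tok::Ws | Tok::Comment | Tok::Shebang => continue, // trivia
                t => return t,
            }
        }
    }

Feeding it Ws, Comment, Ident("fn") returns Ident("fn"); bump and the look-ahead buffer below call real_token instead of reader.next_token(), so pretty-printing and highlighting can see the trivia while parsing proper never does.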
pub fn check_for_erroneous_unit_struct_expecting(&mut self, expected: &[token::Token]) -> bool { if self.token == token::LBRACE && expected.iter().all(|t| *t != token::LBRACE) @@ -456,9 +472,9 @@ impl<'a> Parser<'a> { } } - // Commit to parsing a complete expression `e` expected to be - // followed by some token from the set edible + inedible. Recover - // from anticipated input errors, discarding erroneous characters. + /// Commit to parsing a complete expression `e` expected to be + /// followed by some token from the set edible + inedible. Recover + /// from anticipated input errors, discarding erroneous characters. pub fn commit_expr(&mut self, e: Gc, edible: &[token::Token], inedible: &[token::Token]) { debug!("commit_expr {:?}", e); @@ -479,9 +495,9 @@ impl<'a> Parser<'a> { self.commit_expr(e, &[edible], &[]) } - // Commit to parsing a complete statement `s`, which expects to be - // followed by some token from the set edible + inedible. Check - // for recoverable input errors, discarding erroneous characters. + /// Commit to parsing a complete statement `s`, which expects to be + /// followed by some token from the set edible + inedible. Check + /// for recoverable input errors, discarding erroneous characters. pub fn commit_stmt(&mut self, s: Gc, edible: &[token::Token], inedible: &[token::Token]) { debug!("commit_stmt {:?}", s); @@ -526,8 +542,8 @@ impl<'a> Parser<'a> { id: ast::DUMMY_NODE_ID }) } - // consume token 'tok' if it exists. Returns true if the given - // token was present, false otherwise. + /// Consume token 'tok' if it exists. Returns true if the given + /// token was present, false otherwise. pub fn eat(&mut self, tok: &token::Token) -> bool { let is_present = self.token == *tok; if is_present { self.bump() } @@ -538,8 +554,8 @@ impl<'a> Parser<'a> { token::is_keyword(kw, &self.token) } - // if the next token is the given keyword, eat it and return - // true. Otherwise, return false. + /// If the next token is the given keyword, eat it and return + /// true. Otherwise, return false. pub fn eat_keyword(&mut self, kw: keywords::Keyword) -> bool { match self.token { token::IDENT(sid, false) if kw.to_name() == sid.name => { @@ -550,9 +566,9 @@ impl<'a> Parser<'a> { } } - // if the given word is not a keyword, signal an error. - // if the next token is not the given word, signal an error. - // otherwise, eat it. + /// If the given word is not a keyword, signal an error. + /// If the next token is not the given word, signal an error. + /// Otherwise, eat it. pub fn expect_keyword(&mut self, kw: keywords::Keyword) { if !self.eat_keyword(kw) { let id_interned_str = token::get_name(kw.to_name()); @@ -562,7 +578,7 @@ impl<'a> Parser<'a> { } } - // signal an error if the given string is a strict keyword + /// Signal an error if the given string is a strict keyword pub fn check_strict_keywords(&mut self) { if token::is_strict_keyword(&self.token) { let token_str = self.this_token_to_string(); @@ -573,7 +589,7 @@ impl<'a> Parser<'a> { } } - // signal an error if the current token is a reserved keyword + /// Signal an error if the current token is a reserved keyword pub fn check_reserved_keywords(&mut self) { if token::is_reserved_keyword(&self.token) { let token_str = self.this_token_to_string(); @@ -582,8 +598,8 @@ impl<'a> Parser<'a> { } } - // Expect and consume an `&`. If `&&` is seen, replace it with a single - // `&` and continue. If an `&` is not seen, signal an error. + /// Expect and consume an `&`. If `&&` is seen, replace it with a single + /// `&` and continue. 
If an `&` is not seen, signal an error. fn expect_and(&mut self) { match self.token { token::BINOP(token::AND) => self.bump(), @@ -603,8 +619,8 @@ impl<'a> Parser<'a> { } } - // Expect and consume a `|`. If `||` is seen, replace it with a single - // `|` and continue. If a `|` is not seen, signal an error. + /// Expect and consume a `|`. If `||` is seen, replace it with a single + /// `|` and continue. If a `|` is not seen, signal an error. fn expect_or(&mut self) { match self.token { token::BINOP(token::OR) => self.bump(), @@ -624,26 +640,26 @@ impl<'a> Parser<'a> { } } - // Attempt to consume a `<`. If `<<` is seen, replace it with a single - // `<` and continue. If a `<` is not seen, return false. - // - // This is meant to be used when parsing generics on a path to get the - // starting token. The `force` parameter is used to forcefully break up a - // `<<` token. If `force` is false, then `<<` is only broken when a lifetime - // shows up next. For example, consider the expression: - // - // foo as bar << test - // - // The parser needs to know if `bar <<` is the start of a generic path or if - // it's a left-shift token. If `test` were a lifetime, then it's impossible - // for the token to be a left-shift, but if it's not a lifetime, then it's - // considered a left-shift. - // - // The reason for this is that the only current ambiguity with `<<` is when - // parsing closure types: - // - // foo::<<'a> ||>(); - // impl Foo<<'a> ||>() { ... } + /// Attempt to consume a `<`. If `<<` is seen, replace it with a single + /// `<` and continue. If a `<` is not seen, return false. + /// + /// This is meant to be used when parsing generics on a path to get the + /// starting token. The `force` parameter is used to forcefully break up a + /// `<<` token. If `force` is false, then `<<` is only broken when a lifetime + /// shows up next. For example, consider the expression: + /// + /// foo as bar << test + /// + /// The parser needs to know if `bar <<` is the start of a generic path or if + /// it's a left-shift token. If `test` were a lifetime, then it's impossible + /// for the token to be a left-shift, but if it's not a lifetime, then it's + /// considered a left-shift. + /// + /// The reason for this is that the only current ambiguity with `<<` is when + /// parsing closure types: + /// + /// foo::<<'a> ||>(); + /// impl Foo<<'a> ||>() { ... } fn eat_lt(&mut self, force: bool) -> bool { match self.token { token::LT => { self.bump(); true } @@ -675,7 +691,7 @@ impl<'a> Parser<'a> { } } - // Parse a sequence bracketed by `|` and `|`, stopping before the `|`. + /// Parse a sequence bracketed by `|` and `|`, stopping before the `|`. fn parse_seq_to_before_or( &mut self, sep: &token::Token, @@ -696,9 +712,9 @@ impl<'a> Parser<'a> { vector } - // expect and consume a GT. if a >> is seen, replace it - // with a single > and continue. If a GT is not seen, - // signal an error. + /// Expect and consume a GT. if a >> is seen, replace it + /// with a single > and continue. If a GT is not seen, + /// signal an error. pub fn expect_gt(&mut self) { match self.token { token::GT => self.bump(), @@ -727,8 +743,8 @@ impl<'a> Parser<'a> { } } - // parse a sequence bracketed by '<' and '>', stopping - // before the '>'. + /// Parse a sequence bracketed by '<' and '>', stopping + /// before the '>'. pub fn parse_seq_to_before_gt( &mut self, sep: Option, @@ -762,9 +778,9 @@ impl<'a> Parser<'a> { return v; } - // parse a sequence, including the closing delimiter. 
The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_seq_to_end( &mut self, ket: &token::Token, @@ -776,9 +792,9 @@ impl<'a> Parser<'a> { val } - // parse a sequence, not including the closing delimiter. The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, not including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_seq_to_before_end( &mut self, ket: &token::Token, @@ -801,9 +817,9 @@ impl<'a> Parser<'a> { return v; } - // parse a sequence, including the closing delimiter. The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_unspanned_seq( &mut self, bra: &token::Token, @@ -817,8 +833,8 @@ impl<'a> Parser<'a> { result } - // parse a sequence parameter of enum variant. For consistency purposes, - // these should not be empty. + /// Parse a sequence parameter of enum variant. For consistency purposes, + /// these should not be empty. pub fn parse_enum_variant_seq( &mut self, bra: &token::Token, @@ -852,7 +868,7 @@ impl<'a> Parser<'a> { spanned(lo, hi, result) } - // advance the parser by one token + /// Advance the parser by one token pub fn bump(&mut self) { self.last_span = self.span; // Stash token for error recovery (sometimes; clone is not necessarily cheap). @@ -862,7 +878,7 @@ impl<'a> Parser<'a> { None }; let next = if self.buffer_start == self.buffer_end { - self.reader.next_token() + real_token(self.reader) } else { // Avoid token copies with `replace`. let buffer_start = self.buffer_start as uint; @@ -880,14 +896,14 @@ impl<'a> Parser<'a> { self.tokens_consumed += 1u; } - // Advance the parser by one token and return the bumped token. + /// Advance the parser by one token and return the bumped token. pub fn bump_and_get(&mut self) -> token::Token { let old_token = replace(&mut self.token, token::UNDERSCORE); self.bump(); old_token } - // EFFECT: replace the current token and span with the given one + /// EFFECT: replace the current token and span with the given one pub fn replace_token(&mut self, next: token::Token, lo: BytePos, @@ -906,7 +922,7 @@ impl<'a> Parser<'a> { -> R { let dist = distance as int; while self.buffer_length() < dist { - self.buffer[self.buffer_end as uint] = self.reader.next_token(); + self.buffer[self.buffer_end as uint] = real_token(self.reader); self.buffer_end = (self.buffer_end + 1) & 3; } f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok) @@ -940,8 +956,8 @@ impl<'a> Parser<'a> { token::get_ident(id) } - // Is the current token one of the keywords that signals a bare function - // type? + /// Is the current token one of the keywords that signals a bare function + /// type? pub fn token_is_bare_fn_keyword(&mut self) -> bool { if token::is_keyword(keywords::Fn, &self.token) { return true @@ -955,14 +971,14 @@ impl<'a> Parser<'a> { false } - // Is the current token one of the keywords that signals a closure type? + /// Is the current token one of the keywords that signals a closure type? 
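The bump and look_ahead changes above thread real_token through the parser's four-slot ring buffer (the `& 3` masks are modulo 4 on a power-of-two capacity). The buffering logic in miniature (modern Rust; u32 stands in for TokenAndSpan, names mine):

    // Lazily filled four-slot look-ahead ring, as in Parser::look_ahead above.
    struct LookAhead<I: Iterator<Item = u32>> {
        inner: I,
        buf: [u32; 4],
        start: usize, // oldest buffered token
        end: usize,   // one past the newest buffered token
    }

    impl<I: Iterator<Item = u32>> LookAhead<I> {
        fn new(inner: I) -> Self {
            LookAhead { inner, buf: [0; 4], start: 0, end: 0 }
        }
        fn buffered(&self) -> usize {
            self.end.wrapping_sub(self.start) & 3
        }
        // Peek `dist` tokens ahead (1-based), pulling from the source only as
        // needed; 0 stands in for EOF. At most three tokens can be pending,
        // matching the [TokenAndSpan, ..4] buffer above. (A full version
        // would also advance `start` when the parser bumps.)
        fn look_ahead(&mut self, dist: usize) -> u32 {
            debug_assert!((1..=3).contains(&dist));
            while self.buffered() < dist {
                self.buf[self.end] = self.inner.next().unwrap_or(0);
                self.end = (self.end + 1) & 3;
            }
            self.buf[(self.start + dist - 1) & 3]
        }
    }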
pub fn token_is_closure_keyword(&mut self) -> bool { token::is_keyword(keywords::Unsafe, &self.token) || token::is_keyword(keywords::Once, &self.token) } - // Is the current token one of the keywords that signals an old-style - // closure type (with explicit sigil)? + /// Is the current token one of the keywords that signals an old-style + /// closure type (with explicit sigil)? pub fn token_is_old_style_closure_keyword(&mut self) -> bool { token::is_keyword(keywords::Unsafe, &self.token) || token::is_keyword(keywords::Once, &self.token) || @@ -983,7 +999,7 @@ impl<'a> Parser<'a> { } } - // parse a TyBareFn type: + /// parse a TyBareFn type: pub fn parse_ty_bare_fn(&mut self) -> Ty_ { /* @@ -1014,8 +1030,8 @@ impl<'a> Parser<'a> { }); } - // Parses a procedure type (`proc`). The initial `proc` keyword must - // already have been parsed. + /// Parses a procedure type (`proc`). The initial `proc` keyword must + /// already have been parsed. pub fn parse_proc_type(&mut self) -> Ty_ { /* @@ -1063,7 +1079,7 @@ impl<'a> Parser<'a> { }) } - // parse a TyClosure type + /// Parse a TyClosure type pub fn parse_ty_closure(&mut self) -> Ty_ { /* @@ -1154,7 +1170,7 @@ impl<'a> Parser<'a> { } } - // parse a function type (following the 'fn') + /// Parse a function type (following the 'fn') pub fn parse_ty_fn_decl(&mut self, allow_variadic: bool) -> (P, Vec) { /* @@ -1186,7 +1202,7 @@ impl<'a> Parser<'a> { (decl, lifetimes) } - // parse the methods in a trait declaration + /// Parse the methods in a trait declaration pub fn parse_trait_methods(&mut self) -> Vec { self.parse_unspanned_seq( &token::LBRACE, @@ -1255,15 +1271,15 @@ impl<'a> Parser<'a> { }) } - // parse a possibly mutable type + /// Parse a possibly mutable type pub fn parse_mt(&mut self) -> MutTy { let mutbl = self.parse_mutability(); let t = self.parse_ty(true); MutTy { ty: t, mutbl: mutbl } } - // parse [mut/const/imm] ID : TY - // now used only by obsolete record syntax parser... + /// Parse [mut/const/imm] ID : TY + /// now used only by obsolete record syntax parser... pub fn parse_ty_field(&mut self) -> TypeField { let lo = self.span.lo; let mutbl = self.parse_mutability(); @@ -1278,7 +1294,7 @@ impl<'a> Parser<'a> { } } - // parse optional return type [ -> TY ] in function decl + /// Parse optional return type [ -> TY ] in function decl pub fn parse_ret_ty(&mut self) -> (RetStyle, P) { return if self.eat(&token::RARROW) { let lo = self.span.lo; @@ -1478,8 +1494,8 @@ impl<'a> Parser<'a> { } } - // This version of parse arg doesn't necessarily require - // identifier names. + /// This version of parse arg doesn't necessarily require + /// identifier names. pub fn parse_arg_general(&mut self, require_name: bool) -> Arg { let pat = if require_name || self.is_named_argument() { debug!("parse_arg_general parse_pat (require_name:{:?})", @@ -1504,12 +1520,12 @@ impl<'a> Parser<'a> { } } - // parse a single function argument + /// Parse a single function argument pub fn parse_arg(&mut self) -> Arg { self.parse_arg_general(true) } - // parse an argument in a lambda header e.g. |arg, arg| + /// Parse an argument in a lambda header e.g. |arg, arg| pub fn parse_fn_block_arg(&mut self) -> Arg { let pat = self.parse_pat(); let t = if self.eat(&token::COLON) { @@ -1539,34 +1555,32 @@ impl<'a> Parser<'a> { } } - // matches token_lit = LIT_INT | ... + /// Matches token_lit = LIT_INTEGER | ... 
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { - token::LIT_BYTE(i) => LitByte(i), - token::LIT_CHAR(i) => LitChar(i), - token::LIT_INT(i, it) => LitInt(i, it), - token::LIT_UINT(u, ut) => LitUint(u, ut), - token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i), - token::LIT_FLOAT(s, ft) => { - LitFloat(self.id_to_interned_str(s), ft) - } - token::LIT_FLOAT_UNSUFFIXED(s) => { - LitFloatUnsuffixed(self.id_to_interned_str(s)) - } + token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()), + token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()), + token::LIT_INTEGER(s) => parse::integer_lit(s.as_str(), + &self.sess.span_diagnostic, self.span), + token::LIT_FLOAT(s) => parse::float_lit(s.as_str()), token::LIT_STR(s) => { - LitStr(self.id_to_interned_str(s), ast::CookedStr) + LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), + ast::CookedStr) } token::LIT_STR_RAW(s, n) => { - LitStr(self.id_to_interned_str(s), ast::RawStr(n)) + LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()), + ast::RawStr(n)) } - token::LIT_BINARY_RAW(ref v, _) | - token::LIT_BINARY(ref v) => LitBinary(v.clone()), + token::LIT_BINARY(i) => + LitBinary(parse::binary_lit(i.as_str())), + token::LIT_BINARY_RAW(i, _) => + LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), token::LPAREN => { self.expect(&token::RPAREN); LitNil }, _ => { self.unexpected_last(tok); } } } - // matches lit = true | false | token_lit + /// Matches lit = true | false | token_lit pub fn parse_lit(&mut self) -> Lit { let lo = self.span.lo; let lit = if self.eat_keyword(keywords::True) { @@ -1581,7 +1595,7 @@ impl<'a> Parser<'a> { codemap::Spanned { node: lit, span: mk_sp(lo, self.last_span.hi) } } - // matches '-' lit | lit + /// matches '-' lit | lit pub fn parse_literal_maybe_minus(&mut self) -> Gc { let minus_lo = self.span.lo; let minus_present = self.eat(&token::BINOP(token::MINUS)); @@ -1719,7 +1733,7 @@ impl<'a> Parser<'a> { } /// Parses a single lifetime - // matches lifetime = LIFETIME + /// Matches lifetime = LIFETIME pub fn parse_lifetime(&mut self) -> ast::Lifetime { match self.token { token::LIFETIME(i) => { @@ -1779,7 +1793,7 @@ impl<'a> Parser<'a> { token::is_keyword(keywords::Const, tok) } - // parse mutability declaration (mut/const/imm) + /// Parse mutability declaration (mut/const/imm) pub fn parse_mutability(&mut self) -> Mutability { if self.eat_keyword(keywords::Mut) { MutMutable @@ -1788,7 +1802,7 @@ impl<'a> Parser<'a> { } } - // parse ident COLON expr + /// Parse ident COLON expr pub fn parse_field(&mut self) -> Field { let lo = self.span.lo; let i = self.parse_ident(); @@ -1867,9 +1881,9 @@ impl<'a> Parser<'a> { } } - // at the bottom (top?) of the precedence hierarchy, - // parse things like parenthesized exprs, - // macros, return, etc. + /// At the bottom (top?) of the precedence hierarchy, + /// parse things like parenthesized exprs, + /// macros, return, etc. 
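lit_from_token above is where the new string-carrying tokens meet the AST: every arm hands the interned spelling to one of the parse::*_lit converters added earlier in this patch. The integer path, compressed into a sketch (modern Rust; suffix typing and span diagnostics elided, names mine):

    // Roughly what parse::integer_lit does with the spelling carried by
    // LIT_INTEGER: drop underscores, peel the suffix, pick the base from
    // the prefix, then let from_str_radix do the work.
    fn integer_value(spelling: &str) -> u64 {
        let cleaned: String = spelling.chars().filter(|&c| c != '_').collect();
        let mut s = cleaned.as_str();
        for suf in ["i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "i", "u"] {
            if let Some(body) = s.strip_suffix(suf) {
                s = body;
                break;
            }
        }
        let (base, digits) = match s.split_at(s.len().min(2)) {
            ("0x", d) => (16, d),
            ("0o", d) => (8, d),
            ("0b", d) => (2, d),
            _ => (10, s),
        };
        u64::from_str_radix(digits, base).unwrap_or(0) // "too large" reports and yields 0 above
    }

integer_value("0b101u8") is 5 and integer_value("1_000") is 1000; the real integer_lit additionally records whether the suffix asked for a signed or unsigned type.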
pub fn parse_bottom_expr(&mut self) -> Gc { maybe_whole_expr!(self); @@ -1934,7 +1948,12 @@ impl<'a> Parser<'a> { }); return self.mk_expr(lo, body.span.hi, ExprProc(decl, fakeblock)); }, - token::IDENT(id @ ast::Ident{name:token::SELF_KEYWORD_NAME,ctxt:_},false) => { + // FIXME #13626: Should be able to stick in + // token::SELF_KEYWORD_NAME + token::IDENT(id @ ast::Ident{ + name: ast::Name(token::SELF_KEYWORD_NAME_NUM), + ctxt: _ + } ,false) => { self.bump(); let path = ast_util::ident_to_path(mk_sp(lo, hi), id); ex = ExprPath(path); @@ -2107,7 +2126,7 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ex); } - // parse a block or unsafe block + /// Parse a block or unsafe block pub fn parse_block_expr(&mut self, lo: BytePos, blk_mode: BlockCheckMode) -> Gc { self.expect(&token::LBRACE); @@ -2115,7 +2134,7 @@ impl<'a> Parser<'a> { return self.mk_expr(blk.span.lo, blk.span.hi, ExprBlock(blk)); } - // parse a.b or a(13) or a[4] or just a + /// parse a.b or a(13) or a[4] or just a pub fn parse_dot_or_call_expr(&mut self) -> Gc { let b = self.parse_bottom_expr(); self.parse_dot_or_call_expr_with(b) @@ -2199,8 +2218,8 @@ impl<'a> Parser<'a> { return e; } - // parse an optional separator followed by a kleene-style - // repetition token (+ or *). + /// Parse an optional separator followed by a kleene-style + /// repetition token (+ or *). pub fn parse_sep_and_zerok(&mut self) -> (Option, bool) { fn parse_zerok(parser: &mut Parser) -> Option { match parser.token { @@ -2225,7 +2244,7 @@ impl<'a> Parser<'a> { } } - // parse a single token tree from the input. + /// parse a single token tree from the input. pub fn parse_token_tree(&mut self) -> TokenTree { // FIXME #6994: currently, this is too eager. It // parses token trees but also identifies TTSeq's @@ -2341,9 +2360,9 @@ impl<'a> Parser<'a> { } } - // This goofy function is necessary to correctly match parens in Matcher's. - // Otherwise, `$( ( )` would be a valid Matcher, and `$( () )` would be - // invalid. It's similar to common::parse_seq. + /// This goofy function is necessary to correctly match parens in Matcher's. + /// Otherwise, `$( ( )` would be a valid Matcher, and `$( () )` would be + /// invalid. It's similar to common::parse_seq. pub fn parse_matcher_subseq_upto(&mut self, name_idx: &mut uint, ket: &token::Token) @@ -2392,7 +2411,7 @@ impl<'a> Parser<'a> { return spanned(lo, self.span.hi, m); } - // parse a prefix-operator expr + /// Parse a prefix-operator expr pub fn parse_prefix_expr(&mut self) -> Gc { let lo = self.span.lo; let hi; @@ -2500,13 +2519,13 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ex); } - // parse an expression of binops + /// Parse an expression of binops pub fn parse_binops(&mut self) -> Gc { let prefix_expr = self.parse_prefix_expr(); self.parse_more_binops(prefix_expr, 0) } - // parse an expression of binops of at least min_prec precedence + /// Parse an expression of binops of at least min_prec precedence pub fn parse_more_binops(&mut self, lhs: Gc, min_prec: uint) -> Gc { if self.expr_is_complete(lhs) { return lhs; } @@ -2554,9 +2573,9 @@ impl<'a> Parser<'a> { } } - // parse an assignment expression.... - // actually, this seems to be the main entry point for - // parsing an arbitrary expression. + /// Parse an assignment expression.... + /// actually, this seems to be the main entry point for + /// parsing an arbitrary expression. 
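parse_more_binops above is standard precedence climbing: fold operators of at least min_prec into the left-hand side, parsing each right-hand side one level tighter. The skeleton over a toy token list (modern Rust; deliberately not a transcription of the rustc version):

    #[derive(Clone, Copy)]
    enum T { Num(u64), Plus, Star }

    // Fold operators of at least `min_prec`; recursing with `prec + 1` for
    // the right-hand side makes `2 + 3 * 4` group as `2 + (3 * 4)` while
    // keeping same-precedence operators left-associative.
    fn more_binops(toks: &[T], pos: &mut usize, mut lhs: u64, min_prec: u8) -> u64 {
        while *pos < toks.len() {
            let (prec, apply): (u8, fn(u64, u64) -> u64) = match toks[*pos] {
                T::Plus => (1, |a, b| a + b),
                T::Star => (2, |a, b| a * b),
                T::Num(_) => break,
            };
            if prec < min_prec {
                break;
            }
            *pos += 1;
            let T::Num(rhs) = toks[*pos] else { panic!("operand expected") };
            *pos += 1;
            let rhs = more_binops(toks, pos, rhs, prec + 1);
            lhs = apply(lhs, rhs);
        }
        lhs
    }

With [Num(2), Plus, Num(3), Star, Num(4)], pos = 1, lhs = 2 and min_prec = 0 this returns 14.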
pub fn parse_assign_expr(&mut self) -> Gc { let lo = self.span.lo; let lhs = self.parse_binops(); @@ -2590,7 +2609,7 @@ impl<'a> Parser<'a> { } } - // parse an 'if' expression ('if' token already eaten) + /// Parse an 'if' expression ('if' token already eaten) pub fn parse_if_expr(&mut self) -> Gc { let lo = self.last_span.lo; let cond = self.parse_expr_res(RESTRICT_NO_STRUCT_LITERAL); @@ -2605,7 +2624,7 @@ impl<'a> Parser<'a> { self.mk_expr(lo, hi, ExprIf(cond, thn, els)) } - // `|args| { ... }` or `{ ...}` like in `do` expressions + /// `|args| { ... }` or `{ ...}` like in `do` expressions pub fn parse_lambda_block_expr(&mut self) -> Gc { self.parse_lambda_expr_( |p| { @@ -2634,15 +2653,15 @@ impl<'a> Parser<'a> { }) } - // `|args| expr` + /// `|args| expr` pub fn parse_lambda_expr(&mut self) -> Gc { self.parse_lambda_expr_(|p| p.parse_fn_block_decl(), |p| p.parse_expr()) } - // parse something of the form |args| expr - // this is used both in parsing a lambda expr - // and in parsing a block expr as e.g. in for... + /// parse something of the form |args| expr + /// this is used both in parsing a lambda expr + /// and in parsing a block expr as e.g. in for... pub fn parse_lambda_expr_(&mut self, parse_decl: |&mut Parser| -> P, parse_body: |&mut Parser| -> Gc) @@ -2671,7 +2690,7 @@ impl<'a> Parser<'a> { } } - // parse a 'for' .. 'in' expression ('for' token already eaten) + /// Parse a 'for' .. 'in' expression ('for' token already eaten) pub fn parse_for_expr(&mut self, opt_ident: Option) -> Gc { // Parse: `for in ` @@ -2737,12 +2756,12 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ExprMatch(discriminant, arms)); } - // parse an expression + /// Parse an expression pub fn parse_expr(&mut self) -> Gc { return self.parse_expr_res(UNRESTRICTED); } - // parse an expression, subject to the given restriction + /// Parse an expression, subject to the given restriction pub fn parse_expr_res(&mut self, r: restriction) -> Gc { let old = self.restriction; self.restriction = r; @@ -2751,7 +2770,7 @@ impl<'a> Parser<'a> { return e; } - // parse the RHS of a local variable declaration (e.g. '= 14;') + /// Parse the RHS of a local variable declaration (e.g. '= 14;') fn parse_initializer(&mut self) -> Option> { if self.token == token::EQ { self.bump(); @@ -2761,7 +2780,7 @@ impl<'a> Parser<'a> { } } - // parse patterns, separated by '|' s + /// Parse patterns, separated by '|' s fn parse_pats(&mut self) -> Vec> { let mut pats = Vec::new(); loop { @@ -2824,7 +2843,7 @@ impl<'a> Parser<'a> { (before, slice, after) } - // parse the fields of a struct-like pattern + /// Parse the fields of a struct-like pattern fn parse_pat_fields(&mut self) -> (Vec , bool) { let mut fields = Vec::new(); let mut etc = false; @@ -2884,7 +2903,7 @@ impl<'a> Parser<'a> { return (fields, etc); } - // parse a pattern. + /// Parse a pattern. 
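The (before, slice, after) triple returned a few hunks above comes out of the vector-pattern parser: the elements before the slice binding, the optional slice itself, and the elements after it. The same decomposition in today's pattern syntax (a sketch, not the patch's code):

    // For [a, rest @ .., z]: `a` is "before", `rest` the slice, `z` "after".
    fn split(v: &[i32]) -> (&[i32], &[i32], &[i32]) {
        match v {
            [first, middle @ .., last] => {
                (std::slice::from_ref(first), middle, std::slice::from_ref(last))
            }
            _ => (&[], v, &[]),
        }
    }

split(&[1, 2, 3, 4]) is (&[1], &[2, 3], &[4]).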
pub fn parse_pat(&mut self) -> Gc { maybe_whole!(self, NtPat); @@ -3126,9 +3145,9 @@ impl<'a> Parser<'a> { } } - // parse ident or ident @ pat - // used by the copy foo and ref foo patterns to give a good - // error message when parsing mistakes like ref foo(a,b) + /// Parse ident or ident @ pat + /// used by the copy foo and ref foo patterns to give a good + /// error message when parsing mistakes like ref foo(a,b) fn parse_pat_ident(&mut self, binding_mode: ast::BindingMode) -> ast::Pat_ { @@ -3162,7 +3181,7 @@ impl<'a> Parser<'a> { PatIdent(binding_mode, name, sub) } - // parse a local variable declaration + /// Parse a local variable declaration fn parse_local(&mut self) -> Gc { let lo = self.span.lo; let pat = self.parse_pat(); @@ -3186,14 +3205,14 @@ impl<'a> Parser<'a> { } } - // parse a "let" stmt + /// Parse a "let" stmt fn parse_let(&mut self) -> Gc { let lo = self.span.lo; let local = self.parse_local(); box(GC) spanned(lo, self.last_span.hi, DeclLocal(local)) } - // parse a structure field + /// Parse a structure field fn parse_name_and_ty(&mut self, pr: Visibility, attrs: Vec ) -> StructField { let lo = self.span.lo; @@ -3211,8 +3230,8 @@ impl<'a> Parser<'a> { }) } - // parse a statement. may include decl. - // precondition: any attributes are parsed already + /// Parse a statement. may include decl. + /// Precondition: any attributes are parsed already pub fn parse_stmt(&mut self, item_attrs: Vec) -> Gc { maybe_whole!(self, NtStmt); @@ -3315,13 +3334,13 @@ impl<'a> Parser<'a> { } } - // is this expression a successfully-parsed statement? + /// Is this expression a successfully-parsed statement? fn expr_is_complete(&mut self, e: Gc) -> bool { return self.restriction == RESTRICT_STMT_EXPR && !classify::expr_requires_semi_to_be_stmt(e); } - // parse a block. No inner attrs are allowed. + /// Parse a block. No inner attrs are allowed. pub fn parse_block(&mut self) -> P { maybe_whole!(no_clone self, NtBlock); @@ -3331,7 +3350,7 @@ impl<'a> Parser<'a> { return self.parse_block_tail_(lo, DefaultBlock, Vec::new()); } - // parse a block. Inner attrs are allowed. + /// Parse a block. Inner attrs are allowed. fn parse_inner_attrs_and_block(&mut self) -> (Vec , P) { @@ -3344,15 +3363,15 @@ impl<'a> Parser<'a> { (inner, self.parse_block_tail_(lo, DefaultBlock, next)) } - // Precondition: already parsed the '{' or '#{' - // I guess that also means "already parsed the 'impure'" if - // necessary, and this should take a qualifier. - // some blocks start with "#{"... + /// Precondition: already parsed the '{' or '#{' + /// I guess that also means "already parsed the 'impure'" if + /// necessary, and this should take a qualifier. + /// Some blocks start with "#{"... fn parse_block_tail(&mut self, lo: BytePos, s: BlockCheckMode) -> P { self.parse_block_tail_(lo, s, Vec::new()) } - // parse the rest of a block expression or function body + /// Parse the rest of a block expression or function body fn parse_block_tail_(&mut self, lo: BytePos, s: BlockCheckMode, first_item_attrs: Vec ) -> P { let mut stmts = Vec::new(); @@ -3510,18 +3529,18 @@ impl<'a> Parser<'a> { } } - // matches bounds = ( boundseq )? - // where boundseq = ( bound + boundseq ) | bound - // and bound = 'static | ty - // Returns "None" if there's no colon (e.g. "T"); - // Returns "Some(Empty)" if there's a colon but nothing after (e.g. "T:") - // Returns "Some(stuff)" otherwise (e.g. "T:stuff"). - // NB: The None/Some distinction is important for issue #7264. 
- // - // Note that the `allow_any_lifetime` argument is a hack for now while the - // AST doesn't support arbitrary lifetimes in bounds on type parameters. In - // the future, this flag should be removed, and the return value of this - // function should be Option<~[TyParamBound]> + /// matches optbounds = ( ( : ( boundseq )? )? ) + /// where boundseq = ( bound + boundseq ) | bound + /// and bound = 'static | ty + /// Returns "None" if there's no colon (e.g. "T"); + /// Returns "Some(Empty)" if there's a colon but nothing after (e.g. "T:") + /// Returns "Some(stuff)" otherwise (e.g. "T:stuff"). + /// NB: The None/Some distinction is important for issue #7264. + /// + /// Note that the `allow_any_lifetime` argument is a hack for now while the + /// AST doesn't support arbitrary lifetimes in bounds on type parameters. In + /// the future, this flag should be removed, and the return value of this + /// function should be Option<~[TyParamBound]> fn parse_ty_param_bounds(&mut self, allow_any_lifetime: bool) -> (Option, OwnedSlice) { @@ -3588,7 +3607,7 @@ impl<'a> Parser<'a> { } } - // matches typaram = (unbound`?`)? IDENT optbounds ( EQ ty )? + /// Matches typaram = (unbound`?`)? IDENT optbounds ( EQ ty )? fn parse_ty_param(&mut self) -> TyParam { // This is a bit hacky. Currently we are only interested in a single // unbound, and it may only be `Sized`. To avoid backtracking and other @@ -3632,10 +3651,10 @@ impl<'a> Parser<'a> { } } - // parse a set of optional generic type parameter declarations - // matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > ) - // | ( < lifetimes , typaramseq ( , )? > ) - // where typaramseq = ( typaram ) | ( typaram , typaramseq ) + /// Parse a set of optional generic type parameter declarations + /// matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > ) + /// | ( < lifetimes , typaramseq ( , )? > ) + /// where typaramseq = ( typaram ) | ( typaram , typaramseq ) pub fn parse_generics(&mut self) -> ast::Generics { if self.eat(&token::LT) { let lifetimes = self.parse_lifetimes(); @@ -3727,7 +3746,7 @@ impl<'a> Parser<'a> { (args, variadic) } - // parse the argument list and result type of a function declaration + /// Parse the argument list and result type of a function declaration pub fn parse_fn_decl(&mut self, allow_variadic: bool) -> P { let (args, variadic) = self.parse_fn_args(true, allow_variadic); @@ -3762,8 +3781,8 @@ impl<'a> Parser<'a> { } } - // parse the argument list and result type of a function - // that may have a self type. + /// Parse the argument list and result type of a function + /// that may have a self type. fn parse_fn_decl_with_self(&mut self, parse_arg_fn: |&mut Parser| -> Arg) -> (ExplicitSelf, P) { fn maybe_parse_borrowed_explicit_self(this: &mut Parser) @@ -3921,7 +3940,7 @@ impl<'a> Parser<'a> { (spanned(lo, hi, explicit_self), fn_decl) } - // parse the |arg, arg| header on a lambda + /// Parse the |arg, arg| header on a lambda fn parse_fn_block_decl(&mut self) -> P { let inputs_captures = { if self.eat(&token::OROR) { @@ -3953,7 +3972,7 @@ impl<'a> Parser<'a> { }) } - // Parses the `(arg, arg) -> return_type` header on a procedure. + /// Parses the `(arg, arg) -> return_type` header on a procedure. fn parse_proc_decl(&mut self) -> P { let inputs = self.parse_unspanned_seq(&token::LPAREN, @@ -3979,7 +3998,7 @@ impl<'a> Parser<'a> { }) } - // parse the name and optional generic types of a function header. 
+ /// Parse the name and optional generic types of a function header. fn parse_fn_header(&mut self) -> (Ident, ast::Generics) { let id = self.parse_ident(); let generics = self.parse_generics(); @@ -3999,7 +4018,7 @@ impl<'a> Parser<'a> { } } - // parse an item-position function declaration. + /// Parse an item-position function declaration. fn parse_item_fn(&mut self, fn_style: FnStyle, abi: abi::Abi) -> ItemInfo { let (ident, generics) = self.parse_fn_header(); let decl = self.parse_fn_decl(false); @@ -4007,7 +4026,7 @@ impl<'a> Parser<'a> { (ident, ItemFn(decl, fn_style, abi, generics, body), Some(inner_attrs)) } - // parse a method in a trait impl, starting with `attrs` attributes. + /// Parse a method in a trait impl, starting with `attrs` attributes. fn parse_method(&mut self, already_parsed_attrs: Option>) -> Gc { let next_attrs = self.parse_outer_attributes(); @@ -4043,7 +4062,7 @@ impl<'a> Parser<'a> { } } - // parse trait Foo { ... } + /// Parse trait Foo { ... } fn parse_item_trait(&mut self) -> ItemInfo { let ident = self.parse_ident(); let tps = self.parse_generics(); @@ -4062,9 +4081,9 @@ impl<'a> Parser<'a> { (ident, ItemTrait(tps, sized, traits, meths), None) } - // Parses two variants (with the region/type params always optional): - // impl Foo { ... } - // impl ToString for ~[T] { ... } + /// Parses two variants (with the region/type params always optional): + /// impl Foo { ... } + /// impl ToString for ~[T] { ... } fn parse_item_impl(&mut self) -> ItemInfo { // First, parse type parameters if necessary. let generics = self.parse_generics(); @@ -4117,7 +4136,7 @@ impl<'a> Parser<'a> { (ident, ItemImpl(generics, opt_trait, ty, meths), Some(inner_attrs)) } - // parse a::B + /// Parse a::B fn parse_trait_ref(&mut self) -> TraitRef { ast::TraitRef { path: self.parse_path(LifetimeAndTypesWithoutColons).path, @@ -4125,7 +4144,7 @@ impl<'a> Parser<'a> { } } - // parse B + C + D + /// Parse B + C + D fn parse_trait_ref_list(&mut self, ket: &token::Token) -> Vec { self.parse_seq_to_before_end( ket, @@ -4134,7 +4153,7 @@ impl<'a> Parser<'a> { ) } - // parse struct Foo { ... } + /// Parse struct Foo { ... } fn parse_item_struct(&mut self, is_virtual: bool) -> ItemInfo { let class_name = self.parse_ident(); let generics = self.parse_generics(); @@ -4217,7 +4236,7 @@ impl<'a> Parser<'a> { None) } - // parse a structure field declaration + /// Parse a structure field declaration pub fn parse_single_struct_field(&mut self, vis: Visibility, attrs: Vec ) @@ -4239,7 +4258,7 @@ impl<'a> Parser<'a> { a_var } - // parse an element of a struct definition + /// Parse an element of a struct definition fn parse_struct_decl_field(&mut self) -> StructField { let attrs = self.parse_outer_attributes(); @@ -4251,7 +4270,7 @@ impl<'a> Parser<'a> { return self.parse_single_struct_field(Inherited, attrs); } - // parse visiility: PUB, PRIV, or nothing + /// Parse visibility: PUB, PRIV, or nothing fn parse_visibility(&mut self) -> Visibility { if self.eat_keyword(keywords::Pub) { Public } else { Inherited } @@ -4273,8 +4292,8 @@ impl<'a> Parser<'a> { } } - // given a termination token and a vector of already-parsed - // attributes (of length 0 or 1), parse all of the items in a module + /// Given a termination token and a vector of already-parsed + /// attributes (of length 0 or 1), parse all of the items in a module fn parse_mod_items(&mut self, term: token::Token, first_item_attrs: Vec, @@ -4342,7 +4361,7 @@ impl<'a> Parser<'a> { (id, ItemStatic(ty, m, e), None) } - // parse a `mod { ...
}` or `mod ;` item + /// Parse a `mod { ... }` or `mod ;` item fn parse_item_mod(&mut self, outer_attrs: &[Attribute]) -> ItemInfo { let id_span = self.span; let id = self.parse_ident(); @@ -4380,7 +4399,7 @@ impl<'a> Parser<'a> { self.mod_path_stack.pop().unwrap(); } - // read a module from a source file. + /// Read a module from a source file. fn eval_src_mod(&mut self, id: ast::Ident, outer_attrs: &[ast::Attribute], @@ -4488,7 +4507,7 @@ impl<'a> Parser<'a> { return (ast::ItemMod(m0), mod_attrs); } - // parse a function declaration from a foreign module + /// Parse a function declaration from a foreign module fn parse_item_foreign_fn(&mut self, vis: ast::Visibility, attrs: Vec) -> Gc { let lo = self.span.lo; @@ -4506,7 +4525,7 @@ impl<'a> Parser<'a> { vis: vis } } - // parse a static item from a foreign module + /// Parse a static item from a foreign module fn parse_item_foreign_static(&mut self, vis: ast::Visibility, attrs: Vec ) -> Gc { let lo = self.span.lo; @@ -4529,7 +4548,7 @@ impl<'a> Parser<'a> { } } - // parse safe/unsafe and fn + /// Parse safe/unsafe and fn fn parse_fn_style(&mut self) -> FnStyle { if self.eat_keyword(keywords::Fn) { NormalFn } else if self.eat_keyword(keywords::Unsafe) { @@ -4540,8 +4559,8 @@ impl<'a> Parser<'a> { } - // at this point, this is essentially a wrapper for - // parse_foreign_items. + /// At this point, this is essentially a wrapper for + /// parse_foreign_items. fn parse_foreign_mod_items(&mut self, abi: abi::Abi, first_item_attrs: Vec ) @@ -4642,7 +4661,7 @@ impl<'a> Parser<'a> { return IoviItem(item); } - // parse type Foo = Bar; + /// Parse type Foo = Bar; fn parse_item_type(&mut self) -> ItemInfo { let ident = self.parse_ident(); let tps = self.parse_generics(); @@ -4652,8 +4671,8 @@ impl<'a> Parser<'a> { (ident, ItemTy(ty, tps), None) } - // parse a structure-like enum variant definition - // this should probably be renamed or refactored... + /// Parse a structure-like enum variant definition + /// this should probably be renamed or refactored... fn parse_struct_def(&mut self) -> Gc { let mut fields: Vec = Vec::new(); while self.token != token::RBRACE { @@ -4669,7 +4688,7 @@ impl<'a> Parser<'a> { }; } - // parse the part of an "enum" decl following the '{' + /// Parse the part of an "enum" decl following the '{' fn parse_enum_def(&mut self, _generics: &ast::Generics) -> EnumDef { let mut variants = Vec::new(); let mut all_nullary = true; @@ -4733,7 +4752,7 @@ impl<'a> Parser<'a> { ast::EnumDef { variants: variants } } - // parse an "enum" declaration + /// Parse an "enum" declaration fn parse_item_enum(&mut self) -> ItemInfo { let id = self.parse_ident(); let generics = self.parse_generics(); @@ -4750,14 +4769,13 @@ impl<'a> Parser<'a> { } } - // Parses a string as an ABI spec on an extern type or module. Consumes - // the `extern` keyword, if one is found. + /// Parses a string as an ABI spec on an extern type or module. Consumes + /// the `extern` keyword, if one is found. fn parse_opt_abi(&mut self) -> Option { match self.token { token::LIT_STR(s) | token::LIT_STR_RAW(s, _) => { self.bump(); - let identifier_string = token::get_ident(s); - let the_string = identifier_string.get(); + let the_string = s.as_str(); match abi::lookup(the_string) { Some(abi) => Some(abi), None => { @@ -4777,10 +4795,10 @@ impl<'a> Parser<'a> { } } - // parse one of the items or view items allowed by the - // flags; on failure, return IoviNone. - // NB: this function no longer parses the items inside an - // extern crate. 
+ /// Parse one of the items or view items allowed by the + /// flags; on failure, return IoviNone. + /// NB: this function no longer parses the items inside an + /// extern crate. fn parse_item_or_view_item(&mut self, attrs: Vec , macros_allowed: bool) @@ -4988,7 +5006,7 @@ impl<'a> Parser<'a> { self.parse_macro_use_or_failure(attrs,macros_allowed,lo,visibility) } - // parse a foreign item; on failure, return IoviNone. + /// Parse a foreign item; on failure, return IoviNone. fn parse_foreign_item(&mut self, attrs: Vec , macros_allowed: bool) @@ -5011,7 +5029,7 @@ impl<'a> Parser<'a> { self.parse_macro_use_or_failure(attrs,macros_allowed,lo,visibility) } - // this is the fall-through for parsing items. + /// This is the fall-through for parsing items. fn parse_macro_use_or_failure( &mut self, attrs: Vec , @@ -5095,17 +5113,17 @@ impl<'a> Parser<'a> { } } - // parse, e.g., "use a::b::{z,y}" + /// Parse, e.g., "use a::b::{z,y}" fn parse_use(&mut self) -> ViewItem_ { return ViewItemUse(self.parse_view_path()); } - // matches view_path : MOD? IDENT EQ non_global_path - // | MOD? non_global_path MOD_SEP LBRACE RBRACE - // | MOD? non_global_path MOD_SEP LBRACE ident_seq RBRACE - // | MOD? non_global_path MOD_SEP STAR - // | MOD? non_global_path + /// Matches view_path : MOD? IDENT EQ non_global_path + /// | MOD? non_global_path MOD_SEP LBRACE RBRACE + /// | MOD? non_global_path MOD_SEP LBRACE ident_seq RBRACE + /// | MOD? non_global_path MOD_SEP STAR + /// | MOD? non_global_path fn parse_view_path(&mut self) -> Gc { let lo = self.span.lo; @@ -5228,10 +5246,10 @@ impl<'a> Parser<'a> { ViewPathSimple(last, path, ast::DUMMY_NODE_ID)); } - // Parses a sequence of items. Stops when it finds program - // text that can't be parsed as an item - // - mod_items uses extern_mod_allowed = true - // - block_tail_ uses extern_mod_allowed = false + /// Parses a sequence of items. Stops when it finds program + /// text that can't be parsed as an item + /// - mod_items uses extern_mod_allowed = true + /// - block_tail_ uses extern_mod_allowed = false fn parse_items_and_view_items(&mut self, first_item_attrs: Vec , mut extern_mod_allowed: bool, @@ -5313,8 +5331,8 @@ impl<'a> Parser<'a> { } } - // Parses a sequence of foreign items. Stops when it finds program - // text that can't be parsed as an item + /// Parses a sequence of foreign items. Stops when it finds program + /// text that can't be parsed as an item fn parse_foreign_items(&mut self, first_item_attrs: Vec , macros_allowed: bool) -> ParsedItemsAndViewItems { @@ -5353,8 +5371,8 @@ impl<'a> Parser<'a> { } } - // Parses a source module as a crate. This is the main - // entry point for the parser. + /// Parses a source module as a crate. This is the main + /// entry point for the parser. 
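A minimal usage sketch for this entry point, following the `string_to_parser`/`string_to_crate` helpers in src/libsyntax/util/parser_testing.rs later in this diff; the wrapper function and its name are hypothetical, and the argument order mirrors those helpers:

fn parse_source_to_crate(src: String) -> ast::Crate {
    // Build a session, wrap the source string in a Parser, then parse a
    // whole crate, exactly as util/parser_testing.rs does for its tests.
    let ps = new_parse_sess();
    let mut p = new_parser_from_source_str(&ps, Vec::new(),
                                           "bogofile".to_string(), src);
    p.parse_crate_mod()
}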
pub fn parse_crate_mod(&mut self) -> Crate { let lo = self.span.lo; // parse the crate's inner attrs, maybe (oops) one @@ -5375,9 +5393,9 @@ impl<'a> Parser<'a> { pub fn parse_optional_str(&mut self) -> Option<(InternedString, ast::StrStyle)> { let (s, style) = match self.token { - token::LIT_STR(s) => (self.id_to_interned_str(s), ast::CookedStr), + token::LIT_STR(s) => (self.id_to_interned_str(s.ident()), ast::CookedStr), token::LIT_STR_RAW(s, n) => { - (self.id_to_interned_str(s), ast::RawStr(n)) + (self.id_to_interned_str(s.ident()), ast::RawStr(n)) } _ => return None }; @@ -5392,3 +5410,4 @@ impl<'a> Parser<'a> { } } } + diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 55db3482a61a7..5839df6702245 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -10,7 +10,6 @@ use ast; use ast::{P, Ident, Name, Mrk}; -use ast_util; use ext::mtwt; use parse::token; use util::interner::{RcStr, StrInterner}; @@ -79,30 +78,37 @@ pub enum Token { QUESTION, /* Literals */ - LIT_BYTE(u8), - LIT_CHAR(char), - LIT_INT(i64, ast::IntTy), - LIT_UINT(u64, ast::UintTy), - LIT_INT_UNSUFFIXED(i64), - LIT_FLOAT(ast::Ident, ast::FloatTy), - LIT_FLOAT_UNSUFFIXED(ast::Ident), - LIT_STR(ast::Ident), - LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ - LIT_BINARY(Rc>), - LIT_BINARY_RAW(Rc>, uint), /* raw binary str delimited by n hash symbols */ + LIT_BYTE(Name), + LIT_CHAR(Name), + LIT_INTEGER(Name), + LIT_FLOAT(Name), + LIT_STR(Name), + LIT_STR_RAW(Name, uint), /* raw str delimited by n hash symbols */ + LIT_BINARY(Name), + LIT_BINARY_RAW(Name, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ - // an identifier contains an "is_mod_name" boolean, - // indicating whether :: follows this token with no - // whitespace in between. - IDENT(ast::Ident, bool), + /// An identifier contains an "is_mod_name" boolean, + /// indicating whether :: follows this token with no + /// whitespace in between. + IDENT(Ident, bool), UNDERSCORE, - LIFETIME(ast::Ident), + LIFETIME(Ident), /* For interpolation */ INTERPOLATED(Nonterminal), + DOC_COMMENT(Name), + + // Junk. These carry no data because we don't really care about the data + // they *would* carry, and don't really want to allocate a new ident for + // them. Instead, users could extract that from the associated span. 
+ + /// Whitespace + WS, + /// Comment + COMMENT, + SHEBANG(Name), - DOC_COMMENT(ast::Ident), EOF, } @@ -115,11 +121,12 @@ pub enum Nonterminal { NtPat( Gc), NtExpr(Gc), NtTy( P), - // see IDENT, above, for meaning of bool in NtIdent: - NtIdent(Box, bool), - NtMeta(Gc), // stuff inside brackets for attributes + /// See IDENT, above, for meaning of bool in NtIdent: + NtIdent(Box, bool), + /// Stuff inside brackets for attributes + NtMeta(Gc), NtPath(Box), - NtTT( Gc), // needs @ed to break a circularity + NtTT( Gc), // needs Gc'd to break a circularity NtMatchers(Vec ) } @@ -200,54 +207,28 @@ pub fn to_string(t: &Token) -> String { /* Literals */ LIT_BYTE(b) => { - let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { - res.push_char(c); - }); - res.push_char('\''); - res + format!("b'{}'", b.as_str()) } LIT_CHAR(c) => { - let mut res = String::from_str("'"); - c.escape_default(|c| { - res.push_char(c); - }); - res.push_char('\''); - res - } - LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)), - LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)), - LIT_INT_UNSUFFIXED(i) => { (i as u64).to_string() } - LIT_FLOAT(s, t) => { - let mut body = String::from_str(get_ident(s).get()); - if body.as_slice().ends_with(".") { - body.push_char('0'); // `10.f` is not a float literal - } - body.push_str(ast_util::float_ty_to_string(t).as_slice()); - body + format!("'{}'", c.as_str()) } - LIT_FLOAT_UNSUFFIXED(s) => { - let mut body = String::from_str(get_ident(s).get()); - if body.as_slice().ends_with(".") { - body.push_char('0'); // `10.f` is not a float literal - } - body + LIT_INTEGER(c) | LIT_FLOAT(c) => { + c.as_str().to_string() } + LIT_STR(s) => { - format!("\"{}\"", get_ident(s).get().escape_default()) + format!("\"{}\"", s.as_str()) } LIT_STR_RAW(s, n) => { format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n), string=get_ident(s)) + delim="#".repeat(n), string=s.as_str()) } - LIT_BINARY(ref v) => { - format!( - "b\"{}\"", - v.iter().map(|&b| b as char).collect::().escape_default()) + LIT_BINARY(v) => { + format!("b\"{}\"", v.as_str()) } - LIT_BINARY_RAW(ref s, n) => { + LIT_BINARY_RAW(s, n) => { format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii()) + delim="#".repeat(n), string=s.as_str()) } /* Name components */ @@ -258,8 +239,12 @@ pub fn to_string(t: &Token) -> String { UNDERSCORE => "_".to_string(), /* Other */ - DOC_COMMENT(s) => get_ident(s).get().to_string(), + DOC_COMMENT(s) => s.as_str().to_string(), EOF => "".to_string(), + WS => " ".to_string(), + COMMENT => "/* */".to_string(), + SHEBANG(s) => format!("/* shebang: {}*/", s.as_str()), + INTERPOLATED(ref nt) => { match nt { &NtExpr(ref e) => ::print::pprust::expr_to_string(&**e), @@ -296,11 +281,8 @@ pub fn can_begin_expr(t: &Token) -> bool { TILDE => true, LIT_BYTE(_) => true, LIT_CHAR(_) => true, - LIT_INT(_, _) => true, - LIT_UINT(_, _) => true, - LIT_INT_UNSUFFIXED(_) => true, - LIT_FLOAT(_, _) => true, - LIT_FLOAT_UNSUFFIXED(_) => true, + LIT_INTEGER(_) => true, + LIT_FLOAT(_) => true, LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, @@ -337,11 +319,8 @@ pub fn is_lit(t: &Token) -> bool { match *t { LIT_BYTE(_) => true, LIT_CHAR(_) => true, - LIT_INT(_, _) => true, - LIT_UINT(_, _) => true, - LIT_INT_UNSUFFIXED(_) => true, - LIT_FLOAT(_, _) => true, - LIT_FLOAT_UNSUFFIXED(_) => true, + LIT_INTEGER(_) => true, + LIT_FLOAT(_) => true, LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, @@ -395,19 
+374,19 @@ macro_rules! declare_special_idents_and_keywords {( $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )* } ) => { - static STRICT_KEYWORD_START: Name = first!($( $sk_name, )*); - static STRICT_KEYWORD_FINAL: Name = last!($( $sk_name, )*); - static RESERVED_KEYWORD_START: Name = first!($( $rk_name, )*); - static RESERVED_KEYWORD_FINAL: Name = last!($( $rk_name, )*); + static STRICT_KEYWORD_START: Name = first!($( Name($sk_name), )*); + static STRICT_KEYWORD_FINAL: Name = last!($( Name($sk_name), )*); + static RESERVED_KEYWORD_START: Name = first!($( Name($rk_name), )*); + static RESERVED_KEYWORD_FINAL: Name = last!($( Name($rk_name), )*); pub mod special_idents { - use ast::Ident; - $( pub static $si_static: Ident = Ident { name: $si_name, ctxt: 0 }; )* + use ast::{Ident, Name}; + $( pub static $si_static: Ident = Ident { name: Name($si_name), ctxt: 0 }; )* } pub mod special_names { use ast::Name; - $( pub static $si_static: Name = $si_name; )* + $( pub static $si_static: Name = Name($si_name); )* } /** @@ -428,8 +407,8 @@ macro_rules! declare_special_idents_and_keywords {( impl Keyword { pub fn to_name(&self) -> Name { match *self { - $( $sk_variant => $sk_name, )* - $( $rk_variant => $rk_name, )* + $( $sk_variant => Name($sk_name), )* + $( $rk_variant => Name($rk_name), )* } } } @@ -448,8 +427,11 @@ macro_rules! declare_special_idents_and_keywords {( }} // If the special idents get renumbered, remember to modify these two as appropriate -pub static SELF_KEYWORD_NAME: Name = 1; -static STATIC_KEYWORD_NAME: Name = 2; +pub static SELF_KEYWORD_NAME: Name = Name(SELF_KEYWORD_NAME_NUM); +static STATIC_KEYWORD_NAME: Name = Name(STATIC_KEYWORD_NAME_NUM); + +pub static SELF_KEYWORD_NAME_NUM: u32 = 1; +static STATIC_KEYWORD_NAME_NUM: u32 = 2; // NB: leaving holes in the ident table is bad! a different ident will get // interned with the id from the hole, but it will be between the min and max @@ -459,8 +441,8 @@ declare_special_idents_and_keywords! { pub mod special_idents { // These ones are statics (0, invalid, ""); - (super::SELF_KEYWORD_NAME, self_, "self"); - (super::STATIC_KEYWORD_NAME, statik, "static"); + (super::SELF_KEYWORD_NAME_NUM, self_, "self"); + (super::STATIC_KEYWORD_NAME_NUM, statik, "static"); (3, static_lifetime, "'static"); // for matcher NTs @@ -500,8 +482,8 @@ declare_special_idents_and_keywords! { (29, Ref, "ref"); (30, Return, "return"); // Static and Self are also special idents (prefill de-dupes) - (super::STATIC_KEYWORD_NAME, Static, "static"); - (super::SELF_KEYWORD_NAME, Self, "self"); + (super::STATIC_KEYWORD_NAME_NUM, Static, "static"); + (super::SELF_KEYWORD_NAME_NUM, Self, "self"); (31, Struct, "struct"); (32, Super, "super"); (33, True, "true"); @@ -683,20 +665,20 @@ pub fn gensym(s: &str) -> Name { /// Maps a string to an identifier with an empty syntax context. #[inline] -pub fn str_to_ident(s: &str) -> ast::Ident { - ast::Ident::new(intern(s)) +pub fn str_to_ident(s: &str) -> Ident { + Ident::new(intern(s)) } /// Maps a string to a gensym'ed identifier. #[inline] -pub fn gensym_ident(s: &str) -> ast::Ident { - ast::Ident::new(gensym(s)) +pub fn gensym_ident(s: &str) -> Ident { + Ident::new(gensym(s)) } // create a fresh name that maps to the same string as the old one. // note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src))); // that is, that the new name and the old one are connected to ptr_eq strings. 
-pub fn fresh_name(src: &ast::Ident) -> Name { +pub fn fresh_name(src: &Ident) -> Name { let interner = get_ident_interner(); interner.gensym_copy(src.name) // following: debug version. Could work in final except that it's incompatible with @@ -708,7 +690,7 @@ pub fn fresh_name(src: &ast::Ident) -> Name { // create a fresh mark. pub fn fresh_mark() -> Mrk { - gensym("mark") + gensym("mark").uint() as u32 } // See the macro above about the types of keywords @@ -722,10 +704,13 @@ pub fn is_keyword(kw: keywords::Keyword, tok: &Token) -> bool { pub fn is_any_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME | - STRICT_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + n == SELF_KEYWORD_NAME + || n == STATIC_KEYWORD_NAME + || STRICT_KEYWORD_START <= n + && n <= RESERVED_KEYWORD_FINAL }, _ => false } @@ -733,10 +718,13 @@ pub fn is_any_keyword(tok: &Token) -> bool { pub fn is_strict_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME | - STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + n == SELF_KEYWORD_NAME + || n == STATIC_KEYWORD_NAME + || STRICT_KEYWORD_START <= n + && n <= STRICT_KEYWORD_FINAL }, _ => false, } @@ -744,9 +732,11 @@ pub fn is_strict_keyword(tok: &Token) -> bool { pub fn is_reserved_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + RESERVED_KEYWORD_START <= n + && n <= RESERVED_KEYWORD_FINAL }, _ => false, } @@ -768,7 +758,7 @@ mod test { use ext::mtwt; fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident { - ast::Ident{name:id.name,ctxt:mtwt::apply_mark(m,id.ctxt)} + ast::Ident { name: id.name, ctxt:mtwt::apply_mark(m, id.ctxt) } } #[test] fn mtwt_token_eq_test() { diff --git a/src/libsyntax/print/pp.rs b/src/libsyntax/print/pp.rs index 24ab4b38e54b8..fe84eeff4f87f 100644 --- a/src/libsyntax/print/pp.rs +++ b/src/libsyntax/print/pp.rs @@ -8,58 +8,56 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/* - * This pretty-printer is a direct reimplementation of Philip Karlton's - * Mesa pretty-printer, as described in appendix A of - * - * STAN-CS-79-770: "Pretty Printing", by Derek C. Oppen. - * Stanford Department of Computer Science, 1979. - * - * The algorithm's aim is to break a stream into as few lines as possible - * while respecting the indentation-consistency requirements of the enclosing - * block, and avoiding breaking at silly places on block boundaries, for - * example, between "x" and ")" in "x)". - * - * I am implementing this algorithm because it comes with 20 pages of - * documentation explaining its theory, and because it addresses the set of - * concerns I've seen other pretty-printers fall down on. Weirdly. Even though - * it's 32 years old. What can I say? - * - * Despite some redundancies and quirks in the way it's implemented in that - * paper, I've opted to keep the implementation here as similar as I can, - * changing only what was blatantly wrong, a typo, or sufficiently - * non-idiomatic rust that it really stuck out. - * - * In particular you'll see a certain amount of churn related to INTEGER vs. 
- * CARDINAL in the Mesa implementation. Mesa apparently interconverts the two - * somewhat readily? In any case, I've used uint for indices-in-buffers and - * ints for character-sizes-and-indentation-offsets. This respects the need - * for ints to "go negative" while carrying a pending-calculation balance, and - * helps differentiate all the numbers flying around internally (slightly). - * - * I also inverted the indentation arithmetic used in the print stack, since - * the Mesa implementation (somewhat randomly) stores the offset on the print - * stack in terms of margin-col rather than col itself. I store col. - * - * I also implemented a small change in the String token, in that I store an - * explicit length for the string. For most tokens this is just the length of - * the accompanying string. But it's necessary to permit it to differ, for - * encoding things that are supposed to "go on their own line" -- certain - * classes of comment and blank-line -- where relying on adjacent - * hardbreak-like Break tokens with long blankness indication doesn't actually - * work. To see why, consider when there is a "thing that should be on its own - * line" between two long blocks, say functions. If you put a hardbreak after - * each function (or before each) and the breaking algorithm decides to break - * there anyways (because the functions themselves are long) you wind up with - * extra blank lines. If you don't put hardbreaks you can wind up with the - * "thing which should be on its own line" not getting its own line in the - * rare case of "really small functions" or such. This re-occurs with comments - * and explicit blank lines. So in those cases we use a string with a payload - * we want isolated to a line and an explicit length that's huge, surrounded - * by two zero-length breaks. The algorithm will try its best to fit it on a - * line (which it can't) and so naturally place the content on its own line to - * avoid combining it with other lines and making matters even worse. - */ +//! This pretty-printer is a direct reimplementation of Philip Karlton's +//! Mesa pretty-printer, as described in appendix A of +//! +//! STAN-CS-79-770: "Pretty Printing", by Derek C. Oppen. +//! Stanford Department of Computer Science, 1979. +//! +//! The algorithm's aim is to break a stream into as few lines as possible +//! while respecting the indentation-consistency requirements of the enclosing +//! block, and avoiding breaking at silly places on block boundaries, for +//! example, between "x" and ")" in "x)". +//! +//! I am implementing this algorithm because it comes with 20 pages of +//! documentation explaining its theory, and because it addresses the set of +//! concerns I've seen other pretty-printers fall down on. Weirdly. Even though +//! it's 32 years old. What can I say? +//! +//! Despite some redundancies and quirks in the way it's implemented in that +//! paper, I've opted to keep the implementation here as similar as I can, +//! changing only what was blatantly wrong, a typo, or sufficiently +//! non-idiomatic rust that it really stuck out. +//! +//! In particular you'll see a certain amount of churn related to INTEGER vs. +//! CARDINAL in the Mesa implementation. Mesa apparently interconverts the two +//! somewhat readily? In any case, I've used uint for indices-in-buffers and +//! ints for character-sizes-and-indentation-offsets. This respects the need +//! for ints to "go negative" while carrying a pending-calculation balance, and +//! 
helps differentiate all the numbers flying around internally (slightly). +//! +//! I also inverted the indentation arithmetic used in the print stack, since +//! the Mesa implementation (somewhat randomly) stores the offset on the print +//! stack in terms of margin-col rather than col itself. I store col. +//! +//! I also implemented a small change in the String token, in that I store an +//! explicit length for the string. For most tokens this is just the length of +//! the accompanying string. But it's necessary to permit it to differ, for +//! encoding things that are supposed to "go on their own line" -- certain +//! classes of comment and blank-line -- where relying on adjacent +//! hardbreak-like Break tokens with long blankness indication doesn't actually +//! work. To see why, consider when there is a "thing that should be on its own +//! line" between two long blocks, say functions. If you put a hardbreak after +//! each function (or before each) and the breaking algorithm decides to break +//! there anyways (because the functions themselves are long) you wind up with +//! extra blank lines. If you don't put hardbreaks you can wind up with the +//! "thing which should be on its own line" not getting its own line in the +//! rare case of "really small functions" or such. This re-occurs with comments +//! and explicit blank lines. So in those cases we use a string with a payload +//! we want isolated to a line and an explicit length that's huge, surrounded +//! by two zero-length breaks. The algorithm will try its best to fit it on a +//! line (which it can't) and so naturally place the content on its own line to +//! avoid combining it with other lines and making matters even worse. use std::io; use std::string::String; @@ -186,107 +184,116 @@ pub fn mk_printer(out: Box, linewidth: uint) -> Printer { } -/* - * In case you do not have the paper, here is an explanation of what's going - * on. - * - * There is a stream of input tokens flowing through this printer. - * - * The printer buffers up to 3N tokens inside itself, where N is linewidth. - * Yes, linewidth is chars and tokens are multi-char, but in the worst - * case every token worth buffering is 1 char long, so it's ok. - * - * Tokens are String, Break, and Begin/End to delimit blocks. - * - * Begin tokens can carry an offset, saying "how far to indent when you break - * inside here", as well as a flag indicating "consistent" or "inconsistent" - * breaking. Consistent breaking means that after the first break, no attempt - * will be made to flow subsequent breaks together onto lines. Inconsistent - * is the opposite. Inconsistent breaking example would be, say: - * - * foo(hello, there, good, friends) - * - * breaking inconsistently to become - * - * foo(hello, there - * good, friends); - * - * whereas a consistent breaking would yield: - * - * foo(hello, - * there - * good, - * friends); - * - * That is, in the consistent-break blocks we value vertical alignment - * more than the ability to cram stuff onto a line. But in all cases if it - * can make a block a one-liner, it'll do so. - * - * Carrying on with high-level logic: - * - * The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and - * 'right' indices denote the active portion of the ring buffer as well as - * describing hypothetical points-in-the-infinite-stream at most 3N tokens - * apart (i.e. "not wrapped to ring-buffer boundaries"). 
The paper will switch - * between using 'left' and 'right' terms to denote the wrapepd-to-ring-buffer - * and point-in-infinite-stream senses freely. - * - * There is a parallel ring buffer, 'size', that holds the calculated size of - * each token. Why calculated? Because for Begin/End pairs, the "size" - * includes everything between the pair. That is, the "size" of Begin is - * actually the sum of the sizes of everything between Begin and the paired - * End that follows. Since that is arbitrarily far in the future, 'size' is - * being rewritten regularly while the printer runs; in fact most of the - * machinery is here to work out 'size' entries on the fly (and give up when - * they're so obviously over-long that "infinity" is a good enough - * approximation for purposes of line breaking). - * - * The "input side" of the printer is managed as an abstract process called - * SCAN, which uses 'scan_stack', 'scan_stack_empty', 'top' and 'bottom', to - * manage calculating 'size'. SCAN is, in other words, the process of - * calculating 'size' entries. - * - * The "output side" of the printer is managed by an abstract process called - * PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to - * do with each token/size pair it consumes as it goes. It's trying to consume - * the entire buffered window, but can't output anything until the size is >= - * 0 (sizes are set to negative while they're pending calculation). - * - * So SCAN takes input and buffers tokens and pending calculations, while - * PRINT gobbles up completed calculations and tokens from the buffer. The - * theory is that the two can never get more than 3N tokens apart, because - * once there's "obviously" too much data to fit on a line, in a size - * calculation, SCAN will write "infinity" to the size and let PRINT consume - * it. - * - * In this implementation (following the paper, again) the SCAN process is - * the method called 'pretty_print', and the 'PRINT' process is the method - * called 'print'. - */ +/// In case you do not have the paper, here is an explanation of what's going +/// on. +/// +/// There is a stream of input tokens flowing through this printer. +/// +/// The printer buffers up to 3N tokens inside itself, where N is linewidth. +/// Yes, linewidth is chars and tokens are multi-char, but in the worst +/// case every token worth buffering is 1 char long, so it's ok. +/// +/// Tokens are String, Break, and Begin/End to delimit blocks. +/// +/// Begin tokens can carry an offset, saying "how far to indent when you break +/// inside here", as well as a flag indicating "consistent" or "inconsistent" +/// breaking. Consistent breaking means that after the first break, no attempt +/// will be made to flow subsequent breaks together onto lines. Inconsistent +/// is the opposite. Inconsistent breaking example would be, say: +/// +/// foo(hello, there, good, friends) +/// +/// breaking inconsistently to become +/// +/// foo(hello, there +/// good, friends); +/// +/// whereas a consistent breaking would yield: +/// +/// foo(hello, +/// there +/// good, +/// friends); +/// +/// That is, in the consistent-break blocks we value vertical alignment +/// more than the ability to cram stuff onto a line. But in all cases if it +/// can make a block a one-liner, it'll do so. +/// +/// Carrying on with high-level logic: +/// +/// The buffered tokens go through a ring-buffer, 'tokens'. 
The 'left' and +/// 'right' indices denote the active portion of the ring buffer as well as +/// describing hypothetical points-in-the-infinite-stream at most 3N tokens +/// apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch +/// between using 'left' and 'right' terms to denote the wrapped-to-ring-buffer +/// and point-in-infinite-stream senses freely. +/// +/// There is a parallel ring buffer, 'size', that holds the calculated size of +/// each token. Why calculated? Because for Begin/End pairs, the "size" +/// includes everything between the pair. That is, the "size" of Begin is +/// actually the sum of the sizes of everything between Begin and the paired +/// End that follows. Since that is arbitrarily far in the future, 'size' is +/// being rewritten regularly while the printer runs; in fact most of the +/// machinery is here to work out 'size' entries on the fly (and give up when +/// they're so obviously over-long that "infinity" is a good enough +/// approximation for purposes of line breaking). +/// +/// The "input side" of the printer is managed as an abstract process called +/// SCAN, which uses 'scan_stack', 'scan_stack_empty', 'top' and 'bottom', to +/// manage calculating 'size'. SCAN is, in other words, the process of +/// calculating 'size' entries. +/// +/// The "output side" of the printer is managed by an abstract process called +/// PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to +/// do with each token/size pair it consumes as it goes. It's trying to consume +/// the entire buffered window, but can't output anything until the size is >= +/// 0 (sizes are set to negative while they're pending calculation). +/// +/// So SCAN takes input and buffers tokens and pending calculations, while +/// PRINT gobbles up completed calculations and tokens from the buffer. The +/// theory is that the two can never get more than 3N tokens apart, because +/// once there's "obviously" too much data to fit on a line, in a size +/// calculation, SCAN will write "infinity" to the size and let PRINT consume +/// it. +/// +/// In this implementation (following the paper, again) the SCAN process is +/// the method called 'pretty_print', and the 'PRINT' process is the method +/// called 'print'. pub struct Printer { pub out: Box, buf_len: uint, - margin: int, // width of lines we're constrained to - space: int, // number of spaces left on line - left: uint, // index of left side of input stream - right: uint, // index of right side of input stream - token: Vec , // ring-buffr stream goes through - size: Vec , // ring-buffer of calculated sizes - left_total: int, // running size of stream "...left" - right_total: int, // running size of stream "...right" - // pseudo-stack, really a ring too. Holds the - // primary-ring-buffers index of the Begin that started the - // current block, possibly with the most recent Break after that - // Begin (if there is any) on top of it. Stuff is flushed off the - // bottom as it becomes irrelevant due to the primary ring-buffer - // advancing.
+ /// Width of lines we're constrained to + margin: int, + /// Number of spaces left on line + space: int, + /// Index of left side of input stream + left: uint, + /// Index of right side of input stream + right: uint, + /// Ring-buffer stream goes through + token: Vec , + /// Ring-buffer of calculated sizes + size: Vec , + /// Running size of stream "...left" + left_total: int, + /// Running size of stream "...right" + right_total: int, + /// Pseudo-stack, really a ring too. Holds the + /// primary-ring-buffers index of the Begin that started the + /// current block, possibly with the most recent Break after that + /// Begin (if there is any) on top of it. Stuff is flushed off the + /// bottom as it becomes irrelevant due to the primary ring-buffer + /// advancing. scan_stack: Vec , - scan_stack_empty: bool, // top==bottom disambiguator - top: uint, // index of top of scan_stack - bottom: uint, // index of bottom of scan_stack - // stack of blocks-in-progress being flushed by print + /// Top==bottom disambiguator + scan_stack_empty: bool, + /// Index of top of scan_stack + top: uint, + /// Index of bottom of scan_stack + bottom: uint, + /// Stack of blocks-in-progress being flushed by print print_stack: Vec , - // buffered indentation to avoid writing trailing whitespace + /// Buffered indentation to avoid writing trailing whitespace pending_indentation: int, } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index a5d70a9333dde..170cb7a249c4b 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -88,9 +88,9 @@ pub static indent_unit: uint = 4u; pub static default_columns: uint = 78u; -// Requires you to pass an input filename and reader so that -// it can scan the input text for comments and literals to -// copy forward. +/// Requires you to pass an input filename and reader so that +/// it can scan the input text for comments and literals to +/// copy forward. pub fn print_crate<'a>(cm: &'a CodeMap, span_diagnostic: &diagnostic::SpanHandler, krate: &ast::Crate, diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index 4d88aaca7486b..452b5a5251222 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -8,9 +8,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// An "interner" is a data structure that associates values with uint tags and -// allows bidirectional lookup; i.e. given a value, one can easily find the -// type, and vice versa. +//! An "interner" is a data structure that associates values with uint tags and +//! allows bidirectional lookup; i.e. given a value, one can easily find the +//! tag, and vice versa.
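A minimal sketch of that intern/lookup idea in present-day Rust, independent of this crate's `Name`/`RcStr` types; `MiniInterner` is a made-up name for illustration:

use std::collections::HashMap;

struct MiniInterner {
    map: HashMap<String, u32>, // value -> tag
    vect: Vec<String>,         // tag -> value
}

impl MiniInterner {
    fn intern(&mut self, val: &str) -> u32 {
        if let Some(&idx) = self.map.get(val) {
            return idx; // interning the same value twice yields the same tag
        }
        let idx = self.vect.len() as u32; // next free tag
        self.map.insert(val.to_string(), idx);
        self.vect.push(val.to_string());
        idx
    }
    fn get(&self, idx: u32) -> &str {
        &self.vect[idx as usize] // tag -> value is a plain vector index
    }
}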
use ast::Name; @@ -52,7 +52,7 @@ impl Interner { } let mut vect = self.vect.borrow_mut(); - let new_idx = (*vect).len() as Name; + let new_idx = Name((*vect).len() as u32); (*map).insert(val.clone(), new_idx); (*vect).push(val); new_idx @@ -60,7 +60,7 @@ impl Interner { pub fn gensym(&self, val: T) -> Name { let mut vect = self.vect.borrow_mut(); - let new_idx = (*vect).len() as Name; + let new_idx = Name((*vect).len() as u32); // leave out of .map to avoid colliding (*vect).push(val); new_idx @@ -68,7 +68,7 @@ impl Interner { pub fn get(&self, idx: Name) -> T { let vect = self.vect.borrow(); - (*(*vect).get(idx as uint)).clone() + (*(*vect).get(idx.uint())).clone() } pub fn len(&self) -> uint { @@ -155,7 +155,7 @@ impl StrInterner { None => (), } - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); let val = RcStr::new(val); map.insert(val.clone(), new_idx); self.vect.borrow_mut().push(val); @@ -163,7 +163,7 @@ impl StrInterner { } pub fn gensym(&self, val: &str) -> Name { - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); // leave out of .map to avoid colliding self.vect.borrow_mut().push(RcStr::new(val)); new_idx @@ -180,23 +180,23 @@ impl StrInterner { /// Create a gensym with the same name as an existing /// entry. pub fn gensym_copy(&self, idx : Name) -> Name { - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); // leave out of map to avoid colliding let mut vect = self.vect.borrow_mut(); - let existing = (*vect.get(idx as uint)).clone(); + let existing = (*vect.get(idx.uint())).clone(); vect.push(existing); new_idx } pub fn get(&self, idx: Name) -> RcStr { - (*self.vect.borrow().get(idx as uint)).clone() + (*self.vect.borrow().get(idx.uint())).clone() } /// Returns this string with lifetime tied to the interner. Since /// strings may never be removed from the interner, this is safe. 
pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str { let vect = self.vect.borrow(); - let s: &str = vect.get(idx as uint).as_slice(); + let s: &str = vect.get(idx.uint()).as_slice(); unsafe { mem::transmute(s) } @@ -222,36 +222,38 @@ impl StrInterner { #[cfg(test)] mod tests { use super::*; + use ast::Name; + #[test] #[should_fail] fn i1 () { let i : Interner = Interner::new(); - i.get(13); + i.get(Name(13)); } #[test] fn interner_tests () { let i : Interner = Interner::new(); // first one is zero: - assert_eq!(i.intern(RcStr::new("dog")), 0); + assert_eq!(i.intern(RcStr::new("dog")), Name(0)); // re-use gets the same entry: - assert_eq!(i.intern(RcStr::new("dog")), 0); + assert_eq!(i.intern(RcStr::new("dog")), Name(0)); // different string gets a different #: - assert_eq!(i.intern(RcStr::new("cat")), 1); - assert_eq!(i.intern(RcStr::new("cat")), 1); + assert_eq!(i.intern(RcStr::new("cat")), Name(1)); + assert_eq!(i.intern(RcStr::new("cat")), Name(1)); // dog is still at zero - assert_eq!(i.intern(RcStr::new("dog")), 0); + assert_eq!(i.intern(RcStr::new("dog")), Name(0)); // gensym gets 3 - assert_eq!(i.gensym(RcStr::new("zebra") ), 2); + assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2)); // gensym of same string gets new number : - assert_eq!(i.gensym (RcStr::new("zebra") ), 3); + assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3)); // gensym of *existing* string gets new number: - assert_eq!(i.gensym(RcStr::new("dog")), 4); - assert_eq!(i.get(0), RcStr::new("dog")); - assert_eq!(i.get(1), RcStr::new("cat")); - assert_eq!(i.get(2), RcStr::new("zebra")); - assert_eq!(i.get(3), RcStr::new("zebra")); - assert_eq!(i.get(4), RcStr::new("dog")); + assert_eq!(i.gensym(RcStr::new("dog")), Name(4)); + assert_eq!(i.get(Name(0)), RcStr::new("dog")); + assert_eq!(i.get(Name(1)), RcStr::new("cat")); + assert_eq!(i.get(Name(2)), RcStr::new("zebra")); + assert_eq!(i.get(Name(3)), RcStr::new("zebra")); + assert_eq!(i.get(Name(4)), RcStr::new("dog")); } #[test] @@ -261,39 +263,39 @@ mod tests { RcStr::new("Bob"), RcStr::new("Carol") ]); - assert_eq!(i.get(0), RcStr::new("Alan")); - assert_eq!(i.get(1), RcStr::new("Bob")); - assert_eq!(i.get(2), RcStr::new("Carol")); - assert_eq!(i.intern(RcStr::new("Bob")), 1); + assert_eq!(i.get(Name(0)), RcStr::new("Alan")); + assert_eq!(i.get(Name(1)), RcStr::new("Bob")); + assert_eq!(i.get(Name(2)), RcStr::new("Carol")); + assert_eq!(i.intern(RcStr::new("Bob")), Name(1)); } #[test] fn string_interner_tests() { let i : StrInterner = StrInterner::new(); // first one is zero: - assert_eq!(i.intern("dog"), 0); + assert_eq!(i.intern("dog"), Name(0)); // re-use gets the same entry: - assert_eq!(i.intern ("dog"), 0); + assert_eq!(i.intern ("dog"), Name(0)); // different string gets a different #: - assert_eq!(i.intern("cat"), 1); - assert_eq!(i.intern("cat"), 1); + assert_eq!(i.intern("cat"), Name(1)); + assert_eq!(i.intern("cat"), Name(1)); // dog is still at zero - assert_eq!(i.intern("dog"), 0); + assert_eq!(i.intern("dog"), Name(0)); // gensym gets 3 - assert_eq!(i.gensym("zebra"), 2); + assert_eq!(i.gensym("zebra"), Name(2)); // gensym of same string gets new number : - assert_eq!(i.gensym("zebra"), 3); + assert_eq!(i.gensym("zebra"), Name(3)); // gensym of *existing* string gets new number: - assert_eq!(i.gensym("dog"), 4); + assert_eq!(i.gensym("dog"), Name(4)); // gensym tests again with gensym_copy: - assert_eq!(i.gensym_copy(2), 5); - assert_eq!(i.get(5), RcStr::new("zebra")); - assert_eq!(i.gensym_copy(2), 6); - assert_eq!(i.get(6), RcStr::new("zebra")); - 
assert_eq!(i.get(0), RcStr::new("dog")); - assert_eq!(i.get(1), RcStr::new("cat")); - assert_eq!(i.get(2), RcStr::new("zebra")); - assert_eq!(i.get(3), RcStr::new("zebra")); - assert_eq!(i.get(4), RcStr::new("dog")); + assert_eq!(i.gensym_copy(Name(2)), Name(5)); + assert_eq!(i.get(Name(5)), RcStr::new("zebra")); + assert_eq!(i.gensym_copy(Name(2)), Name(6)); + assert_eq!(i.get(Name(6)), RcStr::new("zebra")); + assert_eq!(i.get(Name(0)), RcStr::new("dog")); + assert_eq!(i.get(Name(1)), RcStr::new("cat")); + assert_eq!(i.get(Name(2)), RcStr::new("zebra")); + assert_eq!(i.get(Name(3)), RcStr::new("zebra")); + assert_eq!(i.get(Name(4)), RcStr::new("dog")); } } diff --git a/src/libsyntax/util/parser_testing.rs b/src/libsyntax/util/parser_testing.rs index 04116dec60e31..f50739a7069e0 100644 --- a/src/libsyntax/util/parser_testing.rs +++ b/src/libsyntax/util/parser_testing.rs @@ -17,14 +17,14 @@ use parse::token; use std::gc::Gc; -// map a string to tts, using a made-up filename: +/// Map a string to tts, using a made-up filename: pub fn string_to_tts(source_str: String) -> Vec { let ps = new_parse_sess(); filemap_to_tts(&ps, string_to_filemap(&ps, source_str, "bogofile".to_string())) } -// map string to parser (via tts) +/// Map string to parser (via tts) pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> { new_parser_from_source_str(ps, Vec::new(), @@ -40,51 +40,51 @@ fn with_error_checking_parse(s: String, f: |&mut Parser| -> T) -> T { x } -// parse a string, return a crate. +/// Parse a string, return a crate. pub fn string_to_crate (source_str : String) -> ast::Crate { with_error_checking_parse(source_str, |p| { p.parse_crate_mod() }) } -// parse a string, return an expr +/// Parse a string, return an expr pub fn string_to_expr (source_str : String) -> Gc { with_error_checking_parse(source_str, |p| { p.parse_expr() }) } -// parse a string, return an item +/// Parse a string, return an item pub fn string_to_item (source_str : String) -> Option> { with_error_checking_parse(source_str, |p| { p.parse_item(Vec::new()) }) } -// parse a string, return a stmt +/// Parse a string, return a stmt pub fn string_to_stmt(source_str : String) -> Gc { with_error_checking_parse(source_str, |p| { p.parse_stmt(Vec::new()) }) } -// parse a string, return a pat. Uses "irrefutable"... which doesn't -// (currently) affect parsing. +/// Parse a string, return a pat. Uses "irrefutable"... which doesn't +/// (currently) affect parsing. pub fn string_to_pat(source_str: String) -> Gc { string_to_parser(&new_parse_sess(), source_str).parse_pat() } -// convert a vector of strings to a vector of ast::Ident's +/// Convert a vector of strings to a vector of ast::Ident's pub fn strs_to_idents(ids: Vec<&str> ) -> Vec { ids.iter().map(|u| token::str_to_ident(*u)).collect() } -// does the given string match the pattern? whitespace in the first string -// may be deleted or replaced with other whitespace to match the pattern. -// this function is unicode-ignorant; fortunately, the careful design of -// UTF-8 mitigates this ignorance. In particular, this function only collapses -// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode -// chars. Unsurprisingly, it doesn't do NKF-normalization(?). +/// Does the given string match the pattern? whitespace in the first string +/// may be deleted or replaced with other whitespace to match the pattern. +/// this function is unicode-ignorant; fortunately, the careful design of +/// UTF-8 mitigates this ignorance. 
In particular, this function only collapses +/// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode +/// chars. Unsurprisingly, it doesn't do NKF-normalization(?). pub fn matches_codepattern(a : &str, b : &str) -> bool { let mut idx_a = 0; let mut idx_b = 0; @@ -122,9 +122,9 @@ pub fn matches_codepattern(a : &str, b : &str) -> bool { } } -// given a string and an index, return the first uint >= idx -// that is a non-ws-char or is outside of the legal range of -// the string. +/// Given a string and an index, return the first uint >= idx +/// that is a non-ws-char or is outside of the legal range of +/// the string. fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint { let mut i = idx; let len = a.len(); @@ -134,7 +134,7 @@ fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint { i } -// copied from lexer. +/// Copied from lexer. pub fn is_whitespace(c: char) -> bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } diff --git a/src/libsyntax/visit.rs b/src/libsyntax/visit.rs index df34ff30db67f..9298b58c4267d 100644 --- a/src/libsyntax/visit.rs +++ b/src/libsyntax/visit.rs @@ -8,6 +8,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! Context-passing AST walker. Each overridden visit method has full control +//! over what happens with its node, it can do its own traversal of the node's +//! children (potentially passing in different contexts to each), call +//! `visit::visit_*` to apply the default traversal algorithm (again, it can +//! override the context), or prevent deeper traversal by doing nothing. +//! +//! Note: it is an important invariant that the default visitor walks the body +//! of a function in "execution order" (more concretely, reverse post-order +//! with respect to the CFG implied by the AST), meaning that if AST node A may +//! execute before AST node B, then A is visited first. The borrow checker in +//! particular relies on this property. +//! use abi::Abi; use ast::*; use ast; @@ -17,27 +29,15 @@ use owned_slice::OwnedSlice; use std::gc::Gc; -// Context-passing AST walker. Each overridden visit method has full control -// over what happens with its node, it can do its own traversal of the node's -// children (potentially passing in different contexts to each), call -// visit::visit_* to apply the default traversal algorithm (again, it can -// override the context), or prevent deeper traversal by doing nothing. -// -// Note: it is an important invariant that the default visitor walks the body -// of a function in "execution order" (more concretely, reverse post-order -// with respect to the CFG implied by the AST), meaning that if AST node A may -// execute before AST node B, then A is visited first. The borrow checker in -// particular relies on this property. - pub enum FnKind<'a> { - // fn foo() or extern "Abi" fn foo() + /// fn foo() or extern "Abi" fn foo() FkItemFn(Ident, &'a Generics, FnStyle, Abi), - // fn foo(&self) + /// fn foo(&self) FkMethod(Ident, &'a Generics, &'a Method), - // |x, y| ... - // proc(x, y) ... + /// |x, y| ... + /// proc(x, y) ... 
FkFnBlock, } diff --git a/src/test/compile-fail/lex-illegal-num-char-escape.rs b/src/test/compile-fail/lex-bad-char-literals.rs similarity index 75% rename from src/test/compile-fail/lex-illegal-num-char-escape.rs rename to src/test/compile-fail/lex-bad-char-literals.rs index 8f4c756c891d5..0eaa81bd6ab95 100644 --- a/src/test/compile-fail/lex-illegal-num-char-escape.rs +++ b/src/test/compile-fail/lex-bad-char-literals.rs @@ -31,5 +31,19 @@ static s: &'static str = static s2: &'static str = "\u23q" //~ ERROR: illegal character in numeric character escape + //~^ ERROR: numeric character escape is too short +; + +static c: char = + '\●' //~ ERROR: unknown character escape +; + +static s: &'static str = + "\●" //~ ERROR: unknown character escape +; + +// THIS MUST BE LAST, since unterminated character constants kill the lexer + +static c: char = + '● //~ ERROR: unterminated character constant ; -//~^^ ERROR: numeric character escape is too short diff --git a/src/test/compile-fail/lex-bad-fp-base-3.rs b/src/test/compile-fail/lex-bad-fp-base-3.rs deleted file mode 100644 index 79c42360adb2f..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-3.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let c = 0o3.0f32; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-4.rs b/src/test/compile-fail/lex-bad-fp-base-4.rs deleted file mode 100644 index eaea61b0089af..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-4.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let d = 0o4e4; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-5.rs b/src/test/compile-fail/lex-bad-fp-base-5.rs deleted file mode 100644 index ee25ed95639e2..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-5.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let e = 0o5.0e5; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-6.rs b/src/test/compile-fail/lex-bad-fp-base-6.rs deleted file mode 100644 index bf08ec1eae5fe..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-6.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. 
This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let f = 0o6e6f32; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-7.rs b/src/test/compile-fail/lex-bad-fp-base-7.rs deleted file mode 100644 index 921ed8f1b69e8..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-7.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let g = 0o7.0e7f64; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-8.rs b/src/test/compile-fail/lex-bad-fp-base-8.rs deleted file mode 100644 index 10e334ede01c2..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-8.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let h = 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-9.rs b/src/test/compile-fail/lex-bad-fp-base-9.rs deleted file mode 100644 index 3ea151cb9826a..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-9.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let i = 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-lit.rs b/src/test/compile-fail/lex-bad-fp-lit.rs deleted file mode 100644 index 5a5e9d7d8f238..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-lit.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static f: float = - 1e+ //~ ERROR: scan_exponent: bad fp literal -; diff --git a/src/test/compile-fail/lex-bad-numeric-literals.rs b/src/test/compile-fail/lex-bad-numeric-literals.rs new file mode 100644 index 0000000000000..9a490be6a0169 --- /dev/null +++ b/src/test/compile-fail/lex-bad-numeric-literals.rs @@ -0,0 +1,35 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +fn main() { + 0o1.0; //~ ERROR: octal float literal is not supported + 0o2f32; //~ ERROR: octal float literal is not supported + 0o3.0f32; //~ ERROR: octal float literal is not supported + 0o4e4; //~ ERROR: octal float literal is not supported + 0o5.0e5; //~ ERROR: octal float literal is not supported + 0o6e6f32; //~ ERROR: octal float literal is not supported + 0o7.0e7f64; //~ ERROR: octal float literal is not supported + 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported + 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported + 0o; //~ ERROR: no valid digits + 1e+; //~ ERROR: expected at least one digit in exponent + 0x539.0; //~ ERROR: hexadecimal float literal is not supported + 99999999999999999999999999999999; //~ ERROR: int literal is too large + 99999999999999999999999999999999u32; //~ ERROR: int literal is too large + 0x; //~ ERROR: no valid digits + 0xu32; //~ ERROR: no valid digits + 0ou32; //~ ERROR: no valid digits + 0bu32; //~ ERROR: no valid digits + 0b; //~ ERROR: no valid digits + 0o123f64; //~ ERROR: octal float literal is not supported + 0o123.456; //~ ERROR: octal float literal is not supported + 0b101f64; //~ ERROR: binary float literal is not supported + 0b111.101; //~ ERROR: binary float literal is not supported +} diff --git a/src/test/compile-fail/lex-bad-fp-base-1.rs b/src/test/compile-fail/lex-bad-token.rs similarity index 85% rename from src/test/compile-fail/lex-bad-fp-base-1.rs rename to src/test/compile-fail/lex-bad-token.rs index 659cb5c837955..d28d9a20c6eed 100644 --- a/src/test/compile-fail/lex-bad-fp-base-1.rs +++ b/src/test/compile-fail/lex-bad-token.rs @@ -8,6 +8,4 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -fn main() { - let a = 0o1.0; //~ ERROR: octal float literal is not supported -} +● //~ ERROR: unknown start of token diff --git a/src/test/compile-fail/lex-hex-float-lit.rs b/src/test/compile-fail/lex-hex-float-lit.rs deleted file mode 100644 index 457c6126c44a5..0000000000000 --- a/src/test/compile-fail/lex-hex-float-lit.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static f: float = - 0x539.0 //~ ERROR: hexadecimal float literal is not supported -; diff --git a/src/test/compile-fail/lex-int-lit-too-large-2.rs b/src/test/compile-fail/lex-int-lit-too-large-2.rs deleted file mode 100644 index 39d1cba64b08b..0000000000000 --- a/src/test/compile-fail/lex-int-lit-too-large-2.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms.
- -static i: int = - 99999999999999999999999999999999u32 //~ ERROR: int literal is too large -; diff --git a/src/test/compile-fail/lex-int-lit-too-large.rs b/src/test/compile-fail/lex-int-lit-too-large.rs deleted file mode 100644 index 6343be651fa59..0000000000000 --- a/src/test/compile-fail/lex-int-lit-too-large.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 99999999999999999999999999999999 //~ ERROR: int literal is too large -; diff --git a/src/test/compile-fail/lex-no-valid-digits-2.rs b/src/test/compile-fail/lex-no-valid-digits-2.rs deleted file mode 100644 index 549dbf5bc8c6c..0000000000000 --- a/src/test/compile-fail/lex-no-valid-digits-2.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 0xu32 //~ ERROR: no valid digits -; diff --git a/src/test/compile-fail/lex-no-valid-digits.rs b/src/test/compile-fail/lex-no-valid-digits.rs deleted file mode 100644 index 6a5b8e93f010a..0000000000000 --- a/src/test/compile-fail/lex-no-valid-digits.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 0x //~ ERROR: no valid digits -; diff --git a/src/test/compile-fail/lex-unknown-char-escape.rs b/src/test/compile-fail/lex-unknown-char-escape.rs deleted file mode 100644 index f2445c2b60eba..0000000000000 --- a/src/test/compile-fail/lex-unknown-char-escape.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static c: char = - '\●' //~ ERROR: unknown character escape -; diff --git a/src/test/compile-fail/lex-unknown-start-tok.rs b/src/test/compile-fail/lex-unknown-start-tok.rs deleted file mode 100644 index 1bb682303451b..0000000000000 --- a/src/test/compile-fail/lex-unknown-start-tok.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms.
- -fn main() { - ● //~ ERROR: unknown start of token -} diff --git a/src/test/compile-fail/lex-unknown-str-escape.rs b/src/test/compile-fail/lex-unknown-str-escape.rs deleted file mode 100644 index 9a59c4227114b..0000000000000 --- a/src/test/compile-fail/lex-unknown-str-escape.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static s: &'static str = - "\●" //~ ERROR: unknown character escape -; diff --git a/src/test/compile-fail/lex-unterminated-char-const.rs b/src/test/compile-fail/lex-unterminated-char-const.rs deleted file mode 100644 index 551360ff9e095..0000000000000 --- a/src/test/compile-fail/lex-unterminated-char-const.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static c: char = - '● //~ ERROR: unterminated character constant -; diff --git a/src/test/compile-fail/no-oct-float-literal.rs b/src/test/compile-fail/no-oct-float-literal.rs deleted file mode 100644 index 511116b1c559c..0000000000000 --- a/src/test/compile-fail/no-oct-float-literal.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// error-pattern:octal float literal is not supported - -fn main() { - 0o123f64; - 0o123.456; - 0o123p4f; -} diff --git a/src/test/compile-fail/lex-bad-fp-base-2.rs b/src/test/run-pass/string-escapes.rs similarity index 84% rename from src/test/compile-fail/lex-bad-fp-base-2.rs rename to src/test/run-pass/string-escapes.rs index b1d45f78e4a5b..7abe8276a9782 100644 --- a/src/test/compile-fail/lex-bad-fp-base-2.rs +++ b/src/test/run-pass/string-escapes.rs @@ -9,5 +9,7 @@ // except according to those terms. fn main() { - let b = 0o2f32; //~ ERROR: octal float literal is not supported + let x = "\\\\\ + "; + assert!(x == r"\\"); // extraneous whitespace stripped }
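A note on the new string-escapes.rs test above: a backslash immediately before a newline inside a string literal is a line continuation, so the newline and any leading whitespace on the next line are dropped from the value. Of the five backslashes in the test, the first four encode two literal backslashes and the fifth eats the line break, which is why the result equals r"\\". Below is a minimal self-contained sketch of the same rule; the variable names are illustrative and not part of the patch.

fn main() {
    // `\` at the end of a line continues the string literal: the newline and
    // the indentation that follows are not part of the value.
    let x = "foo\
             bar";
    assert!(x == "foobar");

    // Mirrors the renamed test: `\\` twice gives two literal backslashes,
    // then the fifth `\` consumes the line break and leading whitespace.
    let y = "\\\\\
             ";
    assert!(y == r"\\");
}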
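The consolidated lex-bad-numeric-literals.rs test reads more easily against a summary of what the lexer still accepts: integer literals may use the 0x, 0o, and 0b prefixes (with or without a type suffix), while float literals (anything with a decimal point, an exponent, or a float suffix) must be written in plain decimal. A hedged sketch of the accepted forms follows; the values are illustrative only.

fn main() {
    let a = 0o17;       // octal integer: accepted
    let b = 0b1010u32;  // binary integer with a suffix: accepted
    let c = 0x1f;       // hexadecimal integer: accepted
    let d = 10.5;       // decimal float: accepted
    let e = 1e5f64;     // decimal float with exponent and suffix: accepted
    // By contrast, 0o1.0, 0o4e4, 0b101f64, and 0x8.0e+9 are rejected by the
    // lexer, as the compile-fail test above exercises.
    let _ = (a, b, c, d, e);
}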
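Returning to the matches_codepattern doc comment near the top of this section: its contract is that any run of ' ', '\t', '\r', or '\n' on one side matches any run of whitespace on the other. The sketch below approximates that contract by collapsing each whitespace run to a single space before comparing. It is an illustration of the documented behavior, not the patch's implementation (which walks both strings in lock-step using scan_for_non_ws_or_end), and it is written against present-day Rust string APIs.

fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\r' || c == '\n'
}

// Collapse every whitespace run in `s` to a single space.
fn collapse_ws(s: &str) -> String {
    let mut out = String::new();
    let mut in_ws = false;
    for c in s.chars() {
        if is_whitespace(c) {
            if !in_ws {
                out.push(' ');
            }
            in_ws = true;
        } else {
            out.push(c);
            in_ws = false;
        }
    }
    out
}

// Whitespace-insensitive comparison in the spirit of `matches_codepattern`.
fn matches_collapsed(a: &str, b: &str) -> bool {
    collapse_ws(a) == collapse_ws(b)
}

fn main() {
    assert!(matches_collapsed("fn  foo( )", "fn foo(\t)"));
    assert!(!matches_collapsed("fn foo()", "fn bar()"));
}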