Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rustc_ast: Stop using "string typing" for doc comment tokens #74627

Merged
merged 5 commits into from
Aug 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/librustc_ast/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub use GenericArgs::*;
pub use UnsafeSource::*;

use crate::ptr::P;
use crate::token::{self, DelimToken};
use crate::token::{self, CommentKind, DelimToken};
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
Expand Down Expand Up @@ -2365,7 +2365,7 @@ pub enum AttrKind {
/// A doc comment (e.g. `/// ...`, `//! ...`, `/** ... */`, `/*! ... */`).
/// Doc attributes (e.g. `#[doc="..."]`) are represented with the `Normal`
/// variant (which is much less compact and thus more expensive).
DocComment(Symbol),
DocComment(CommentKind, Symbol),
}

/// `TraitRef`s appear in impls.
Expand Down
25 changes: 15 additions & 10 deletions src/librustc_ast/attr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::ast::{MacArgs, MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem};
use crate::ast::{Path, PathSegment};
use crate::mut_visit::visit_clobber;
use crate::ptr::P;
use crate::token::{self, Token};
use crate::token::{self, CommentKind, Token};
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndJoint};

use rustc_data_structures::sync::Lock;
Expand Down Expand Up @@ -169,7 +169,7 @@ impl Attribute {
pub fn has_name(&self, name: Symbol) -> bool {
match self.kind {
AttrKind::Normal(ref item) => item.path == name,
AttrKind::DocComment(_) => false,
AttrKind::DocComment(..) => false,
}
}

Expand Down Expand Up @@ -198,7 +198,7 @@ impl Attribute {
None
}
}
AttrKind::DocComment(_) => None,
AttrKind::DocComment(..) => None,
}
}
pub fn name_or_empty(&self) -> Symbol {
Expand All @@ -218,7 +218,7 @@ impl Attribute {
Some(MetaItem { kind: MetaItemKind::List(list), .. }) => Some(list),
_ => None,
},
AttrKind::DocComment(_) => None,
AttrKind::DocComment(..) => None,
}
}

Expand Down Expand Up @@ -314,13 +314,13 @@ impl Attribute {
pub fn is_doc_comment(&self) -> bool {
match self.kind {
AttrKind::Normal(_) => false,
AttrKind::DocComment(_) => true,
AttrKind::DocComment(..) => true,
}
}

pub fn doc_str(&self) -> Option<Symbol> {
match self.kind {
AttrKind::DocComment(symbol) => Some(symbol),
AttrKind::DocComment(.., data) => Some(data),
AttrKind::Normal(ref item) if item.path == sym::doc => {
item.meta(self.span).and_then(|meta| meta.value_str())
}
Expand All @@ -331,14 +331,14 @@ impl Attribute {
pub fn get_normal_item(&self) -> &AttrItem {
match self.kind {
AttrKind::Normal(ref item) => item,
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
}
}

pub fn unwrap_normal_item(self) -> AttrItem {
match self.kind {
AttrKind::Normal(item) => item,
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
}
}

Expand Down Expand Up @@ -405,8 +405,13 @@ pub fn mk_attr_outer(item: MetaItem) -> Attribute {
mk_attr(AttrStyle::Outer, item.path, item.kind.mac_args(item.span), item.span)
}

pub fn mk_doc_comment(style: AttrStyle, comment: Symbol, span: Span) -> Attribute {
Attribute { kind: AttrKind::DocComment(comment), id: mk_attr_id(), style, span }
pub fn mk_doc_comment(
comment_kind: CommentKind,
style: AttrStyle,
data: Symbol,
span: Span,
) -> Attribute {
Attribute { kind: AttrKind::DocComment(comment_kind, data), id: mk_attr_id(), style, span }
}

pub fn list_contains_name(items: &[NestedMetaItem], name: Symbol) -> bool {
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_ast/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ pub fn noop_visit_attribute<T: MutVisitor>(attr: &mut Attribute, vis: &mut T) {
vis.visit_path(path);
visit_mac_args(args, vis);
}
AttrKind::DocComment(_) => {}
AttrKind::DocComment(..) => {}
}
vis.visit_span(span);
}
Expand Down
13 changes: 10 additions & 3 deletions src/librustc_ast/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ use rustc_span::{self, Span, DUMMY_SP};
use std::borrow::Cow;
use std::{fmt, mem};

#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug, HashStable_Generic)]
pub enum CommentKind {
Line,
Block,
}

#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
#[derive(HashStable_Generic)]
pub enum BinOpToken {
Expand Down Expand Up @@ -238,9 +244,10 @@ pub enum TokenKind {

Interpolated(Lrc<Nonterminal>),

// Can be expanded into several tokens.
/// A doc comment.
DocComment(Symbol),
/// A doc comment token.
/// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
/// similarly to symbols in string literal tokens.
DocComment(CommentKind, ast::AttrStyle, Symbol),

// Junk. These carry no data because we don't really care about the data
// they *would* carry, and don't really want to allocate a new ident for
Expand Down
120 changes: 57 additions & 63 deletions src/librustc_ast/util/comments.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
pub use CommentStyle::*;

use crate::ast;
use crate::ast::AttrStyle;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};

use log::debug;

#[cfg(test)]
mod tests;

Expand All @@ -28,43 +24,48 @@ pub struct Comment {
pub pos: BytePos,
}

pub fn is_line_doc_comment(s: &str) -> bool {
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
|| s.starts_with("//!");
debug!("is {:?} a doc comment? {}", s, res);
res
}

pub fn is_block_doc_comment(s: &str) -> bool {
// Prevent `/**/` from being parsed as a doc comment
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
|| s.starts_with("/*!"))
&& s.len() >= 5;
debug!("is {:?} a doc comment? {}", s, res);
res
}

// FIXME(#64197): Try to privatize this again.
pub fn is_doc_comment(s: &str) -> bool {
(s.starts_with("///") && is_line_doc_comment(s))
|| s.starts_with("//!")
|| (s.starts_with("/**") && is_block_doc_comment(s))
|| s.starts_with("/*!")
/// For a full line comment string returns its doc comment style if it's a doc comment
/// and returns `None` if it's a regular comment.
pub fn line_doc_comment_style(line_comment: &str) -> Option<AttrStyle> {
let line_comment = line_comment.as_bytes();
assert!(line_comment.starts_with(b"//"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the choice is to panic instead of return None?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are not using this function on arbitrary strings, only on actual comments, so we can make it stricter.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case I think unwrapping at call site seems to be better.

match line_comment.get(2) {
// `//!` is an inner line doc comment.
Some(b'!') => Some(AttrStyle::Inner),
Some(b'/') => match line_comment.get(3) {
// `////` (more than 3 slashes) is not considered a doc comment.
Some(b'/') => None,
// Otherwise `///` is an outer line doc comment.
_ => Some(AttrStyle::Outer),
},
_ => None,
}
}

pub fn doc_comment_style(comment: Symbol) -> ast::AttrStyle {
let comment = &comment.as_str();
assert!(is_doc_comment(comment));
if comment.starts_with("//!") || comment.starts_with("/*!") {
ast::AttrStyle::Inner
} else {
ast::AttrStyle::Outer
/// For a full block comment string returns its doc comment style if it's a doc comment
/// and returns `None` if it's a regular comment.
pub fn block_doc_comment_style(block_comment: &str, terminated: bool) -> Option<AttrStyle> {
let block_comment = block_comment.as_bytes();
assert!(block_comment.starts_with(b"/*"));
assert!(!terminated || block_comment.ends_with(b"*/"));
match block_comment.get(2) {
// `/*!` is an inner block doc comment.
Some(b'!') => Some(AttrStyle::Inner),
Some(b'*') => match block_comment.get(3) {
// `/***` (more than 2 stars) is not considered a doc comment.
Some(b'*') => None,
// `/**/` is not considered a doc comment.
Some(b'/') if block_comment.len() == 4 => None,
// Otherwise `/**` is an outer block doc comment.
_ => Some(AttrStyle::Outer),
},
_ => None,
}
}

pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
let comment = &comment.as_str();

/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol) -> String {
/// remove whitespace-only lines from the start/end of lines
fn vertical_trim(lines: Vec<String>) -> Vec<String> {
let mut i = 0;
Expand Down Expand Up @@ -126,26 +127,15 @@ pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
}
}

// one-line comments lose their prefix
const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];

for prefix in ONELINERS {
if comment.starts_with(*prefix) {
return (&comment[prefix.len()..]).to_string();
}
}

if comment.starts_with("/*") {
let lines =
comment[3..comment.len() - 2].lines().map(|s| s.to_string()).collect::<Vec<String>>();

let data = data.as_str();
if data.contains('\n') {
let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
let lines = vertical_trim(lines);
let lines = horizontal_trim(lines);

return lines.join("\n");
lines.join("\n")
} else {
data.to_string()
}

panic!("not a doc-comment: {}", comment);
}

/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
Expand Down Expand Up @@ -203,7 +193,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme

if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
comments.push(Comment {
style: Isolated,
style: CommentStyle::Isolated,
lines: vec![text[..shebang_len].to_string()],
pos: start_bpos,
});
Expand All @@ -219,23 +209,23 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
idx = idx + 1 + next_newline;
comments.push(Comment {
style: BlankLine,
style: CommentStyle::BlankLine,
lines: vec![],
pos: start_bpos + BytePos((pos + idx) as u32),
});
}
}
}
rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
if !is_block_doc_comment(token_text) {
rustc_lexer::TokenKind::BlockComment { terminated } => {
if block_doc_comment_style(token_text, terminated).is_none() {
let code_to_the_right = match text[pos + token.len..].chars().next() {
Some('\r' | '\n') => false,
_ => true,
};
let style = match (code_to_the_left, code_to_the_right) {
(_, true) => Mixed,
(false, false) => Isolated,
(true, false) => Trailing,
(_, true) => CommentStyle::Mixed,
(false, false) => CommentStyle::Isolated,
(true, false) => CommentStyle::Trailing,
};

// Count the number of chars since the start of the line by rescanning.
Expand All @@ -249,9 +239,13 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
}
}
rustc_lexer::TokenKind::LineComment => {
if !is_doc_comment(token_text) {
if line_doc_comment_style(token_text).is_none() {
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
style: if code_to_the_left {
CommentStyle::Trailing
} else {
CommentStyle::Isolated
},
lines: vec![token_text.to_string()],
pos: start_bpos + BytePos(pos as u32),
})
Expand Down
46 changes: 19 additions & 27 deletions src/librustc_ast/util/comments/tests.rs
Original file line number Diff line number Diff line change
@@ -1,58 +1,50 @@
use super::*;
use crate::with_default_session_globals;

#[test]
fn line_doc_comments() {
assert!(line_doc_comment_style("///").is_some());
assert!(line_doc_comment_style("/// blah").is_some());
assert!(line_doc_comment_style("////").is_none());
}

#[test]
fn test_block_doc_comment_1() {
with_default_session_globals(|| {
let comment = "/**\n * Test \n ** Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n * Test \n ** Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test \n* Test\n Test");
})
}

#[test]
fn test_block_doc_comment_2() {
with_default_session_globals(|| {
let comment = "/**\n * Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n * Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test\n Test");
})
}

#[test]
fn test_block_doc_comment_3() {
with_default_session_globals(|| {
let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n let a: *i32;\n *a = 5;\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
})
}

#[test]
fn test_block_doc_comment_4() {
with_default_session_globals(|| {
let comment = "/*******************\n test\n *********************/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
assert_eq!(stripped, " test");
})
}

#[test]
fn test_line_doc_comment() {
with_default_session_globals(|| {
let stripped = strip_doc_comment_decoration(Symbol::intern("/// test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///! test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
let stripped = beautify_doc_string(Symbol::intern(" test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///test"));
assert_eq!(stripped, "test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///!test"));
assert_eq!(stripped, "test");
let stripped = strip_doc_comment_decoration(Symbol::intern("//test"));
let stripped = beautify_doc_string(Symbol::intern("! test"));
assert_eq!(stripped, "! test");
let stripped = beautify_doc_string(Symbol::intern("test"));
assert_eq!(stripped, "test");
let stripped = beautify_doc_string(Symbol::intern("!test"));
assert_eq!(stripped, "!test");
})
}
Loading