Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move doc comment parsing to rustc_lexer #75642

Merged
merged 2 commits into from
Aug 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 4 additions & 44 deletions src/librustc_ast/util/comments.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use crate::ast::AttrStyle;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};

Expand All @@ -24,45 +23,6 @@ pub struct Comment {
pub pos: BytePos,
}

/// Classifies a complete line comment (starting with `//`) as a doc comment or not.
///
/// Returns `Some(AttrStyle::Inner)` for `//!`, `Some(AttrStyle::Outer)` for `///`
/// (exactly three slashes), and `None` for every other line comment.
///
/// # Panics
///
/// Panics if `line_comment` does not start with `//`.
pub fn line_doc_comment_style(line_comment: &str) -> Option<AttrStyle> {
    let line_comment = line_comment.as_bytes();
    assert!(line_comment.starts_with(b"//"));
    // The assertion above guarantees at least two bytes, so this slice cannot panic.
    match &line_comment[2..] {
        // `//!` opens an inner line doc comment.
        [b'!', ..] => Some(AttrStyle::Inner),
        // Four or more slashes (`////`) is an ordinary comment, not a doc comment.
        [b'/', b'/', ..] => None,
        // Exactly three slashes (`///`) opens an outer line doc comment.
        [b'/', ..] => Some(AttrStyle::Outer),
        // Anything else, including a bare `//`, is a regular comment.
        _ => None,
    }
}

/// Classifies a complete block comment (starting with `/*`) as a doc comment or not.
///
/// Returns `Some(AttrStyle::Inner)` for `/*!`, `Some(AttrStyle::Outer)` for `/**`
/// (exactly two stars, and not the empty comment `/**/`), and `None` otherwise.
///
/// # Panics
///
/// Panics if `block_comment` does not start with `/*`, or if `terminated` is `true`
/// and `block_comment` does not end with `*/`.
pub fn block_doc_comment_style(block_comment: &str, terminated: bool) -> Option<AttrStyle> {
    let block_comment = block_comment.as_bytes();
    assert!(block_comment.starts_with(b"/*"));
    assert!(!terminated || block_comment.ends_with(b"*/"));
    // The first assertion guarantees at least two bytes, so this slice cannot panic.
    match &block_comment[2..] {
        // `/*!` opens an inner block doc comment.
        [b'!', ..] => Some(AttrStyle::Inner),
        // Three or more stars (`/***`) is an ordinary comment.
        [b'*', b'*', ..] => None,
        // The whole comment is exactly `/**/`: an empty regular comment.
        [b'*', b'/'] => None,
        // Any other `/**` opens an outer block doc comment.
        [b'*', ..] => Some(AttrStyle::Outer),
        // Everything else is a regular block comment.
        _ => None,
    }
}

/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol) -> String {
Expand Down Expand Up @@ -216,8 +176,8 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
}
}
}
rustc_lexer::TokenKind::BlockComment { terminated } => {
if block_doc_comment_style(token_text, terminated).is_none() {
rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_none() {
let code_to_the_right = match text[pos + token.len..].chars().next() {
Some('\r' | '\n') => false,
_ => true,
Expand All @@ -238,8 +198,8 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
comments.push(Comment { style, lines, pos: pos_in_file })
}
}
rustc_lexer::TokenKind::LineComment => {
if line_doc_comment_style(token_text).is_none() {
rustc_lexer::TokenKind::LineComment { doc_style } => {
if doc_style.is_none() {
comments.push(Comment {
style: if code_to_the_left {
CommentStyle::Trailing
Expand Down
7 changes: 0 additions & 7 deletions src/librustc_ast/util/comments/tests.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
use super::*;
use rustc_span::with_default_session_globals;

// Checks which `//`-prefixed strings `line_doc_comment_style` reports as doc comments.
#[test]
fn line_doc_comments() {
// Exactly three slashes, with or without trailing text, is a doc comment.
assert!(line_doc_comment_style("///").is_some());
assert!(line_doc_comment_style("/// blah").is_some());
// Four slashes is a regular comment, not a doc comment.
assert!(line_doc_comment_style("////").is_none());
}

#[test]
fn test_block_doc_comment_1() {
with_default_session_globals(|| {
Expand Down
35 changes: 30 additions & 5 deletions src/librustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ impl Token {
pub enum TokenKind {
// Multi-char tokens:
/// "// comment"
LineComment,
LineComment { doc_style: Option<DocStyle> },
/// `/* block comment */`
///
/// Block comments can be recursive, so the sequence like `/* /* */`
/// will not be considered terminated and will result in a parsing error.
BlockComment { terminated: bool },
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
/// Any whitespace characters sequence.
Whitespace,
/// "ident" or "continue"
Expand Down Expand Up @@ -129,6 +129,12 @@ pub enum TokenKind {
Unknown,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum DocStyle {
Outer,
Inner,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99"
Expand Down Expand Up @@ -188,7 +194,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
// a doc comment (due to `TokenKind::(Line,Block)Comment` ambiguity at lexer level),
// then it may be valid Rust code, so consider it Rust code.
let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok|
!matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
!matches!(tok, TokenKind::Whitespace | TokenKind::LineComment { .. } | TokenKind::BlockComment { .. })
);
if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
// No other choice than to consider this a shebang.
Expand Down Expand Up @@ -410,13 +416,32 @@ impl Cursor<'_> {
/// Lexes a line comment and records whether it is a doc comment.
///
/// On entry the previous character and the current character are both `/`
/// (checked by the `debug_assert!` below). The doc style is decided from the
/// characters that follow the `//` prefix before the rest of the comment is eaten.
fn line_comment(&mut self) -> TokenKind {
debug_assert!(self.prev() == '/' && self.first() == '/');
self.bump();

let doc_style = match self.first() {
// `//!` is an inner line doc comment.
'!' => Some(DocStyle::Inner),
// `///` is an outer line doc comment, unless a fourth `/` follows:
// `////` (more than 3 slashes) is not considered a doc comment.
'/' if self.second() != '/' => Some(DocStyle::Outer),
_ => None,
};

// The comment body runs until the next `\n` (the predicate stops there).
self.eat_while(|c| c != '\n');
LineComment
LineComment { doc_style }
}

fn block_comment(&mut self) -> TokenKind {
debug_assert!(self.prev() == '/' && self.first() == '*');
self.bump();

let doc_style = match self.first() {
// `/*!` is an inner block doc comment.
'!' => Some(DocStyle::Inner),
// `/***` (more than 2 stars) is not considered a doc comment.
// `/**/` is not considered a doc comment.
'*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
_ => None,
};

let mut depth = 1usize;
while let Some(c) = self.bump() {
match c {
Expand All @@ -438,7 +463,7 @@ impl Cursor<'_> {
}
}

BlockComment { terminated: depth == 0 }
BlockComment { doc_style, terminated: depth == 0 }
}

fn whitespace(&mut self) -> TokenKind {
Expand Down
96 changes: 55 additions & 41 deletions src/librustc_parse/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use rustc_ast::ast::AttrStyle;
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_ast::util::comments;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError};
use rustc_lexer::Base;
Expand All @@ -15,7 +15,7 @@ mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;

use rustc_lexer::unescape::Mode;
use rustc_lexer::{unescape::Mode, DocStyle};
use unescape_error_reporting::{emit_unescape_error, push_escaped_char};

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -168,25 +168,23 @@ impl<'a> StringReader<'a> {
/// symbols and runs additional validation.
fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> TokenKind {
match token {
rustc_lexer::TokenKind::LineComment => {
let string = self.str_from(start);
if let Some(attr_style) = comments::line_doc_comment_style(string) {
self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
// Opening delimiter of the length 3 is not included into the symbol.
token::DocComment(CommentKind::Line, attr_style, Symbol::intern(&string[3..]))
} else {
token::Comment
rustc_lexer::TokenKind::LineComment { doc_style } => {
match doc_style {
Some(doc_style) => {
// Opening delimiter of the length 3 is not included into the symbol.
let content_start = start + BytePos(3);
let content = self.str_from(content_start);

self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
}
None => token::Comment,
}
}
rustc_lexer::TokenKind::BlockComment { terminated } => {
let string = self.str_from(start);
let attr_style = comments::block_doc_comment_style(string, terminated);

rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
if !terminated {
let msg = if attr_style.is_some() {
"unterminated block doc-comment"
} else {
"unterminated block comment"
let msg = match doc_style {
Some(_) => "unterminated block doc-comment",
None => "unterminated block comment",
};
let last_bpos = self.pos;
self.sess
Expand All @@ -199,18 +197,17 @@ impl<'a> StringReader<'a> {
.emit();
FatalError.raise();
}

if let Some(attr_style) = attr_style {
self.forbid_bare_cr(start, string, "bare CR not allowed in block doc-comment");
// Opening delimiter of the length 3 and closing delimiter of the length 2
// are not included into the symbol.
token::DocComment(
CommentKind::Block,
attr_style,
Symbol::intern(&string[3..string.len() - if terminated { 2 } else { 0 }]),
)
} else {
token::Comment
match doc_style {
Some(doc_style) => {
// Opening delimiter of the length 3 and closing delimiter of the length 2
// are not included into the symbol.
let content_start = start + BytePos(3);
let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
let content = self.str_from_to(content_start, content_end);

self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
}
None => token::Comment,
}
}
rustc_lexer::TokenKind::Whitespace => token::Whitespace,
Expand Down Expand Up @@ -319,6 +316,34 @@ impl<'a> StringReader<'a> {
}
}

/// Builds a `token::DocComment` from the text of a doc comment.
///
/// `content` is the comment text that becomes the token's symbol, and
/// `content_start` is the position of its first byte. Every `\r` found in
/// `content` is reported as an error on the one-byte span that covers it; the
/// message depends on whether the comment is a line or a block comment.
fn cook_doc_comment(
    &self,
    content_start: BytePos,
    content: &str,
    comment_kind: CommentKind,
    doc_style: DocStyle,
) -> TokenKind {
    // The wording only depends on the comment kind, so pick it once up front.
    let bare_cr_msg = match comment_kind {
        CommentKind::Line => "bare CR not allowed in doc-comment",
        CommentKind::Block => "bare CR not allowed in block doc-comment",
    };

    // `match_indices` yields the byte offset of every `\r` (and nothing when
    // there is none), so no separate `contains` check is needed.
    for (cr_offset, _) in content.match_indices('\r') {
        self.err_span_(
            content_start + BytePos(cr_offset as u32),
            content_start + BytePos(cr_offset as u32 + 1),
            bare_cr_msg,
        );
    }

    // Translate the lexer-level style into the AST-level attribute style.
    let attr_style = match doc_style {
        DocStyle::Inner => AttrStyle::Inner,
        DocStyle::Outer => AttrStyle::Outer,
    };

    token::DocComment(comment_kind, attr_style, Symbol::intern(content))
}

fn cook_lexer_literal(
&self,
start: BytePos,
Expand Down Expand Up @@ -472,17 +497,6 @@ impl<'a> StringReader<'a> {
&self.src[self.src_index(start)..self.src_index(end)]
}

/// Reports `errmsg` once for every `\r` in `s`.
///
/// `start` is the position of the first byte of `s`; each error is emitted on
/// the one-byte span that covers the offending carriage return.
fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
    for (cr_offset, _) in s.match_indices('\r') {
        // Offset of the byte just past the `\r`; the span is `[past_cr - 1, past_cr)`.
        let past_cr = cr_offset + 1;
        self.err_span_(
            start + BytePos(past_cr as u32 - 1),
            start + BytePos(past_cr as u32),
            errmsg,
        );
    }
}

fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) {
match opt_err {
Some(RawStrError::InvalidStarter { bad_char }) => {
Expand Down