@@ -14,7 +14,7 @@ use rustc_ast::ptr::P;
1414use rustc_ast:: token:: { self , BinOpToken , CommentKind , Delimiter , Nonterminal , Token , TokenKind } ;
1515use rustc_ast:: tokenstream:: { Spacing , TokenStream , TokenTree } ;
1616use rustc_ast:: util:: classify;
17- use rustc_ast:: util:: comments:: { gather_comments , Comment , CommentStyle } ;
17+ use rustc_ast:: util:: comments:: { Comment , CommentStyle } ;
1818use rustc_ast:: util:: parser;
1919use rustc_ast:: { self as ast, AttrArgs , AttrArgsEq , BlockCheckMode , PatKind } ;
2020use rustc_ast:: { attr, BindingAnnotation , ByRef , DelimArgs , RangeEnd , RangeSyntax , Term } ;
@@ -24,7 +24,7 @@ use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
2424use rustc_span:: edition:: Edition ;
2525use rustc_span:: source_map:: { SourceMap , Spanned } ;
2626use rustc_span:: symbol:: { kw, sym, Ident , IdentPrinter , Symbol } ;
27- use rustc_span:: { BytePos , FileName , Span , DUMMY_SP } ;
27+ use rustc_span:: { BytePos , CharPos , FileName , Pos , Span , DUMMY_SP } ;
2828use std:: borrow:: Cow ;
2929use thin_vec:: ThinVec ;
3030
@@ -59,6 +59,127 @@ pub struct Comments<'a> {
5959 current : usize ,
6060}
6161
62+ /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
63+ /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
64+ /// whitespace. Note that `k` may be outside bounds of `s`.
65+ fn all_whitespace ( s : & str , col : CharPos ) -> Option < usize > {
66+ let mut idx = 0 ;
67+ for ( i, ch) in s. char_indices ( ) . take ( col. to_usize ( ) ) {
68+ if !ch. is_whitespace ( ) {
69+ return None ;
70+ }
71+ idx = i + ch. len_utf8 ( ) ;
72+ }
73+ Some ( idx)
74+ }
75+
76+ fn trim_whitespace_prefix ( s : & str , col : CharPos ) -> & str {
77+ let len = s. len ( ) ;
78+ match all_whitespace ( s, col) {
79+ Some ( col) => {
80+ if col < len {
81+ & s[ col..]
82+ } else {
83+ ""
84+ }
85+ }
86+ None => s,
87+ }
88+ }
89+
90+ fn split_block_comment_into_lines ( text : & str , col : CharPos ) -> Vec < String > {
91+ let mut res: Vec < String > = vec ! [ ] ;
92+ let mut lines = text. lines ( ) ;
93+ // just push the first line
94+ res. extend ( lines. next ( ) . map ( |it| it. to_string ( ) ) ) ;
95+ // for other lines, strip common whitespace prefix
96+ for line in lines {
97+ res. push ( trim_whitespace_prefix ( line, col) . to_string ( ) )
98+ }
99+ res
100+ }
101+
102+ fn gather_comments ( sm : & SourceMap , path : FileName , src : String ) -> Vec < Comment > {
103+ let sm = SourceMap :: new ( sm. path_mapping ( ) . clone ( ) ) ;
104+ let source_file = sm. new_source_file ( path, src) ;
105+ let text = ( * source_file. src . as_ref ( ) . unwrap ( ) ) . clone ( ) ;
106+
107+ let text: & str = text. as_str ( ) ;
108+ let start_bpos = source_file. start_pos ;
109+ let mut pos = 0 ;
110+ let mut comments: Vec < Comment > = Vec :: new ( ) ;
111+ let mut code_to_the_left = false ;
112+
113+ if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
114+ comments. push ( Comment {
115+ style : CommentStyle :: Isolated ,
116+ lines : vec ! [ text[ ..shebang_len] . to_string( ) ] ,
117+ pos : start_bpos,
118+ } ) ;
119+ pos += shebang_len;
120+ }
121+
122+ for token in rustc_lexer:: tokenize ( & text[ pos..] ) {
123+ let token_text = & text[ pos..pos + token. len as usize ] ;
124+ match token. kind {
125+ rustc_lexer:: TokenKind :: Whitespace => {
126+ if let Some ( mut idx) = token_text. find ( '\n' ) {
127+ code_to_the_left = false ;
128+ while let Some ( next_newline) = & token_text[ idx + 1 ..] . find ( '\n' ) {
129+ idx += 1 + next_newline;
130+ comments. push ( Comment {
131+ style : CommentStyle :: BlankLine ,
132+ lines : vec ! [ ] ,
133+ pos : start_bpos + BytePos ( ( pos + idx) as u32 ) ,
134+ } ) ;
135+ }
136+ }
137+ }
138+ rustc_lexer:: TokenKind :: BlockComment { doc_style, .. } => {
139+ if doc_style. is_none ( ) {
140+ let code_to_the_right = !matches ! (
141+ text[ pos + token. len as usize ..] . chars( ) . next( ) ,
142+ Some ( '\r' | '\n' )
143+ ) ;
144+ let style = match ( code_to_the_left, code_to_the_right) {
145+ ( _, true ) => CommentStyle :: Mixed ,
146+ ( false , false ) => CommentStyle :: Isolated ,
147+ ( true , false ) => CommentStyle :: Trailing ,
148+ } ;
149+
150+ // Count the number of chars since the start of the line by rescanning.
151+ let pos_in_file = start_bpos + BytePos ( pos as u32 ) ;
152+ let line_begin_in_file = source_file. line_begin_pos ( pos_in_file) ;
153+ let line_begin_pos = ( line_begin_in_file - start_bpos) . to_usize ( ) ;
154+ let col = CharPos ( text[ line_begin_pos..pos] . chars ( ) . count ( ) ) ;
155+
156+ let lines = split_block_comment_into_lines ( token_text, col) ;
157+ comments. push ( Comment { style, lines, pos : pos_in_file } )
158+ }
159+ }
160+ rustc_lexer:: TokenKind :: LineComment { doc_style } => {
161+ if doc_style. is_none ( ) {
162+ comments. push ( Comment {
163+ style : if code_to_the_left {
164+ CommentStyle :: Trailing
165+ } else {
166+ CommentStyle :: Isolated
167+ } ,
168+ lines : vec ! [ token_text. to_string( ) ] ,
169+ pos : start_bpos + BytePos ( pos as u32 ) ,
170+ } )
171+ }
172+ }
173+ _ => {
174+ code_to_the_left = true ;
175+ }
176+ }
177+ pos += token. len as usize ;
178+ }
179+
180+ comments
181+ }
182+
62183impl < ' a > Comments < ' a > {
63184 pub fn new ( sm : & ' a SourceMap , filename : FileName , input : String ) -> Comments < ' a > {
64185 let comments = gather_comments ( sm, filename, input) ;
0 commit comments