From 9d25283b11dc5100af0e5c2d76bb6896d29b4714 Mon Sep 17 00:00:00 2001 From: Stephen Crane Date: Fri, 26 Jul 2019 13:59:07 -0700 Subject: [PATCH 01/15] Track source ranges for each C AST element for comment handling Comments which fall into a source range, e.g. of a function, but are not attached before any other AST elements should be attached to the end of the Rust AST element they are contained in. This change makes that happen, instead of attaching trailing comments to the next Item after it. --- Cargo.lock | 1 + c2rust-ast-exporter/src/AstExporter.cpp | 84 ++++--- c2rust-ast-exporter/src/clang_ast.rs | 58 ++++- c2rust-transpile/Cargo.toml | 1 + c2rust-transpile/src/c_ast/conversion.rs | 2 +- c2rust-transpile/src/c_ast/mod.rs | 210 +++++++----------- c2rust-transpile/src/cfg/mod.rs | 72 ++++-- c2rust-transpile/src/cfg/structures.rs | 37 +-- c2rust-transpile/src/diagnostics.rs | 9 +- .../src/rust_ast/comment_store.rs | 177 +++++++++------ c2rust-transpile/src/rust_ast/mod.rs | 8 + c2rust-transpile/src/translator/mod.rs | 98 ++++++-- 12 files changed, 462 insertions(+), 295 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a9c122916..541f03ac7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -286,6 +286,7 @@ dependencies = [ "serde_cbor 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.39 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", "strum 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "strum_macros 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/c2rust-ast-exporter/src/AstExporter.cpp b/c2rust-ast-exporter/src/AstExporter.cpp index cb99623a5f..9c62df4f68 100644 --- a/c2rust-ast-exporter/src/AstExporter.cpp +++ b/c2rust-ast-exporter/src/AstExporter.cpp @@ -460,9 +460,9 @@ class TranslateASTVisitor final } // Template required because Decl and Stmt don't share a common base class - void encode_entry_raw(void *ast, ASTEntryTag tag, SourceLocation loc, - const QualType ty, bool rvalue, bool isVaList, - bool encodeMacroExpansions, + void encode_entry_raw(void *ast, ASTEntryTag tag, SourceRange loc, + const QualType ty, bool rvalue, + bool isVaList, bool encodeMacroExpansions, const std::vector &childIds, std::function extra) { if (!markForExport(ast, tag)) @@ -489,17 +489,19 @@ class TranslateASTVisitor final cbor_encoder_close_container(&local, &childEnc); // 3 - File number - // 4 - Line number - // 5 - Column number - encodeSourcePos(&local, loc, isVaList); + // 4 - Begin Line number + // 5 - Begin Column number + // 6 - End Line number + // 7 - End Column number + encodeSourceSpan(&local, loc, isVaList); - // 6 - Type ID (only for expressions) + // 8 - Type ID (only for expressions) encode_qualtype(&local, ty); - // 7 - Is Rvalue (only for expressions) + // 9 - Is Rvalue (only for expressions) cbor_encode_boolean(&local, rvalue); - // 8 - Macro expansion stack, starting with initial macro call and ending + // 10 - Macro expansion stack, starting with initial macro call and ending // with the innermost replacement. cbor_encoder_create_array(&local, &childEnc, encodeMacroExpansions ? curMacroExpansionStack.size() : 0); @@ -511,7 +513,7 @@ class TranslateASTVisitor final } cbor_encoder_close_container(&local, &childEnc); - // 9.. - Extra entries + // 11.. - Extra entries extra(&local); cbor_encoder_close_container(encoder, &local); @@ -531,13 +533,8 @@ class TranslateASTVisitor final auto ty = ast->getType(); auto isVaList = false; auto encodeMacroExpansions = true; -#if CLANG_VERSION_MAJOR < 8 - SourceLocation loc = ast->getLocStart(); -#else - SourceLocation loc = ast->getBeginLoc(); -#endif // CLANG_VERSION_MAJOR - encode_entry_raw(ast, tag, loc, ty, ast->isRValue(), isVaList, encodeMacroExpansions, - childIds, extra); + encode_entry_raw(ast, tag, ast->getSourceRange(), ty, ast->isRValue(), isVaList, + encodeMacroExpansions, childIds, extra); typeEncoder.VisitQualType(ty); } @@ -548,13 +545,8 @@ class TranslateASTVisitor final auto rvalue = false; auto isVaList = false; auto encodeMacroExpansions = false; -#if CLANG_VERSION_MAJOR < 8 - SourceLocation loc = ast->getLocStart(); -#else - SourceLocation loc = ast->getBeginLoc(); -#endif // CLANG_VERSION_MAJOR - encode_entry_raw(ast, tag, loc, s, rvalue, isVaList, encodeMacroExpansions, - childIds, extra); + encode_entry_raw(ast, tag, ast->getSourceRange(), s, rvalue, isVaList, + encodeMacroExpansions, childIds, extra); } void encode_entry( @@ -563,7 +555,7 @@ class TranslateASTVisitor final std::function extra = [](CborEncoder *) {}) { auto rvalue = false; auto encodeMacroExpansions = false; - encode_entry_raw(ast, tag, ast->getLocation(), T, rvalue, + encode_entry_raw(ast, tag, ast->getSourceRange(), T, rvalue, isVaList(ast, T), encodeMacroExpansions, childIds, extra); } @@ -571,7 +563,7 @@ class TranslateASTVisitor final /// definition location is not the same as the canonical declaration /// location. void encode_entry( - Decl *ast, ASTEntryTag tag, SourceLocation loc, + Decl *ast, ASTEntryTag tag, SourceRange loc, const std::vector &childIds, const QualType T, std::function extra = [](CborEncoder *) {}) { auto rvalue = false; @@ -702,7 +694,8 @@ class TranslateASTVisitor final std::vector childIds(Info.Expressions.begin(), Info.Expressions.end()); - encode_entry_raw(Mac, tag, Mac->getDefinitionLoc(), QualType(), false, + auto range = SourceRange(Mac->getDefinitionLoc(), Mac->getDefinitionEndLoc()); + encode_entry_raw(Mac, tag, range, QualType(), false, false, false, childIds, [Name](CborEncoder *local) { cbor_encode_string(local, Name.str()); }); @@ -719,15 +712,41 @@ class TranslateASTVisitor final manager.isMacroBodyExpansion(loc)) loc = manager.getFileLoc(loc); + auto fileid = getExporterFileId(manager.getFileID(loc), isVaList); auto line = manager.getPresumedLineNumber(loc); auto col = manager.getPresumedColumnNumber(loc); - auto fileid = getExporterFileId(manager.getFileID(loc), isVaList); cbor_encode_uint(enc, fileid); cbor_encode_uint(enc, line); cbor_encode_uint(enc, col); } + void encodeSourceSpan(CborEncoder *enc, SourceRange loc, bool isVaList = false) { + auto &manager = Context->getSourceManager(); + + auto begin = loc.getBegin(); + auto end = loc.getEnd(); + // A check to see if the Source Location is a Macro + if (manager.isMacroArgExpansion(begin) || + manager.isMacroBodyExpansion(begin)) + begin = manager.getFileLoc(begin); + if (manager.isMacroArgExpansion(end) || + manager.isMacroBodyExpansion(end)) + end = manager.getFileLoc(end); + + auto fileid = getExporterFileId(manager.getFileID(begin), isVaList); + auto begin_line = manager.getPresumedLineNumber(begin); + auto begin_col = manager.getPresumedColumnNumber(begin); + auto end_line = manager.getPresumedLineNumber(end); + auto end_col = manager.getPresumedColumnNumber(end); + + cbor_encode_uint(enc, fileid); + cbor_encode_uint(enc, begin_line); + cbor_encode_uint(enc, begin_col); + cbor_encode_uint(enc, end_line); + cbor_encode_uint(enc, end_col); + } + uint64_t getExporterFileId(FileID id, bool isVaList) { if (id.isInvalid()) return 0; @@ -1560,10 +1579,10 @@ class TranslateASTVisitor final if (!FD->isCanonicalDecl()) { // Emit non-canonical decl so we have a placeholder to attach comments to std::vector childIds = {FD->getCanonicalDecl()}; - auto loc = FD->getLocation(); + auto span = FD->getSourceRange(); if (FD->doesThisDeclarationHaveABody()) - loc = FD->getCanonicalDecl()->getLocation(); - encode_entry(FD, TagNonCanonicalDecl, loc, childIds, FD->getType()); + span = FD->getCanonicalDecl()->getSourceRange(); + encode_entry(FD, TagNonCanonicalDecl, span, childIds, FD->getType()); return true; } @@ -1588,8 +1607,9 @@ class TranslateASTVisitor final childIds.push_back(body); auto functionType = FD->getType(); + auto span = paramsFD->getSourceRange(); encode_entry( - FD, TagFunctionDecl, paramsFD->getLocation(), childIds, functionType, + FD, TagFunctionDecl, span, childIds, functionType, [this, FD](CborEncoder *array) { auto name = FD->getNameAsString(); cbor_encode_string(array, name); diff --git a/c2rust-ast-exporter/src/clang_ast.rs b/c2rust-ast-exporter/src/clang_ast.rs index 8f3eb42759..056a1eb4bf 100644 --- a/c2rust-ast-exporter/src/clang_ast.rs +++ b/c2rust-ast-exporter/src/clang_ast.rs @@ -22,18 +22,56 @@ impl LRValue { } } -#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +#[derive(Copy, Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] pub struct SrcLoc { pub fileid: u64, pub line: u64, pub column: u64, } +#[derive(Copy, Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct SrcSpan { + pub fileid: u64, + pub begin_line: u64, + pub begin_column: u64, + pub end_line: u64, + pub end_column: u64, +} + +impl From for SrcSpan { + fn from(loc: SrcLoc) -> Self { + Self { + fileid: loc.fileid, + begin_line: loc.line, + begin_column: loc.column, + end_line: loc.line, + end_column: loc.column, + } + } +} + +impl SrcSpan { + pub fn begin(&self) -> SrcLoc { + SrcLoc { + fileid: self.fileid, + line: self.begin_line, + column: self.begin_column, + } + } + pub fn end(&self) -> SrcLoc { + SrcLoc { + fileid: self.fileid, + line: self.end_line, + column: self.end_column, + } + } +} + #[derive(Debug, Clone)] pub struct AstNode { pub tag: ASTEntryTag, pub children: Vec>, - pub loc: SrcLoc, + pub loc: SrcSpan, pub type_id: Option, pub rvalue: LRValue, @@ -152,10 +190,10 @@ pub fn process(items: Value) -> error::Result { .map(|x| expect_opt_u64(x).unwrap()) .collect::>>(); - let type_id: Option = expect_opt_u64(&entry[6]).unwrap(); + let type_id: Option = expect_opt_u64(&entry[8]).unwrap(); let fileid = entry[3].as_u64().unwrap(); - let macro_expansions = entry[8] + let macro_expansions = entry[10] .as_array() .unwrap() .iter() @@ -165,19 +203,21 @@ pub fn process(items: Value) -> error::Result { let node = AstNode { tag: import_ast_tag(tag), children, - loc: SrcLoc { + loc: SrcSpan { fileid, - line: entry[4].as_u64().unwrap(), - column: entry[5].as_u64().unwrap(), + begin_line: entry[4].as_u64().unwrap(), + begin_column: entry[5].as_u64().unwrap(), + end_line: entry[6].as_u64().unwrap(), + end_column: entry[7].as_u64().unwrap(), }, type_id, - rvalue: if entry[7].as_boolean().unwrap() { + rvalue: if entry[9].as_boolean().unwrap() { LRValue::RValue } else { LRValue::LValue }, macro_expansions, - extras: entry[9..].to_vec(), + extras: entry[11..].to_vec(), }; asts.insert(entry_id, node); diff --git a/c2rust-transpile/Cargo.toml b/c2rust-transpile/Cargo.toml index e96cd34fa7..10d35757f6 100644 --- a/c2rust-transpile/Cargo.toml +++ b/c2rust-transpile/Cargo.toml @@ -27,6 +27,7 @@ handlebars = "1.1.0" itertools = "0.8" pathdiff = "0.1.0" regex = "1" +smallvec = "0.6" strum = "0.15" strum_macros = "0.15" log = "0.4" diff --git a/c2rust-transpile/src/c_ast/conversion.rs b/c2rust-transpile/src/c_ast/conversion.rs index f0fc4c2af8..bdb1a2d84b 100644 --- a/c2rust-transpile/src/c_ast/conversion.rs +++ b/c2rust-transpile/src/c_ast/conversion.rs @@ -343,7 +343,7 @@ impl ConversionContext { fn convert(&mut self, untyped_context: &AstContext) -> () { for raw_comment in &untyped_context.comments { let comment = Located { - loc: Some(raw_comment.loc.clone()), + loc: Some(raw_comment.loc.into()), kind: raw_comment.string.clone(), }; self.typed_context.comments.push(comment); diff --git a/c2rust-transpile/src/c_ast/mod.rs b/c2rust-transpile/src/c_ast/mod.rs index 5d510d62ea..b4c3a84df6 100644 --- a/c2rust-transpile/src/c_ast/mod.rs +++ b/c2rust-transpile/src/c_ast/mod.rs @@ -1,14 +1,14 @@ use c2rust_ast_exporter::clang_ast::LRValue; use indexmap::{IndexMap, IndexSet}; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; use std::cmp::Ordering; use std::fmt::{self, Debug, Display}; -use std::iter::FromIterator; use std::mem; use std::ops::Index; use std::path::{Path, PathBuf}; -pub use c2rust_ast_exporter::clang_ast::{SrcFile, SrcLoc}; +pub use c2rust_ast_exporter::clang_ast::{SrcFile, SrcLoc, SrcSpan}; #[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Copy, Clone)] pub struct CTypeId(pub u64); @@ -34,7 +34,6 @@ pub type CEnumConstantId = CDeclId; // Enum's need to point to child 'DeclKind:: pub use self::conversion::*; pub use self::print::Printer; -use super::diagnostics::Diagnostic; mod conversion; pub mod iterators; @@ -78,20 +77,19 @@ pub struct TypedAstContext { /// Comments associated with a typed AST context #[derive(Debug, Clone)] pub struct CommentContext { - decl_comments: HashMap>, - stmt_comments: HashMap>, + comments_by_file: HashMap>>>, } #[derive(Debug, Clone)] -pub struct DisplaySrcLoc { +pub struct DisplaySrcSpan { file: Option, - loc: SrcLoc, + loc: SrcSpan, } -impl Display for DisplaySrcLoc { +impl Display for DisplaySrcSpan { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if let Some(ref file) = self.file { - write!(f, "{}:{}:{}", file.display(), self.loc.line, self.loc.column) + write!(f, "{}:{}:{}", file.display(), self.loc.begin_line, self.loc.begin_column) } else { Debug::fmt(self, f) } @@ -103,10 +101,19 @@ pub type FileId = usize; /// Represents some AST node possibly with source location information bundled with it #[derive(Debug, Clone)] pub struct Located { - pub loc: Option, + pub loc: Option, pub kind: T, } +impl Located { + pub fn begin_loc(&self) -> Option { + self.loc.map(|loc| loc.begin()) + } + pub fn end_loc(&self) -> Option { + self.loc.map(|loc| loc.end()) + } +} + impl TypedAstContext { pub fn new(clang_files: &[SrcFile]) -> TypedAstContext { let mut files: Vec = vec![]; @@ -155,9 +162,9 @@ impl TypedAstContext { } } - pub fn display_loc(&self, loc: &Option) -> Option { + pub fn display_loc(&self, loc: &Option) -> Option { loc.as_ref().map(|loc| { - DisplaySrcLoc { + DisplaySrcSpan { file: self.files[self.file_map[loc.fileid as usize]].path.clone(), loc: loc.clone(), } @@ -209,7 +216,7 @@ impl TypedAstContext { } pub fn file_id(&self, located: &Located) -> Option { - located.loc.as_ref().map(|loc| self.file_map[loc.fileid as usize]) + located.loc.as_ref().and_then(|loc| self.file_map.get(loc.fileid as usize).copied()) } pub fn iter_decls(&self) -> indexmap::map::Iter { @@ -568,7 +575,7 @@ impl TypedAstContext { (None, None) => Ordering::Equal, (None, _) => Ordering::Less, (_, None) => Ordering::Greater, - (Some(a), Some(b)) => self.compare_src_locs(a, b), + (Some(a), Some(b)) => self.compare_src_locs(&a.begin(), &b.begin()), } }); self.c_decls_top = decls_top; @@ -578,133 +585,86 @@ impl TypedAstContext { impl CommentContext { pub fn empty() -> CommentContext { CommentContext { - decl_comments: HashMap::new(), - stmt_comments: HashMap::new(), + comments_by_file: HashMap::new(), } } - // Try to match up every comment with a declaration or a statement + /// Build a CommentContext from the comments in this `ast_context` pub fn new(ast_context: &mut TypedAstContext) -> CommentContext { - // Group and sort declarations by file and by position - let mut decls: HashMap> = HashMap::new(); - for (decl_id, ref loc_decl) in &ast_context.c_decls { - if let Some(ref loc) = loc_decl.loc { - decls - .entry(loc.fileid) - .or_insert(vec![]) - .push((loc.clone(), *decl_id)); - } - } - decls.iter_mut().for_each(|(_, v)| v.sort()); - - // Group and sort statements by file and by position - let mut stmts: HashMap> = HashMap::new(); - for (stmt_id, ref loc_stmt) in &ast_context.c_stmts { - if let Some(ref loc) = loc_stmt.loc { - stmts - .entry(loc.fileid) - .or_insert(vec![]) - .push((loc.clone(), *stmt_id)); - } - } - stmts.iter_mut().for_each(|(_, v)| v.sort()); - - let mut decl_comments_map: HashMap> = HashMap::new(); - let mut stmt_comments_map: HashMap> = HashMap::new(); - - let empty_vec1 = &vec![]; - let empty_vec2 = &vec![]; - - // Match comments to declarations and statements - while let Some(Located { loc, kind: comment }) = ast_context.comments.pop() { - if let Some(loc) = loc { - let this_file_decls = decls.get(&loc.fileid).unwrap_or(empty_vec1); - let this_file_stmts = stmts.get(&loc.fileid).unwrap_or(empty_vec2); - - // Find the closest declaration and statement - let decl_ix = this_file_decls - .binary_search_by_key(&loc.line, |&(ref l, _)| l.line) - .unwrap_or_else(|x| x); - let stmt_ix = this_file_stmts - .binary_search_by_key(&loc.line, |&(ref l, _)| l.line) - .unwrap_or_else(|x| x); - - let mut insert_decl_comment = |decl_id: CDeclId, loc, comment| { - let decl_id = if let CDeclKind::NonCanonicalDecl { canonical_decl } = ast_context[decl_id].kind { - canonical_decl - } else { - decl_id - }; - decl_comments_map - .entry(decl_id) - .or_insert(BTreeMap::new()) - .insert(loc, comment); - }; - let mut insert_stmt_comment = |stmt_id: CStmtId, loc, comment| { - stmt_comments_map - .entry(stmt_id) - .or_insert(BTreeMap::new()) - .insert(loc, comment); - }; - - // Prefer the one that is higher up (biasing towards declarations if there is a tie) - match (this_file_decls.get(decl_ix), this_file_stmts.get(stmt_ix)) { - (Some(&(ref l1, d)), Some(&(ref l2, s))) => { - if l1 > l2 { - insert_stmt_comment(s, loc, comment); - } else { - insert_decl_comment(d, loc, comment); - } - } - (Some(&(_, d)), None) => { - insert_decl_comment(d, loc, comment); - } - (None, Some(&(_, s))) => { - insert_stmt_comment(s, loc, comment); - } - (None, None) => { - diag!( - Diagnostic::Comments, - "Didn't find a target node for the comment '{}'", - comment, - ); - } - }; + let mut comments_by_file: HashMap>> = HashMap::new(); + + // Group comments by their file + for comment in &ast_context.comments { + // Comments without a valid FileId are probably clang + // compiler-internal definitions + if let Some(file_id) = ast_context.file_id(&comment) { + comments_by_file + .entry(file_id) + .or_default() + .push(comment.clone()); } } - // Flatten out the nested comment maps - let decl_comments = decl_comments_map - .into_iter() - .map(|(decl_id, map)| { - let mut comments = Vec::from_iter(map); - // Sort comments attached to this decl by source location - comments.sort_unstable_by(|a, b| { - ast_context.compare_src_locs(&a.0, &b.0) - }); + // Sort in REVERSE! Last element is the first in file source + // ordering. This makes it easy to pop the next comment off. + for comments in comments_by_file.values_mut() { + comments.sort_by(|a, b| { + ast_context.compare_src_locs( + &b.loc.unwrap().begin(), + &a.loc.unwrap().begin(), + ) + }); + } - (decl_id, comments.into_iter().map(|(_, v)| v).collect()) - }) - .collect(); - let stmt_comments = stmt_comments_map + let comments_by_file = comments_by_file .into_iter() - .map(|(decl_id, map)| (decl_id, map.into_iter().map(|(_, v)| v).collect())) + .map(|(k, v)| (k, RefCell::new(v))) .collect(); CommentContext { - decl_comments, - stmt_comments, + comments_by_file, } } - // Extract the comment for a given declaration - pub fn get_decl_comment(&self, decl_id: CDeclId) -> Option<&[String]> { - self.decl_comments.get(&decl_id).map(Vec::as_ref) + pub fn get_comments_before(&self, loc: SrcLoc, ctx: &TypedAstContext) -> Vec { + let file_id = ctx.file_map[loc.fileid as usize]; + let mut extracted_comments = vec![]; + let mut comments = match self.comments_by_file.get(&file_id) { + None => return extracted_comments, + Some(comments) => comments.borrow_mut(), + }; + loop { + if comments.is_empty() { break; } + let next_comment_loc = comments + .last() + .unwrap() + .begin_loc() + .expect("All comments must have a source location"); + if ctx.compare_src_locs(&next_comment_loc, &loc) != Ordering::Less { + break; + } + + extracted_comments.push(comments.pop().unwrap().kind); + } + extracted_comments + } + + pub fn get_comments_before_located( + &self, + located: &Located, + ctx: &TypedAstContext, + ) -> Vec { + match located.begin_loc() { + None => vec![], + Some(loc) => self.get_comments_before(loc, ctx), + } } - // Extract the comment for a given statement - pub fn get_stmt_comment(&self, stmt_id: CStmtId) -> Option<&[String]> { - self.stmt_comments.get(&stmt_id).map(Vec::as_ref) + pub fn get_remaining_comments(&mut self, file_id: FileId) -> Vec { + match self.comments_by_file.remove(&file_id) { + Some(comments) => comments.into_inner().into_iter().map(|c| c.kind).collect(), + None => vec![], + } } } diff --git a/c2rust-transpile/src/cfg/mod.rs b/c2rust-transpile/src/cfg/mod.rs index f529204731..f75654cf43 100644 --- a/c2rust-transpile/src/cfg/mod.rs +++ b/c2rust-transpile/src/cfg/mod.rs @@ -1378,6 +1378,17 @@ impl CfgBuilder { // Entry label entry: Label, ) -> Result, TranslationError> { + let add_comments_before = |loc: Option, wip: &mut WipBlock| { + if let Some(loc) = loc { + let comments = translator + .comment_context + .get_comments_before(loc, &translator.ast_context); + for cmmt in comments { + wip.push_comment(cmmt); + } + } + }; + // Add to the per_stmt_stack let live_in: IndexSet = self.currently_live.last().unwrap().clone(); self.per_stmt_stack @@ -1386,11 +1397,7 @@ impl CfgBuilder { let mut wip = self.new_wip_block(entry); // Add statement comment into current block right before the current statement - if let Some(comments) = translator.comment_context.get_stmt_comment(stmt_id) { - for cmmt in comments { - wip.push_comment(cmmt.clone()); - } - } + add_comments_before(translator.ast_context[stmt_id].begin_loc(), &mut wip); let out_wip: Result, TranslationError> = match translator.ast_context.index(stmt_id).kind { @@ -1405,14 +1412,7 @@ impl CfgBuilder { .insert(*decl, info); // Add declaration comment into current block right before the declaration - if let Some(comments) = translator - .comment_context - .get_decl_comment(*decl) - { - for cmmt in comments { - wip.push_comment(cmmt.clone()); - } - } + add_comments_before(translator.ast_context[*decl].begin_loc(), &mut wip); wip.push_decl(*decl); wip.defined.insert(*decl); @@ -1430,6 +1430,9 @@ impl CfgBuilder { wip.extend(stmts); wip.push_stmt(mk().expr_stmt(mk().return_expr(ret_val))); + // Add any comments left before the end of the return + add_comments_before(translator.ast_context[stmt_id].end_loc(), &mut wip); + self.add_wip_block(wip, End); Ok(None) @@ -1451,6 +1454,9 @@ impl CfgBuilder { // Condition let (stmts, val) = translator.convert_condition(ctx, true, scrutinee)?.discard_unsafe(); wip.extend(stmts); + // Add any comments left before the end of the condition + add_comments_before(translator.ast_context[scrutinee].end_loc(), &mut wip); + let cond_val = translator.ast_context[scrutinee].kind.get_bool(); self.add_wip_block( wip, @@ -1507,6 +1513,12 @@ impl CfgBuilder { let cond_val = translator.ast_context[condition].kind.get_bool(); let mut cond_wip = self.new_wip_block(cond_entry); cond_wip.extend(stmts); + // Add any comments left before the end of the condition + add_comments_before( + translator.ast_context[condition].end_loc(), + &mut cond_wip, + ); + self.add_wip_block( cond_wip, match cond_val { @@ -1573,6 +1585,11 @@ impl CfgBuilder { let cond_val = translator.ast_context[condition].kind.get_bool(); let mut cond_wip = self.new_wip_block(cond_entry); cond_wip.extend(stmts); + // Add any comments left before the end of the condition + add_comments_before( + translator.ast_context[condition].end_loc(), + &mut cond_wip, + ); self.add_wip_block( cond_wip, match cond_val { @@ -1624,6 +1641,11 @@ impl CfgBuilder { let cond_val = translator.ast_context[cond].kind.get_bool(); let mut cond_wip = slf.new_wip_block(cond_entry); cond_wip.extend(stmts); + // Add any comments left before the end of the condition + add_comments_before( + translator.ast_context[cond].end_loc(), + &mut cond_wip, + ); slf.add_wip_block( cond_wip, match cond_val { @@ -1662,6 +1684,11 @@ impl CfgBuilder { let incr_stmts = translator.convert_expr(ctx.unused(), incr)?.into_stmts(); let mut incr_wip = slf.new_wip_block(incr_entry); incr_wip.extend(incr_stmts); + // Add any comments left before the end of the increment + add_comments_before( + translator.ast_context[incr].end_loc(), + &mut incr_wip, + ); slf.add_wip_block(incr_wip, Jump(cond_entry)); } } @@ -1731,6 +1758,11 @@ impl CfgBuilder { } wip.extend(translator.convert_expr(ctx.unused(), expr)?.into_stmts()); + // Add any comments left before the end of the expr + add_comments_before( + translator.ast_context[expr].end_loc(), + &mut wip, + ); // If we can tell the expression is going to diverge, there is no falling through to // the next block. @@ -1831,6 +1863,11 @@ impl CfgBuilder { .convert_expr(ctx.used(), scrutinee)? .discard_unsafe(); wip.extend(stmts); + // Add any comments left before the end of the condition + add_comments_before( + translator.ast_context[scrutinee].end_loc(), + &mut wip, + ); let wip_label = wip.label; self.add_wip_block(wip, End); // NOTE: the `End` here is temporary and gets updated @@ -1895,7 +1932,14 @@ impl CfgBuilder { Ok(Some(wip)) } }; - let out_wip: Option = out_wip?; // This statement exists to help type inference... + let mut out_wip: Option = out_wip?; // This statement exists to help type inference... + if let Some(wip) = &mut out_wip { + // Add any comments left before the end of the statement + add_comments_before( + translator.ast_context[stmt_id].end_loc(), + wip, + ); + } let out_end = self.fresh_label(); let out_wip: Option = out_wip.map(|w| { diff --git a/c2rust-transpile/src/cfg/structures.rs b/c2rust-transpile/src/cfg/structures.rs index 5e7ad72318..90adbad0c2 100644 --- a/c2rust-transpile/src/cfg/structures.rs +++ b/c2rust-transpile/src/cfg/structures.rs @@ -2,7 +2,7 @@ use super::*; -use crate::rust_ast::comment_store; +use crate::rust_ast::{comment_store, pos_to_span}; /// Convert a sequence of structures produced by Relooper back into Rust statements pub fn structured_cfg( @@ -382,6 +382,16 @@ impl StructureState { ) { use crate::cfg::structures::StructuredAST::*; + let span = match ast { + // Singleton is handled below + Empty | Singleton(_) | Append(..) => DUMMY_SP, + + _ => comment_store + .add_comments(&queued_comments) + .map(pos_to_span) + .unwrap_or(DUMMY_SP), + }; + match ast { Empty => {} @@ -391,7 +401,10 @@ impl StructureState { } } Singleton(StmtOrComment::Stmt(mut s)) => { - s.span = comment_store.add_comment_lines(&queued_comments).unwrap_or(s.span); + s.span = comment_store + .add_comments(&queued_comments) + .map(pos_to_span) + .unwrap_or(s.span); queued_comments.clear(); output.push(s); } @@ -404,7 +417,6 @@ impl StructureState { Goto(to) => { // Assign to `current_block` the next label we want to go to. - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let lbl_expr = if self.debug_labels { @@ -413,7 +425,7 @@ impl StructureState { to.to_num_expr() }; output.push( - mk().span(s) + mk().span(span) .semi_stmt(mk().assign_expr(self.current_block.clone(), lbl_expr)), ); } @@ -421,7 +433,6 @@ impl StructureState { Match(cond, cases) => { // Make a `match`. - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let arms: Vec = cases @@ -440,7 +451,7 @@ impl StructureState { let e = mk().match_expr(cond, arms); - output.push(mk().span(s).expr_stmt(e)); + output.push(mk().span(span).expr_stmt(e)); } If(cond, then, els) => { @@ -451,7 +462,6 @@ impl StructureState { // * `if { } else { .. }` turns into `if ! { .. }` // - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let then = { @@ -503,14 +513,13 @@ impl StructureState { } }; - if_stmt.span = s; + if_stmt.span = span; output.push(if_stmt); } GotoTable(cases, then) => { // Dispatch based on the next `current_block` value. - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let mut arms: Vec = cases @@ -547,7 +556,7 @@ impl StructureState { let e = mk().match_expr(self.current_block.clone(), arms); - output.push(mk().span(s).expr_stmt(e)); + output.push(mk().span(span).expr_stmt(e)); } Loop(lbl, body) => { @@ -556,7 +565,6 @@ impl StructureState { // * Loops that start with an `if { break; }` get converted into `while` loops // - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let body = { @@ -590,7 +598,7 @@ impl StructureState { mk().block(body.iter().skip(1).cloned().collect()), lbl.map(|l| l.pretty_print()), ); - output.push(mk().span(s).expr_stmt(e)); + output.push(mk().span(span).expr_stmt(e)); return; } } @@ -601,13 +609,12 @@ impl StructureState { let e = mk().loop_expr(mk().block(body), lbl.map(|l| l.pretty_print())); - output.push(mk().span(s).expr_stmt(e)); + output.push(mk().span(span).expr_stmt(e)); } Exit(exit_style, lbl) => { // Make a (possibly labelled) `break` or `continue`. - let s = comment_store.add_comment_lines(&queued_comments).unwrap_or(DUMMY_SP); queued_comments.clear(); let lbl = lbl.map(|l| l.pretty_print()); @@ -616,7 +623,7 @@ impl StructureState { ExitStyle::Continue => mk().continue_expr(lbl), }; - output.push(mk().span(s).semi_stmt(e)); + output.push(mk().span(span).semi_stmt(e)); } } } diff --git a/c2rust-transpile/src/diagnostics.rs b/c2rust-transpile/src/diagnostics.rs index e66f097b1f..57d9cba028 100644 --- a/c2rust-transpile/src/diagnostics.rs +++ b/c2rust-transpile/src/diagnostics.rs @@ -8,7 +8,7 @@ use std::io; use std::str::FromStr; use std::sync::Arc; -use crate::c_ast::DisplaySrcLoc; +use crate::c_ast::DisplaySrcSpan; use c2rust_ast_exporter::get_clang_major_version; const DEFAULT_WARNINGS: &[Diagnostic] = &[]; @@ -20,6 +20,7 @@ pub enum Diagnostic { Comments, } +#[allow(unused_macros)] macro_rules! diag { ($type:path, $($arg:tt)*) => (warn!(target: &$type.to_string(), $($arg)*)) } @@ -68,7 +69,7 @@ pub fn init(mut enabled_warnings: HashSet, log_level: log::LevelFilt #[derive(Debug, Clone)] pub struct TranslationError { - loc: Vec, + loc: Vec, inner: Arc>, } @@ -148,7 +149,7 @@ impl TranslationError { self.inner.get_context().clone() } - pub fn new(loc: Option, inner: Context) -> Self { + pub fn new(loc: Option, inner: Context) -> Self { let mut loc_stack = vec![]; if let Some(loc) = loc { loc_stack.push(loc.clone()); @@ -166,7 +167,7 @@ impl TranslationError { } } - pub fn add_loc(mut self, loc: Option) -> Self { + pub fn add_loc(mut self, loc: Option) -> Self { if let Some(loc) = loc { self.loc.push(loc); } diff --git a/c2rust-transpile/src/rust_ast/comment_store.rs b/c2rust-transpile/src/rust_ast/comment_store.rs index c10b9f259f..900e3868e0 100644 --- a/c2rust-transpile/src/rust_ast/comment_store.rs +++ b/c2rust-transpile/src/rust_ast/comment_store.rs @@ -22,29 +22,55 @@ //! let updated_module: Mod = trav.traverse_mod(module); //! let updated_cmmt_store = trav.into_comment_store(); //! ``` +//! +//! Comments can currently be attached and printed in the following positions +//! (see pprust.rs for current details): +//! +//! Before the following AST elements: +//! - Lit +//! - Attribute +//! - Ty +//! - ForeignItem +//! - Item +//! - Variant +//! - Field +//! - TraitItem +//! - ImplItem +//! - Stmt +//! - Block +//! - Path +//! - Arm +//! +//! Trailing comments can be printed after the following elements hi pos, but on +//! the same line: +//! - Stmt +//! - Comma separated Expr +//! - Field +//! +//! Before the close of a Block -use crate::rust_ast::traverse; +use crate::rust_ast::{pos_to_span, traverse}; use itertools::Itertools; +use smallvec::{smallvec, SmallVec}; use std::collections::BTreeMap; use syntax::ast::*; use syntax::parse::lexer::comments; -use syntax_pos::hygiene::SyntaxContext; -use syntax_pos::{BytePos, Span, DUMMY_SP}; +use syntax_pos::BytePos; pub struct CommentStore { - /// The `Span` keys do _not_ correspond to the comment position. Instead, they refer to the - /// `Span` of whatever is associated with the comment. - output_comments: BTreeMap, + /// The `BytePos` keys do _not_ correspond to the comment position. Instead, they refer to the + /// `BytePos` of whatever is associated with the comment. + output_comments: BTreeMap>, /// Monotonically increasing source of new byte positions. - span_source: u32, + current_position: u32, } impl CommentStore { pub fn new() -> Self { CommentStore { output_comments: BTreeMap::new(), - span_source: 0, + current_position: 0, } } @@ -57,32 +83,51 @@ impl CommentStore { /// Convert the comment context into the accumulated (and ordered) `libsyntax` comments. pub fn into_comments(self) -> Vec { - self.output_comments.into_iter().map(|(_, v)| v).collect() - } + self.output_comments.into_iter().map(|(_, v)| v).flatten().collect() + } + + /// Add comments at the specified position, then return the `BytePos` that + /// should be given to something we want associated with this comment. If + /// `pos` is None, or a comment is not found in the given position, use the + /// current position instead. + fn insert_comments(&mut self, mut new_comments: SmallVec<[comments::Comment; 1]>, pos: Option) -> BytePos { + if let Some(pos) = pos { + if let Some(comments) = self.output_comments.get_mut(&pos) { + comments.extend(new_comments); + return pos; + } + } - /// Add a `Comment` at the current position, then return the `Span` that should be given to - /// something we want associated with this comment. - pub fn add_comment(&mut self, mut cmmt: comments::Comment) -> Span { - // This line is not necessary. All it does is prevent the confusing situation where comments - // have exactly the same position as some AST node to which they are _not_ related. - self.span_source += 1; + // This line is not necessary. All it does is prevent the confusing + // situation where comments have exactly the same position as some AST + // node to which they are _not_ related. + self.current_position += 1; // The position of the comment has to be LESS than the span of the AST node it annotates. - cmmt.pos = BytePos(self.span_source); - self.span_source += 1; - let sp = Span::new( - BytePos(self.span_source), - BytePos(self.span_source), - SyntaxContext::empty(), - ); - - self.output_comments.insert(sp, cmmt); - sp + for cmmt in &mut new_comments { + cmmt.pos = BytePos(self.current_position); + self.current_position += 1; + } + + let new_pos = BytePos(self.current_position); + self.output_comments.insert(new_pos, new_comments); + new_pos } /// Add a comment at the current position, then return the `Span` that should be given to /// something we want associated with this comment. - pub fn add_comment_lines(&mut self, lines: &[String]) -> Option { + pub fn add_comments(&mut self, lines: &[String]) -> Option { + self.extend_existing_comments(lines, None) + } + + /// Add a comment at the specified position, then return the `BytePos` that + /// should be given to something we want associated with this comment. If + /// pos is None, use the current position. + pub fn extend_existing_comments( + &mut self, + lines: &[String], + pos: Option, + ) -> Option { fn translate_comment(comment: &String) -> String { comment .lines() @@ -105,25 +150,26 @@ impl CommentStore { if lines.is_empty() { None } else { - Some(self.add_comment(comments::Comment { + let new_comment = comments::Comment { style: comments::CommentStyle::Isolated, lines: lines, pos: BytePos(0), // overwritten in `add_comment` - })) + }; + Some(self.insert_comments(smallvec![new_comment], pos)) } } } pub struct CommentTraverser { - old_comments: BTreeMap, + old_comments: BTreeMap>, store: CommentStore, } impl CommentTraverser { - fn reinsert_comment_at(&mut self, sp: Span) -> Span { - if let Some(cmmt) = self.old_comments.remove(&sp) { - self.store.add_comment(cmmt) + fn reinsert_comment_at(&mut self, sp: BytePos) -> BytePos { + if let Some(cmmts) = self.old_comments.remove(&sp) { + self.store.insert_comments(cmmts, None) } else { - DUMMY_SP + sp } } @@ -134,49 +180,32 @@ impl CommentTraverser { } } -impl traverse::Traversal for CommentTraverser { - fn traverse_stmt(&mut self, mut s: Stmt) -> Stmt { - s.span = self.reinsert_comment_at(s.span); - traverse::traverse_stmt_def(self, s) - } - - fn traverse_expr(&mut self, mut e: Expr) -> Expr { - e.span = self.reinsert_comment_at(e.span); - traverse::traverse_expr_def(self, e) - } - - fn traverse_trait_item(&mut self, mut ti: TraitItem) -> TraitItem { - ti.span = self.reinsert_comment_at(ti.span); - traverse::traverse_trait_item_def(self, ti) - } - - fn traverse_impl_item(&mut self, mut ii: ImplItem) -> ImplItem { - ii.span = self.reinsert_comment_at(ii.span); - traverse::traverse_impl_item_def(self, ii) - } - - fn traverse_block(&mut self, mut b: Block) -> Block { - b.span = self.reinsert_comment_at(b.span); - traverse::traverse_block_def(self, b) - } - - fn traverse_local(&mut self, mut l: Local) -> Local { - l.span = self.reinsert_comment_at(l.span); - traverse::traverse_local_def(self, l) - } - - fn traverse_field(&mut self, mut f: Field) -> Field { - f.span = self.reinsert_comment_at(f.span); - traverse::traverse_field_def(self, f) - } +macro_rules! reinsert_and_traverse { + ($fn:ident, $ty:ty, $traverse:path) => { + fn $fn(&mut self, mut x: $ty) -> $ty { + let orig = x.span.data(); + x.span = pos_to_span(self.reinsert_comment_at(orig.lo)); + x = $traverse(self, x); + if orig.lo != orig.hi { + x.span = x.span.with_hi(self.reinsert_comment_at(orig.hi)); + } + x + } + }; +} - fn traverse_item(&mut self, mut i: Item) -> Item { - i.span = self.reinsert_comment_at(i.span); - traverse::traverse_item_def(self, i) - } +impl traverse::Traversal for CommentTraverser { + reinsert_and_traverse!(traverse_stmt, Stmt, traverse::traverse_stmt_def); + reinsert_and_traverse!(traverse_expr, Expr, traverse::traverse_expr_def); + reinsert_and_traverse!(traverse_trait_item, TraitItem, traverse::traverse_trait_item_def); + reinsert_and_traverse!(traverse_impl_item, ImplItem, traverse::traverse_impl_item_def); + reinsert_and_traverse!(traverse_block, Block, traverse::traverse_block_def); + reinsert_and_traverse!(traverse_local, Local, traverse::traverse_local_def); + reinsert_and_traverse!(traverse_field, Field, traverse::traverse_field_def); + reinsert_and_traverse!(traverse_item, Item, traverse::traverse_item_def); fn traverse_foreign_item(&mut self, mut i: ForeignItem) -> ForeignItem { - i.span = self.reinsert_comment_at(i.span); + i.span = pos_to_span(self.reinsert_comment_at(i.span.lo())); i } } diff --git a/c2rust-transpile/src/rust_ast/mod.rs b/c2rust-transpile/src/rust_ast/mod.rs index 5aedc08fa5..56ab94d290 100644 --- a/c2rust-transpile/src/rust_ast/mod.rs +++ b/c2rust-transpile/src/rust_ast/mod.rs @@ -1,3 +1,11 @@ pub mod comment_store; pub mod item_store; pub mod traverse; + +use syntax_pos::{BytePos, Span}; +use syntax_pos::hygiene::SyntaxContext; + +/// Make a new span at `pos` +pub fn pos_to_span(pos: BytePos) -> Span { + Span::new(pos, pos, SyntaxContext::empty()) +} diff --git a/c2rust-transpile/src/translator/mod.rs b/c2rust-transpile/src/translator/mod.rs index 89b21d8f35..c40a47e9e6 100644 --- a/c2rust-transpile/src/translator/mod.rs +++ b/c2rust-transpile/src/translator/mod.rs @@ -20,6 +20,7 @@ use syntax::{ast, with_globals}; use syntax_pos::{Span, DUMMY_SP}; use syntax_pos::edition::Edition; +use crate::rust_ast::pos_to_span; use crate::rust_ast::comment_store::CommentStore; use crate::rust_ast::item_store::ItemStore; use crate::rust_ast::traverse::Traversal; @@ -737,8 +738,6 @@ pub fn translate( let translation = to_string(|s| { print_header(s, &t)?; - // Re-order comments - let mut traverser = t.comment_store.into_inner().into_comment_traverser(); let mut mod_items: Vec> = Vec::new(); // Keep track of new uses we need while building header submodules @@ -747,19 +746,32 @@ pub fn translate( // Header Reorganization: Submodule Item Stores for (file_id, ref mut mod_item_store) in t.items.borrow_mut().iter_mut() { if *file_id != t.main_file { - mod_items.push(make_submodule( + let mut submodule = make_submodule( &t.ast_context, mod_item_store, *file_id, &mut new_uses, &t.mod_names, - )); + ); + let comments = t.comment_context.get_remaining_comments(*file_id); + submodule.span = match t + .comment_store + .borrow_mut() + .add_comments(&comments) + { + Some(pos) => submodule.span.with_hi(pos), + None => submodule.span, + }; + mod_items.push(submodule); } } // Main file item store let (items, foreign_items, uses) = t.items.borrow_mut()[&t.main_file].drain(); + // Re-order comments + let mut traverser = t.comment_store.into_inner().into_comment_traverser(); + // Add a comment mapping span to each node that should have a // comment printed before it. The pretty printer picks up these // spans and uses them to decide when to emit comments. @@ -776,9 +788,12 @@ pub fn translate( .map(|p_i| p_i.map(|i| traverser.traverse_item(i))) .collect(); + let mut reordered_comment_store = traverser.into_comment_store(); + let remaining_comments = t.comment_context.get_remaining_comments(t.main_file); + reordered_comment_store.add_comments(&remaining_comments); s.comments() .get_or_insert(vec![]) - .extend(traverser.into_comment_store().into_comments()); + .extend(reordered_comment_store.into_comments()); for mod_item in mod_items { s.print_item(&*mod_item)?; @@ -807,6 +822,8 @@ pub fn translate( s.print_item(&*x)?; } + s.print_remaining_comments()?; + Ok(()) }); (translation, pragmas, crates) @@ -1083,6 +1100,34 @@ impl<'c> Translation<'c> { mk().mac_expr(mk().mac(vec![macro_name], macro_msg, MacDelimiter::Parenthesis)) } + fn get_comment_span_before(&self, src_loc: Option) -> Span { + if let Some(src_loc) = src_loc { + let comments = self.comment_context + .get_comments_before(src_loc.begin(), &self.ast_context); + self.comment_store + .borrow_mut() + .add_comments(&comments) + .map(pos_to_span) + .unwrap_or(DUMMY_SP) + } else { + DUMMY_SP + } + } + + fn extend_comment_span_end(&self, src_loc: Option, span: Span) -> Span { + if let Some(src_loc) = src_loc { + let comments = self + .comment_context + .get_comments_before(src_loc.end(), &self.ast_context); + match self.comment_store.borrow_mut().add_comments(&comments) { + Some(pos) => span.with_hi(pos), + None => span, + } + } else { + span + } + } + fn mk_cross_check(&self, mk: Builder, args: Vec<&str>) -> Builder { if self.tcfg.cross_checks { mk.call_attr("cross_check", args) @@ -1316,18 +1361,14 @@ impl<'c> Translation<'c> { ctx: ExprContext, decl_id: CDeclId, ) -> Result { - let mut s = self - .comment_context - .get_decl_comment(decl_id) - .and_then(|decl_cmt| self.comment_store.borrow_mut().add_comment_lines(decl_cmt)) - .unwrap_or(DUMMY_SP); - let decl = self .ast_context .get_decl(&decl_id) .ok_or_else(|| format_err!("Missing decl {:?}", decl_id))?; - let _src_loc = &decl.loc; + let mut s = self.get_comment_span_before(decl.loc); + + // let src_loc = &decl.loc; match decl.kind { CDeclKind::Struct { fields: None, .. } @@ -1342,6 +1383,8 @@ impl<'c> Translation<'c> { .borrow() .resolve_decl_name(decl_id) .unwrap(); + + let s = self.extend_comment_span_end(decl.loc, s); let extern_item = mk().span(s).pub_().ty_foreign_item(name); Ok(ConvertedDecl::ForeignItem(extern_item)) } @@ -1459,6 +1502,7 @@ impl<'c> Translation<'c> { let repr_attr = mk().meta_item(vec!["repr"], MetaItemKind::List(reprs)); + let s = self.extend_comment_span_end(decl.loc, s); Ok(ConvertedDecl::Item( mk().span(s) .pub_() @@ -1498,6 +1542,7 @@ impl<'c> Translation<'c> { } } + let s = self.extend_comment_span_end(decl.loc, s); Ok(if field_syns.is_empty() { // Empty unions are a GNU extension, but Rust doesn't allow empty unions. ConvertedDecl::Item( @@ -1532,6 +1577,7 @@ impl<'c> Translation<'c> { .resolve_decl_name(decl_id) .expect("Enums should already be renamed"); let ty = self.convert_type(integral_type.ctype)?; + let s = self.extend_comment_span_end(decl.loc, s); Ok(ConvertedDecl::Item( mk().span(s).pub_().type_item(enum_name, ty), )) @@ -1557,6 +1603,7 @@ impl<'c> Translation<'c> { } }; + let s = self.extend_comment_span_end(decl.loc, s); Ok(ConvertedDecl::Item( mk().span(s).pub_().const_item(name, ty, val), )) @@ -1618,14 +1665,14 @@ impl<'c> Translation<'c> { let is_main = self.ast_context.c_main == Some(decl_id); let converted_function = self.convert_function( - ctx, s, is_global, is_inline, is_main, is_var, is_extern, new_name, name, - &args, ret, body, attrs, + ctx, &decl, s, is_global, is_inline, is_main, is_var, is_extern, + new_name, name, &args, ret, body, attrs, ); converted_function.or_else(|e| match self.tcfg.replace_unsupported_decls { ReplaceMode::Extern if body.is_none() => self.convert_function( - ctx, s, is_global, false, is_main, is_var, is_extern, new_name, name, - &args, ret, None, attrs, + ctx, &decl, s, is_global, false, is_main, is_var, is_extern, + new_name, name, &args, ret, None, attrs, ), _ => Err(e), }) @@ -1740,10 +1787,16 @@ impl<'c> Translation<'c> { let comment = String::from("// Initialized in run_static_initializers"); // REVIEW: We might want to add the comment to the original span comments + let comment_pos = if s == DUMMY_SP { + None + } else { + Some(s.lo()) + }; s = self .comment_store .borrow_mut() - .add_comment_lines(&[comment]) + .extend_existing_comments(&[comment], comment_pos) + .map(pos_to_span) .unwrap_or(s); self.add_static_initializer_to_section(new_name, typ, &mut init)?; @@ -1901,6 +1954,7 @@ impl<'c> Translation<'c> { fn convert_function( &self, ctx: ExprContext, + c_decl: &CDecl, span: Span, is_global: bool, is_inline: bool, @@ -2024,7 +2078,8 @@ impl<'c> Translation<'c> { _ => panic!("function body expects to be a compound statement"), }; body_stmts.append(&mut self.convert_function_body(ctx, name, body_ids, ret)?); - let block = stmts_block(body_stmts); + let mut block = stmts_block(body_stmts); + block.span = self.extend_comment_span_end(c_decl.loc, block.span); // Only add linkage attributes if the function is `extern` let mut mk_ = if is_main { @@ -2334,7 +2389,8 @@ impl<'c> Translation<'c> { let span = self .comment_store .borrow_mut() - .add_comment_lines(&[comment]) + .add_comments(&[comment]) + .map(pos_to_span) .unwrap_or(DUMMY_SP); let static_item = mk().span(span) @@ -4305,7 +4361,7 @@ impl<'c> Translation<'c> { let decl_file_id = self.ast_context.file_id(decl); if self.tcfg.reorganize_definitions { - add_src_loc_attr(&mut item.attrs, &decl.loc); + add_src_loc_attr(&mut item.attrs, &decl.loc.as_ref().map(|x| x.begin())); let mut item_stores = self.items.borrow_mut(); let items = item_stores .entry(decl_file_id.unwrap()) @@ -4323,7 +4379,7 @@ impl<'c> Translation<'c> { let decl_file_id = self.ast_context.file_id(decl); if self.tcfg.reorganize_definitions { - add_src_loc_attr(&mut item.attrs, &decl.loc); + add_src_loc_attr(&mut item.attrs, &decl.loc.as_ref().map(|x| x.begin())); let mut items = self.items.borrow_mut(); let mod_block_items = items .entry(decl_file_id.unwrap()) From 68774b55d94d6f352c228946eaa844c85e28081f Mon Sep 17 00:00:00 2001 From: Stephen Crane Date: Tue, 30 Jul 2019 12:56:41 -0700 Subject: [PATCH 02/15] WIP track comments by spans and assign locations before translation --- c2rust-refactor/src/rewrite/base.rs | 19 +-- c2rust-transpile/src/c_ast/iterators.rs | 41 ++++- c2rust-transpile/src/c_ast/mod.rs | 12 +- c2rust-transpile/src/cfg/mod.rs | 120 +++++++------ c2rust-transpile/src/cfg/relooper.rs | 10 ++ c2rust-transpile/src/cfg/structures.rs | 161 +++++++++++------- c2rust-transpile/src/lib.rs | 1 + .../src/rust_ast/comment_store.rs | 7 + c2rust-transpile/src/translator/mod.rs | 120 ++++++++----- tests/comments/src/comments.c | 8 +- tests/comments/src/comments.h | 3 + tests/comments/src/test_comments.rs | 2 +- tests/gotos/src/idiomatic_nested_loops.c | 13 ++ 13 files changed, 340 insertions(+), 177 deletions(-) diff --git a/c2rust-refactor/src/rewrite/base.rs b/c2rust-refactor/src/rewrite/base.rs index a4027cc764..7ae363b3bc 100644 --- a/c2rust-refactor/src/rewrite/base.rs +++ b/c2rust-refactor/src/rewrite/base.rs @@ -220,7 +220,7 @@ where diff::Result::Left(_) => { // There's an item on the left corresponding to nothing on the right. // Delete the item from the left. - let old_span = rewind_span_over_whitespace(ast(&old[i]).splice_span(), &rcx); + let old_span = ast(&old[i]).splice_span(); // let old_span = ast(&old[i]).splice_span(); let old_span = match old_ids[i] { SeqItemId::Node(id) => extend_span_comments(&id, old_span, &rcx), @@ -482,7 +482,7 @@ pub fn extend_span_comments(id: &NodeId, span: Span, rcx: &RewriteCtxt) -> Span /// Extend a node span to cover comments around it. Returns Ok(span) if all /// comments were covered, and Err(span) if only some could be covered. -pub fn extend_span_comments_strict(id: &NodeId, span: Span, rcx: &RewriteCtxt) -> Result { +pub fn extend_span_comments_strict(id: &NodeId, mut span: Span, rcx: &RewriteCtxt) -> Result { let source_map = rcx.session().source_map(); let comments = match rcx.comments().get(id) { @@ -516,11 +516,11 @@ pub fn extend_span_comments_strict(id: &NodeId, span: Span, rcx: &RewriteCtxt) - let mut all_matched = true; - let mut span = rewind_span_over_whitespace(span, rcx); for comment in &before { + let cur_span = rewind_span_over_whitespace(span, rcx); let comment_size = usize::sum(comment.lines.iter().map(|l| l.len())); - let comment_start = BytePos::from_usize(span.lo().to_usize() - comment_size); - let comment_span = span.shrink_to_lo().with_lo(comment_start); + let comment_start = BytePos::from_usize(cur_span.lo().to_usize() - comment_size); + let comment_span = cur_span.shrink_to_lo().with_lo(comment_start); let source = match source_map.span_to_snippet(comment_span) { Ok(snippet) => snippet, Err(_) => { @@ -529,15 +529,14 @@ pub fn extend_span_comments_strict(id: &NodeId, span: Span, rcx: &RewriteCtxt) - } }; let matches = source.lines().zip(&comment.lines).all(|(src_line, comment_line)| { + if src_line.trim() != comment_line.trim() { + debug!("comment {:?} did not match source {:?}", comment_line, src_line); + } src_line.trim() == comment_line.trim() }); if matches { - // Extend to previous newline because this is an isolated comment - let comment_span = rewind_span_over_whitespace(comment_span, rcx); - - span = span.with_lo(comment_span.lo()); + span = cur_span.with_lo(comment_span.lo()); } else { - debug!("comment {:?} did not match source {:?}", comment, source); all_matched = false; break; } diff --git a/c2rust-transpile/src/c_ast/iterators.rs b/c2rust-transpile/src/c_ast/iterators.rs index 382db87f0f..6efea10c2e 100644 --- a/c2rust-transpile/src/c_ast/iterators.rs +++ b/c2rust-transpile/src/c_ast/iterators.rs @@ -1,6 +1,6 @@ use crate::c_ast::*; -#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] pub enum SomeId { Stmt(CStmtId), Expr(CExprId), @@ -291,7 +291,7 @@ fn immediate_children(context: &TypedAstContext, s_or_e: SomeId) -> Vec } } -fn immediate_children_all_types(context: &TypedAstContext, s_or_e: SomeId) -> Vec { +pub fn immediate_children_all_types(context: &TypedAstContext, s_or_e: SomeId) -> Vec { match s_or_e { SomeId::Stmt(stmt_id) => immediate_stmt_children(&context[stmt_id].kind), SomeId::Expr(expr_id) => immediate_expr_children_all_types(&context[expr_id].kind), @@ -371,3 +371,40 @@ impl<'context> Iterator for DFNodes<'context> { result } } + +struct VisitNode { + id: SomeId, + seen: bool, +} + +impl VisitNode { + fn new(id: SomeId) -> Self { + Self { + id, + seen: false, + } + } +} + +pub trait NodeVisitor { + fn pre(&mut self, _id: SomeId) {} + fn post(&mut self, _id: SomeId) {} + fn visit_tree(&mut self, context: &TypedAstContext, root: SomeId) { + let mut stack = vec![VisitNode::new(root)]; + while let Some(mut node) = stack.pop() { + if !node.seen { + let id = node.id; + node.seen = true; + stack.push(node); + + let children = immediate_children_all_types(context, id); + // Add children in reverse order since we visit the end of the stack first + stack.extend(children.into_iter().rev().map(VisitNode::new)); + + self.pre(id); + } else { + self.post(node.id); + } + } + } +} diff --git a/c2rust-transpile/src/c_ast/mod.rs b/c2rust-transpile/src/c_ast/mod.rs index b4c3a84df6..7a81def9d8 100644 --- a/c2rust-transpile/src/c_ast/mod.rs +++ b/c2rust-transpile/src/c_ast/mod.rs @@ -39,6 +39,8 @@ mod conversion; pub mod iterators; mod print; +use iterators::{DFNodes, SomeId}; + /// AST context containing all of the nodes in the Clang AST #[derive(Debug, Clone)] pub struct TypedAstContext { @@ -219,6 +221,15 @@ impl TypedAstContext { located.loc.as_ref().and_then(|loc| self.file_map.get(loc.fileid as usize).copied()) } + pub fn get_src_loc(&self, id: SomeId) -> Option { + match id { + SomeId::Stmt(id) => self.index(id).loc, + SomeId::Expr(id) => self.index(id).loc, + SomeId::Decl(id) => self.index(id).loc, + SomeId::Type(id) => self.index(id).loc, + } + } + pub fn iter_decls(&self) -> indexmap::map::Iter { self.c_decls.iter() } @@ -441,7 +452,6 @@ impl TypedAstContext { } pub fn prune_unused_decls(&mut self) { - use self::iterators::{DFNodes, SomeId}; // Starting from a set of root declarations, walk each one to find declarations it // depends on. Then walk each of those, recursively. diff --git a/c2rust-transpile/src/cfg/mod.rs b/c2rust-transpile/src/cfg/mod.rs index f75654cf43..4f4ff9968a 100644 --- a/c2rust-transpile/src/cfg/mod.rs +++ b/c2rust-transpile/src/cfg/mod.rs @@ -30,7 +30,7 @@ use syntax; use syntax::ast::{Arm, Expr, ExprKind, Lit, LitIntType, LitKind, Pat, Stmt, StmtKind}; use syntax::print::pprust; use syntax::ptr::P; -use syntax_pos::DUMMY_SP; +use syntax_pos::{DUMMY_SP, Span}; use indexmap::{IndexMap, IndexSet}; @@ -135,6 +135,7 @@ pub enum Structure { Simple { entries: IndexSet