From eeadcb63b757157dacd15a188ae09e130d0e5728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20Leuth=C3=A4user?= <1417198+max-leuthaeuser@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:24:10 +0200 Subject: [PATCH] [c2cpg] Implemented support for JSON Compilation Database Files (#5005) --- .../src/main/scala/io/joern/c2cpg/Main.scala | 24 +++- .../joern/c2cpg/astcreation/AstCreator.scala | 2 +- .../astcreation/AstForStatementsCreator.scala | 3 +- .../io/joern/c2cpg/parser/CdtParser.scala | 16 ++- .../JSONCompilationDatabaseParser.scala | 97 +++++++++++++++ .../io/joern/c2cpg/parser/ParserConfig.scala | 52 +++++--- .../joern/c2cpg/passes/AstCreationPass.scala | 34 +++++- .../joern/c2cpg/passes/PreprocessorPass.scala | 40 +++++- .../JSONCompilationDatabaseParserTests.scala | 114 ++++++++++++++++++ .../scala/io/joern/x2cpg/SourceFiles.scala | 2 +- 10 files changed, 352 insertions(+), 32 deletions(-) create mode 100644 joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/JSONCompilationDatabaseParser.scala create mode 100644 joern-cli/frontends/c2cpg/src/test/scala/io/joern/c2cpg/io/JSONCompilationDatabaseParserTests.scala diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala index 261a99656224..61b0675dcbc2 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala @@ -3,6 +3,7 @@ package io.joern.c2cpg import io.joern.c2cpg.Frontend.* import io.joern.x2cpg.{X2CpgConfig, X2CpgMain} import io.joern.x2cpg.utils.server.FrontendHTTPServer +import io.joern.x2cpg.SourceFiles import org.slf4j.LoggerFactory import scopt.OParser @@ -16,7 +17,8 @@ final case class Config( includePathsAutoDiscovery: Boolean = false, skipFunctionBodies: Boolean = false, noImageLocations: Boolean = false, - withPreprocessedFiles: Boolean = false + withPreprocessedFiles: Boolean = false, + compilationDatabase: Option[String] = None ) extends X2CpgConfig[Config] { def withIncludePaths(includePaths: Set[String]): Config = { this.copy(includePaths = includePaths).withInheritedFields(this) @@ -57,6 +59,10 @@ final case class Config( def withPreprocessedFiles(value: Boolean): Config = { this.copy(withPreprocessedFiles = value).withInheritedFields(this) } + + def withCompilationDatabase(value: String): Config = { + this.copy(compilationDatabase = Some(value)).withInheritedFields(this) + } } private object Frontend { @@ -93,9 +99,9 @@ private object Frontend { .text("instructs the parser to skip function and method bodies.") .action((_, c) => c.withSkipFunctionBodies(true)), opt[Unit]("no-image-locations") - .text( - "performance optimization, allows the parser not to create image-locations. An image location explains how a name made it into the translation unit. Eg: via macro expansion or preprocessor." - ) + .text("""performance optimization, allows the parser not to create image-locations. + | An image location explains how a name made it into the translation unit. + | E.g., via macro expansion or preprocessor.""".stripMargin) .action((_, c) => c.withNoImageLocations(true)), opt[Unit]("with-preprocessed-files") .text("includes *.i files and gives them priority over their unprocessed origin source files.") @@ -103,7 +109,15 @@ private object Frontend { opt[String]("define") .unbounded() .text("define a name") - .action((d, c) => c.withDefines(c.defines + d)) + .action((d, c) => c.withDefines(c.defines + d)), + opt[String]("compilation-database") + .text("""enables the processing of compilation database files (e.g., compile_commands.json). + | This allows to automatically extract compiler options, source files, and other build information from the specified database + | and ensuring consistency with the build configuration. + | For a cmake based build such a file is generated with the environment variable CMAKE_EXPORT_COMPILE_COMMANDS being present. + | Clang based build are supported e.g., with https://github.com/rizsotto/Bear + | """.stripMargin) + .action((d, c) => c.withCompilationDatabase(SourceFiles.toAbsolutePath(d, c.inputPath))) ) } diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstCreator.scala index c2deefda3673..20fc35428ef6 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstCreator.scala @@ -42,7 +42,7 @@ class AstCreator( protected val usingDeclarationMappings: mutable.Map[String, String] = mutable.HashMap.empty // TypeDecls with their bindings (with their refs) for lambdas and methods are not put in the AST - // where the respective nodes are defined. Instead we put them under the parent TYPE_DECL in which they are defined. + // where the respective nodes are defined. Instead, we put them under the parent TYPE_DECL in which they are defined. // To achieve this we need this extra stack. protected val methodAstParentStack: Stack[NewNode] = new Stack() diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstForStatementsCreator.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstForStatementsCreator.scala index 0dcc4b8bc032..8db9c0f119cc 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstForStatementsCreator.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/astcreation/AstForStatementsCreator.scala @@ -191,7 +191,8 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t // We only handle un-parsable macros here for now val isFromMacroExpansion = statement.getProblem.getNodeLocations.exists(_.isInstanceOf[IASTMacroExpansionLocation]) val asts = if (isFromMacroExpansion) { - new CdtParser(config).parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match + new CdtParser(config, List.empty) + .parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match case Some(node) => node.getDeclarations.toIndexedSeq.flatMap(astsForDeclaration) case None => Seq.empty } else { diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala index 9da14ebcd7fa..1a8cfce01c62 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala @@ -2,6 +2,7 @@ package io.joern.c2cpg.parser import better.files.File import io.joern.c2cpg.Config +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject import io.shiftleft.utils.IOUtils import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage @@ -41,13 +42,15 @@ object CdtParser { } -class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorStatementsLogger { +class CdtParser(config: Config, compilationDatabase: List[CommandObject]) + extends ParseProblemsLogger + with PreprocessorStatementsLogger { import io.joern.c2cpg.parser.CdtParser._ private val headerFileFinder = new HeaderFileFinder(config.inputPath) - private val parserConfig = ParserConfig.fromConfig(config) - private val definedSymbols = parserConfig.definedSymbols.asJava + private val parserConfig = ParserConfig.fromConfig(config, compilationDatabase) + private val definedSymbols = parserConfig.definedSymbols private val includePaths = parserConfig.userIncludePaths private val log = new DefaultLogService @@ -80,7 +83,12 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta val additionalIncludes = if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP else parserConfig.systemIncludePathsC - new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray) + val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty) + val fileSpecificIncludes = parserConfig.includesPerFile.getOrElse(file.toString, List.empty) + new ScannerInfo( + (definedSymbols ++ fileSpecificDefines).asJava, + fileSpecificIncludes.toArray ++ (includePaths ++ additionalIncludes).map(_.toString).toArray + ) } private def parseInternal(code: String, inFile: File): IASTTranslationUnit = { diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/JSONCompilationDatabaseParser.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/JSONCompilationDatabaseParser.scala new file mode 100644 index 000000000000..ede3e37f621a --- /dev/null +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/JSONCompilationDatabaseParser.scala @@ -0,0 +1,97 @@ +package io.joern.c2cpg.parser + +import io.joern.x2cpg.SourceFiles +import io.shiftleft.utils.IOUtils +import org.slf4j.LoggerFactory +import ujson.Value + +import java.nio.file.Paths +import scala.util.Try + +object JSONCompilationDatabaseParser { + + private val logger = LoggerFactory.getLogger(getClass) + + /** {{{ + * 1) -D: Matches the -D flag, which is the key prefix for defining macros. + * 2) ([A-Za-z_][A-Za-z0-9_]+): Matches a valid macro name (which must start with a letter or underscore and can be followed by letters, numbers, or underscores). + * 3) (=(\\*".*"))?: Optionally matches = followed by either: + * a) A quoted string: Allows for strings in quotes. + * b) Any char sequence (.*") closed with a quote. + * }}} + */ + private val defineInCommandPattern = """-D([A-Za-z_][A-Za-z0-9_]+)(=(\\*".*"))?""".r + + /** {{{ + * 1) -I: Matches the -I flag, which indicates an include directory. + * 2) (\S+): Matches one or more non-whitespace characters, which represent the path of the directory. + * }}} + */ + private val includeInCommandPattern = """-I(\S+)""".r + + case class CommandObject(directory: String, arguments: List[String], command: List[String], file: String) { + + /** @return + * the file path (guaranteed to be absolute) + */ + def compiledFile(): String = SourceFiles.toAbsolutePath(file, directory) + + private def nameValuePairFromDefine(define: String): (String, String) = { + val s = define.stripPrefix("-D") + if (s.contains("=")) { + val split = s.split("=") + (split.head, split(1)) + } else { + (s, "") + } + } + + private def pathFromInclude(include: String): String = include.stripPrefix("-I") + + def includes(): List[String] = { + val includesFromArguments = arguments.filter(a => a.startsWith("-I")).map(pathFromInclude) + val includesFromCommand = command.flatMap { c => + val includes = includeInCommandPattern.findAllIn(c).toList + includes.map(pathFromInclude) + } + includesFromArguments ++ includesFromCommand + } + + def defines(): List[(String, String)] = { + val definesFromArguments = arguments.filter(a => a.startsWith("-D")).map(nameValuePairFromDefine) + val definesFromCommand = command.flatMap { c => + val defines = defineInCommandPattern.findAllIn(c).toList + defines.map(nameValuePairFromDefine) + } + definesFromArguments ++ definesFromCommand + } + } + + private def hasKey(node: Value, key: String): Boolean = Try(node(key)).isSuccess + + private def safeArguments(obj: Value): List[String] = { + if (hasKey(obj, "arguments")) obj("arguments").arrOpt.map(_.toList.map(_.str)).getOrElse(List.empty) + else List.empty + } + + private def safeCommand(obj: Value): List[String] = { + if (hasKey(obj, "command")) List(obj("command").str) + else List.empty + } + + def parse(compileCommandsJson: String): List[CommandObject] = { + try { + val jsonContent = IOUtils.readEntireFile(Paths.get(compileCommandsJson)) + val json = ujson.read(jsonContent) + val allCommandObjects = json.arr.toList + allCommandObjects.map { obj => + CommandObject(obj("directory").str, safeArguments(obj), safeCommand(obj), obj("file").str) + } + } catch { + case t: Throwable => + logger.warn(s"Could not parse '$compileCommandsJson'", t) + List.empty + } + } + +} diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/ParserConfig.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/ParserConfig.scala index 7bb82a6b751e..955430d1a5ee 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/ParserConfig.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/ParserConfig.scala @@ -1,6 +1,7 @@ package io.joern.c2cpg.parser import io.joern.c2cpg.Config +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject import io.joern.c2cpg.utils.IncludeAutoDiscovery import java.nio.file.{Path, Paths} @@ -8,21 +9,42 @@ import java.nio.file.{Path, Paths} object ParserConfig { def empty: ParserConfig = - ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, logProblems = false, logPreprocessor = false) + ParserConfig( + Set.empty, + Set.empty, + Set.empty, + Map.empty, + Map.empty, + Map.empty, + logProblems = false, + logPreprocessor = false + ) - def fromConfig(config: Config): ParserConfig = ParserConfig( - config.includePaths.map(Paths.get(_).toAbsolutePath), - IncludeAutoDiscovery.discoverIncludePathsC(config), - IncludeAutoDiscovery.discoverIncludePathsCPP(config), - config.defines.map { - case define if define.contains("=") => - val s = define.split("=") - s.head -> s(1) - case define => define -> "true" - }.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS, - config.logProblems, - config.logPreprocessor - ) + def fromConfig(config: Config, compilationDatabase: List[CommandObject]): ParserConfig = { + val compilationDatabaseDefines = compilationDatabase.map { c => + c.compiledFile() -> c.defines().toMap + }.toMap + val includes = compilationDatabase.map { c => + c.compiledFile() -> c.includes() + }.toMap + ParserConfig( + config.includePaths.map(Paths.get(_).toAbsolutePath), + IncludeAutoDiscovery.discoverIncludePathsC(config), + IncludeAutoDiscovery.discoverIncludePathsCPP(config), + config.defines.map { define => + if (define.contains("=")) { + val split = define.split("=") + split.head -> split(1) + } else { + define -> "" + } + }.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS, + compilationDatabaseDefines, + includes, + config.logProblems, + config.logPreprocessor + ) + } } @@ -31,6 +53,8 @@ case class ParserConfig( systemIncludePathsC: Set[Path], systemIncludePathsCPP: Set[Path], definedSymbols: Map[String, String], + definedSymbolsPerFile: Map[String, Map[String, String]], + includesPerFile: Map[String, List[String]], logProblems: Boolean, logPreprocessor: Boolean ) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala index 182be0a18071..0325a19b0395 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala @@ -5,6 +5,8 @@ import io.joern.c2cpg.Config import io.joern.c2cpg.astcreation.AstCreator import io.joern.c2cpg.astcreation.CGlobal import io.joern.c2cpg.parser.{CdtParser, FileDefaults} +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.passes.ForkJoinParallelCpgPass import io.joern.x2cpg.SourceFiles @@ -24,10 +26,13 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report()) private val logger: Logger = LoggerFactory.getLogger(classOf[AstCreationPass]) + private val global = new CGlobal() private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap() - private val parser: CdtParser = new CdtParser(config) - private val global = new CGlobal() + private val compilationDatabase: List[CommandObject] = + config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty) + + private val parser: CdtParser = new CdtParser(config, compilationDatabase) def typesSeen(): List[String] = global.usedTypes.keys().asScala.toList @@ -35,7 +40,7 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report()) global.methodDeclarations.asScala.toMap -- global.methodDefinitions.asScala.keys } - override def generateParts(): Array[String] = { + private def sourceFilesFromDirectory(): Array[String] = { val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS ++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList @@ -60,6 +65,29 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report()) } } + private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): Array[String] = { + if (compilationDatabase.isEmpty) { + logger.warn(s"'$compilationDatabaseFile' contains no source files. CPG will be empty.") + } + SourceFiles + .filterFiles( + compilationDatabase.map(_.compiledFile()), + config.inputPath, + ignoredDefaultRegex = Option(DefaultIgnoredFolders), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) + ) + .toArray + } + + override def generateParts(): Array[String] = { + if (config.compilationDatabase.isEmpty) { + sourceFilesFromDirectory() + } else { + sourceFilesFromCompilationDatabase(config.compilationDatabase.get) + } + } + override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = { val path = Paths.get(filename).toAbsolutePath val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala index 3a884d7a9257..a8e435413799 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala @@ -3,12 +3,15 @@ package io.joern.c2cpg.passes import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders import io.joern.c2cpg.Config import io.joern.c2cpg.parser.{CdtParser, FileDefaults} +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject import io.joern.x2cpg.SourceFiles import org.eclipse.cdt.core.dom.ast.{ - IASTPreprocessorIfStatement, IASTPreprocessorIfdefStatement, + IASTPreprocessorIfStatement, IASTPreprocessorStatement } +import org.slf4j.LoggerFactory import java.nio.file.Paths import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable @@ -16,9 +19,14 @@ import scala.collection.parallel.immutable.ParIterable class PreprocessorPass(config: Config) { - private val parser = new CdtParser(config) + private val logger = LoggerFactory.getLogger(classOf[PreprocessorPass]) + + private val compilationDatabase: List[CommandObject] = + config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty) - def run(): ParIterable[String] = + private val parser = new CdtParser(config, compilationDatabase) + + private def sourceFilesFromDirectory(): ParIterable[String] = { SourceFiles .determine( config.inputPath, @@ -29,6 +37,32 @@ class PreprocessorPass(config: Config) { ) .par .flatMap(runOnPart) + } + + private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): ParIterable[String] = { + if (compilationDatabase.isEmpty) { + logger.warn(s"'$compilationDatabaseFile' contains no source files.") + } + SourceFiles + .filterFiles( + compilationDatabase.map(_.compiledFile()), + config.inputPath, + ignoredDefaultRegex = Option(DefaultIgnoredFolders), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) + ) + .par + .flatMap(runOnPart) + } + + def run(): ParIterable[String] = { + if (config.compilationDatabase.isEmpty) { + sourceFilesFromDirectory() + } else { + sourceFilesFromCompilationDatabase(config.compilationDatabase.get) + } + + } private def preprocessorStatement2String(stmt: IASTPreprocessorStatement): Option[String] = stmt match { case s: IASTPreprocessorIfStatement => diff --git a/joern-cli/frontends/c2cpg/src/test/scala/io/joern/c2cpg/io/JSONCompilationDatabaseParserTests.scala b/joern-cli/frontends/c2cpg/src/test/scala/io/joern/c2cpg/io/JSONCompilationDatabaseParserTests.scala new file mode 100644 index 000000000000..a57c4612d8c9 --- /dev/null +++ b/joern-cli/frontends/c2cpg/src/test/scala/io/joern/c2cpg/io/JSONCompilationDatabaseParserTests.scala @@ -0,0 +1,114 @@ +package io.joern.c2cpg.io + +import better.files.File +import io.joern.c2cpg.parser.JSONCompilationDatabaseParser +import io.joern.c2cpg.C2Cpg +import io.joern.c2cpg.Config +import io.shiftleft.semanticcpg.language.* +import io.shiftleft.semanticcpg.language.types.structure.FileTraversal +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.nio.file.Paths + +class JSONCompilationDatabaseParserTests extends AnyWordSpec with Matchers { + + "Parsing a simple compile_commands.json" should { + "generate a proper list of CommandObjects" in { + val content = + """ + |[ + | { "directory": "/home/user/llvm/build", + | "arguments": ["/usr/bin/clang++", "-I/usr/include", "-I./include", "-DSOMEDEFA=With spaces, quotes and \\-es.", "-c", "-o", "file.o", "file.cc"], + | "file": "file.cc" }, + | { "directory": "/home/user/llvm/build", + | "command": "/usr/bin/clang++ -I/home/user/project/includes -DSOMEDEFB=\"With spaces, quotes and \\-es.\" -DSOMEDEFC -c -o file.o file.cc", + | "file": "file2.cc" } + |]""".stripMargin + + File.usingTemporaryFile("compile_commands.json") { commandJsonFile => + commandJsonFile.writeText(content) + + val commandObjects = JSONCompilationDatabaseParser.parse(commandJsonFile.pathAsString) + commandObjects.map(_.compiledFile()) shouldBe List( + Paths.get("/home/user/llvm/build/file.cc").toString, + Paths.get("/home/user/llvm/build/file2.cc").toString + ) + commandObjects.flatMap(_.defines()) shouldBe List( + ("SOMEDEFA", "With spaces, quotes and \\-es."), + ("SOMEDEFB", "\"With spaces, quotes and \\-es.\""), + ("SOMEDEFC", "") + ) + commandObjects.flatMap(_.includes()) shouldBe List("/usr/include", "./include", "/home/user/project/includes") + } + } + } + + private def newProjectUnderTest(): File = { + val dir = File.newTemporaryDirectory("c2cpgJSONCompilationDatabaseParserTests") + + val mainText = + """ + |int main(int argc, char *argv[]) { + | print("Hello World!"); + |} + |#ifdef SOMEDEFA + |void foo() {} + |#endif + |#ifdef SOMEDEFC + |void bar() {} + |#endif + |""".stripMargin + + val fileA = dir / "fileA.c" + fileA.createIfNotExists(createParents = true) + fileA.writeText(mainText) + fileA.deleteOnExit() + val fileB = dir / "fileB.c" + fileB.createIfNotExists(createParents = true) + fileB.writeText(mainText) + fileB.deleteOnExit() + val fileC = dir / "fileC.c" + fileC.createIfNotExists(createParents = true) + fileC.writeText(mainText) + fileC.deleteOnExit() + + val compilerCommands = dir / "compile_commands.json" + compilerCommands.createIfNotExists(createParents = true) + val content = s""" + |[ + | { "directory": "${dir.pathAsString}", + | "arguments": ["/usr/bin/clang++", "-Irelative", "-DSOMEDEFA=With spaces, quotes and \\-es.", "-c", "-o", "fileA.o", "fileA.cc"], + | "file": "fileA.c" }, + | { "directory": ".", + | "arguments": ["/usr/bin/clang++", "-Irelative", "-DSOMEDEFB=With spaces, quotes and \\-es.", "-c", "-o", "fileB.o", "fileB.cc"], + | "file": "${fileB.pathAsString}" } + |]""".stripMargin.replace("\\", "\\\\") // escape for tests under Windows + compilerCommands.writeText(content) + compilerCommands.deleteOnExit() + + dir.deleteOnExit() + } + + "Using a simple compile_commands.json" should { + "respect the files listed" in { + val cpgOutFile = File.newTemporaryFile("c2cpg.bin") + cpgOutFile.deleteOnExit() + val projectUnderTest = newProjectUnderTest() + val input = projectUnderTest.path.toAbsolutePath.toString + val output = cpgOutFile.toString + val config = Config() + .withInputPath(input) + .withOutputPath(output) + .withCompilationDatabase((File(input) / "compile_commands.json").pathAsString) + val c2cpg = new C2Cpg() + val cpg = c2cpg.createCpg(config).get + cpg.file.nameNot(FileTraversal.UNKNOWN, "").name.sorted.l should contain theSameElementsAs List( + "fileA.c", + "fileB.c" + // fileC.c is ignored because it is not listed in the compile_commands.json + ) + cpg.method.nameNot("").name.sorted.l shouldBe List("foo", "main", "main") + } + } +} diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala index 9fac9de33edc..f8ac012141fc 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala @@ -106,7 +106,7 @@ object SourceFiles { && !ignoredFilesRegex.exists(isIgnoredByRegex(file, inputPath, _)) && !ignoredFilesPath.exists(isIgnoredByFileList(file, _)) - private def filterFiles( + def filterFiles( files: List[String], inputPath: String, ignoredDefaultRegex: Option[Seq[Regex]] = None,