Skip to content

Commit

Permalink
[c2cpg] Implemented support for JSON Compilation Database Files (#5005)
Browse files Browse the repository at this point in the history
  • Loading branch information
max-leuthaeuser authored Oct 21, 2024
1 parent 11f92c0 commit eeadcb6
Show file tree
Hide file tree
Showing 10 changed files with 352 additions and 32 deletions.
24 changes: 19 additions & 5 deletions joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.joern.c2cpg
import io.joern.c2cpg.Frontend.*
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.utils.server.FrontendHTTPServer
import io.joern.x2cpg.SourceFiles
import org.slf4j.LoggerFactory
import scopt.OParser

Expand All @@ -16,7 +17,8 @@ final case class Config(
includePathsAutoDiscovery: Boolean = false,
skipFunctionBodies: Boolean = false,
noImageLocations: Boolean = false,
withPreprocessedFiles: Boolean = false
withPreprocessedFiles: Boolean = false,
compilationDatabase: Option[String] = None
) extends X2CpgConfig[Config] {
def withIncludePaths(includePaths: Set[String]): Config = {
this.copy(includePaths = includePaths).withInheritedFields(this)
Expand Down Expand Up @@ -57,6 +59,10 @@ final case class Config(
/** Produces a copy of this configuration whose `withPreprocessedFiles` flag is set to `value`; the copy is then
  * passed through `withInheritedFields(this)` before being returned.
  */
def withPreprocessedFiles(value: Boolean): Config = {
  val updated = copy(withPreprocessedFiles = value)
  updated.withInheritedFields(this)
}

/** Produces a copy of this configuration whose `compilationDatabase` is `Some(value)`; the copy is then passed
  * through `withInheritedFields(this)` before being returned.
  *
  * @param value
  *   path of the compilation database file
  */
def withCompilationDatabase(value: String): Config = {
  val updated = copy(compilationDatabase = Some(value))
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand Down Expand Up @@ -93,17 +99,25 @@ private object Frontend {
.text("instructs the parser to skip function and method bodies.")
.action((_, c) => c.withSkipFunctionBodies(true)),
opt[Unit]("no-image-locations")
.text(
"performance optimization, allows the parser not to create image-locations. An image location explains how a name made it into the translation unit. Eg: via macro expansion or preprocessor."
)
.text("""performance optimization, allows the parser not to create image-locations.
| An image location explains how a name made it into the translation unit.
| E.g., via macro expansion or preprocessor.""".stripMargin)
.action((_, c) => c.withNoImageLocations(true)),
opt[Unit]("with-preprocessed-files")
.text("includes *.i files and gives them priority over their unprocessed origin source files.")
.action((_, c) => c.withPreprocessedFiles(true)),
opt[String]("define")
.unbounded()
.text("define a name")
.action((d, c) => c.withDefines(c.defines + d))
.action((d, c) => c.withDefines(c.defines + d)),
opt[String]("compilation-database")
.text("""enables the processing of compilation database files (e.g., compile_commands.json).
| This allows to automatically extract compiler options, source files, and other build information from the specified database
| and ensuring consistency with the build configuration.
| For a cmake based build such a file is generated with the environment variable CMAKE_EXPORT_COMPILE_COMMANDS being present.
| Clang based build are supported e.g., with https://github.com/rizsotto/Bear
| """.stripMargin)
.action((d, c) => c.withCompilationDatabase(SourceFiles.toAbsolutePath(d, c.inputPath)))
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class AstCreator(
protected val usingDeclarationMappings: mutable.Map[String, String] = mutable.HashMap.empty

// TypeDecls with their bindings (with their refs) for lambdas and methods are not put in the AST
// where the respective nodes are defined. Instead we put them under the parent TYPE_DECL in which they are defined.
// where the respective nodes are defined. Instead, we put them under the parent TYPE_DECL in which they are defined.
// To achieve this we need this extra stack.
protected val methodAstParentStack: Stack[NewNode] = new Stack()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t
// We only handle un-parsable macros here for now
val isFromMacroExpansion = statement.getProblem.getNodeLocations.exists(_.isInstanceOf[IASTMacroExpansionLocation])
val asts = if (isFromMacroExpansion) {
new CdtParser(config).parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
new CdtParser(config, List.empty)
.parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
case Some(node) => node.getDeclarations.toIndexedSeq.flatMap(astsForDeclaration)
case None => Seq.empty
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package io.joern.c2cpg.parser

import better.files.File
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.utils.IOUtils
import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage
import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage
Expand Down Expand Up @@ -41,13 +42,15 @@ object CdtParser {

}

class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorStatementsLogger {
class CdtParser(config: Config, compilationDatabase: List[CommandObject])
extends ParseProblemsLogger
with PreprocessorStatementsLogger {

import io.joern.c2cpg.parser.CdtParser._

private val headerFileFinder = new HeaderFileFinder(config.inputPath)
private val parserConfig = ParserConfig.fromConfig(config)
private val definedSymbols = parserConfig.definedSymbols.asJava
private val parserConfig = ParserConfig.fromConfig(config, compilationDatabase)
private val definedSymbols = parserConfig.definedSymbols
private val includePaths = parserConfig.userIncludePaths
private val log = new DefaultLogService

Expand Down Expand Up @@ -80,7 +83,12 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
val additionalIncludes =
if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
else parserConfig.systemIncludePathsC
new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray)
val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty)
val fileSpecificIncludes = parserConfig.includesPerFile.getOrElse(file.toString, List.empty)
new ScannerInfo(
(definedSymbols ++ fileSpecificDefines).asJava,
fileSpecificIncludes.toArray ++ (includePaths ++ additionalIncludes).map(_.toString).toArray
)
}

private def parseInternal(code: String, inFile: File): IASTTranslationUnit = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package io.joern.c2cpg.parser

import io.joern.x2cpg.SourceFiles
import io.shiftleft.utils.IOUtils
import org.slf4j.LoggerFactory
import ujson.Value

import java.nio.file.Paths
import scala.util.Try

/** Reads a JSON compilation database (e.g., compile_commands.json) and exposes, per entry, the compiled file together
  * with the `-D` defines and `-I` include paths found in its compiler invocation.
  */
object JSONCompilationDatabaseParser {

  import scala.util.control.NonFatal

  private val logger = LoggerFactory.getLogger(getClass)

  /** Finds macro definitions inside a single command-line string.
    * {{{
    * 1) -D: the flag that introduces a macro definition.
    * 2) ([A-Za-z_][A-Za-z0-9_]*): the macro name; a letter or underscore followed by any number of
    *    letters, digits, or underscores (so single-character names such as -DN are matched, too).
    * 3) (=(\\*".*"))?: optionally, = followed by a (possibly backslash-escaped) double-quoted value.
    * }}}
    * A value that is not double-quoted is not part of the match; such a macro is recorded with an empty value.
    *
    * NOTE(review): `.*` is greedy, so if one command string holds several quoted defines the match runs up to the
    * last double quote. Confirm whether this is acceptable for the supported build tools.
    */
  private val defineInCommandPattern = """-D([A-Za-z_][A-Za-z0-9_]*)(=(\\*".*"))?""".r

  /** Finds include directories inside a single command-line string.
    * {{{
    * 1) -I: the flag that introduces an include directory.
    * 2) (\S+): one or more non-whitespace characters forming the directory path.
    * }}}
    */
  private val includeInCommandPattern = """-I(\S+)""".r

  /** One entry of the compilation database.
    *
    * @param directory
    *   value of the entry's "directory" key
    * @param arguments
    *   elements of the entry's "arguments" array (empty if the key is absent or not an array)
    * @param command
    *   the entry's "command" string wrapped in a single-element list (empty if the key is absent)
    * @param file
    *   value of the entry's "file" key
    */
  case class CommandObject(directory: String, arguments: List[String], command: List[String], file: String) {

    /** @return
      *   the file path, resolved against `directory` via `SourceFiles.toAbsolutePath`
      */
    def compiledFile(): String = SourceFiles.toAbsolutePath(file, directory)

    /** Splits a `-DNAME[=VALUE]` token into its name and value.
      *
      * Only the first `=` separates name from value, so `-DFOO=a=b` yields `("FOO", "a=b")` and `-DFOO=` yields
      * `("FOO", "")`. A token without `=` yields an empty value.
      */
    private def nameValuePairFromDefine(define: String): (String, String) = {
      val s = define.stripPrefix("-D")
      s.indexOf('=') match {
        case -1  => (s, "")
        case idx => (s.substring(0, idx), s.substring(idx + 1))
      }
    }

    private def pathFromInclude(include: String): String = include.stripPrefix("-I")

    /** @return
      *   all `-I` paths, first those from `arguments`, then those found in `command`
      */
    def includes(): List[String] = {
      val includesFromArguments = arguments.filter(_.startsWith("-I")).map(pathFromInclude)
      val includesFromCommand = command.flatMap { c =>
        includeInCommandPattern.findAllIn(c).toList.map(pathFromInclude)
      }
      includesFromArguments ++ includesFromCommand
    }

    /** @return
      *   all `-D` definitions as (name, value) pairs, first those from `arguments`, then those found in `command`
      */
    def defines(): List[(String, String)] = {
      val definesFromArguments = arguments.filter(_.startsWith("-D")).map(nameValuePairFromDefine)
      val definesFromCommand = command.flatMap { c =>
        defineInCommandPattern.findAllIn(c).toList.map(nameValuePairFromDefine)
      }
      definesFromArguments ++ definesFromCommand
    }
  }

  /** True if looking up `key` on `node` does not throw. */
  private def hasKey(node: Value, key: String): Boolean = Try(node(key)).isSuccess

  /** The "arguments" array of `obj` as strings; empty if the key is missing or its value is not an array. */
  private def safeArguments(obj: Value): List[String] = {
    if (hasKey(obj, "arguments")) obj("arguments").arrOpt.map(_.toList.map(_.str)).getOrElse(List.empty)
    else List.empty
  }

  /** The "command" string of `obj` as a single-element list; empty if the key is missing. */
  private def safeCommand(obj: Value): List[String] = {
    if (hasKey(obj, "command")) List(obj("command").str)
    else List.empty
  }

  /** Parses the compilation database file at the given path.
    *
    * @param compileCommandsJson
    *   path of the JSON compilation database file
    * @return
    *   one [[CommandObject]] per entry; an empty list (after logging a warning) if the file cannot be read or does
    *   not have the expected structure
    */
  def parse(compileCommandsJson: String): List[CommandObject] = {
    try {
      val jsonContent = IOUtils.readEntireFile(Paths.get(compileCommandsJson))
      val json        = ujson.read(jsonContent)
      json.arr.toList.map { obj =>
        CommandObject(obj("directory").str, safeArguments(obj), safeCommand(obj), obj("file").str)
      }
    } catch {
      // NonFatal lets fatal JVM errors (e.g., OutOfMemoryError, InterruptedException) propagate
      // instead of being reported as a parse failure.
      case NonFatal(t) =>
        logger.warn(s"Could not parse '$compileCommandsJson'", t)
        List.empty
    }
  }

}
Original file line number Diff line number Diff line change
@@ -1,28 +1,50 @@
package io.joern.c2cpg.parser

import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.joern.c2cpg.utils.IncludeAutoDiscovery

import java.nio.file.{Path, Paths}

object ParserConfig {

def empty: ParserConfig =
ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, logProblems = false, logPreprocessor = false)
ParserConfig(
Set.empty,
Set.empty,
Set.empty,
Map.empty,
Map.empty,
Map.empty,
logProblems = false,
logPreprocessor = false
)

def fromConfig(config: Config): ParserConfig = ParserConfig(
config.includePaths.map(Paths.get(_).toAbsolutePath),
IncludeAutoDiscovery.discoverIncludePathsC(config),
IncludeAutoDiscovery.discoverIncludePathsCPP(config),
config.defines.map {
case define if define.contains("=") =>
val s = define.split("=")
s.head -> s(1)
case define => define -> "true"
}.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
config.logProblems,
config.logPreprocessor
)
/** Builds the parser configuration from the frontend configuration and the entries of a compilation database.
  *
  * User supplied defines of the form `NAME=VALUE` are split at the first `=` only, so `NAME=a=b` keeps the value
  * `a=b` and `NAME=` yields an empty value instead of failing. A define without `=` gets an empty value.
  *
  * @param config
  *   the frontend configuration
  * @param compilationDatabase
  *   entries of the compilation database (may be empty); if several entries resolve to the same compiled file, the
  *   last one wins
  */
def fromConfig(config: Config, compilationDatabase: List[CommandObject]): ParserConfig = {
  val compilationDatabaseDefines = compilationDatabase.map { c =>
    c.compiledFile() -> c.defines().toMap
  }.toMap
  val includes = compilationDatabase.map { c =>
    c.compiledFile() -> c.includes()
  }.toMap
  ParserConfig(
    config.includePaths.map(Paths.get(_).toAbsolutePath),
    IncludeAutoDiscovery.discoverIncludePathsC(config),
    IncludeAutoDiscovery.discoverIncludePathsCPP(config),
    config.defines.map { define =>
      define.indexOf('=') match {
        case -1  => define -> ""
        case idx => define.substring(0, idx) -> define.substring(idx + 1)
      }
    }.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
    compilationDatabaseDefines,
    includes,
    config.logProblems,
    config.logPreprocessor
  )
}

}

Expand All @@ -31,6 +53,8 @@ case class ParserConfig(
systemIncludePathsC: Set[Path],
systemIncludePathsCPP: Set[Path],
definedSymbols: Map[String, String],
definedSymbolsPerFile: Map[String, Map[String, String]],
includesPerFile: Map[String, List[String]],
logProblems: Boolean,
logPreprocessor: Boolean
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import io.joern.c2cpg.Config
import io.joern.c2cpg.astcreation.AstCreator
import io.joern.c2cpg.astcreation.CGlobal
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.joern.x2cpg.SourceFiles
Expand All @@ -24,18 +26,21 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())

private val logger: Logger = LoggerFactory.getLogger(classOf[AstCreationPass])

private val global = new CGlobal()
private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap()
private val parser: CdtParser = new CdtParser(config)

private val global = new CGlobal()
private val compilationDatabase: List[CommandObject] =
config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty)

private val parser: CdtParser = new CdtParser(config, compilationDatabase)

def typesSeen(): List[String] = global.usedTypes.keys().asScala.toList

def unhandledMethodDeclarations(): Map[String, CGlobal.MethodInfo] = {
global.methodDeclarations.asScala.toMap -- global.methodDefinitions.asScala.keys
}

override def generateParts(): Array[String] = {
private def sourceFilesFromDirectory(): Array[String] = {
val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
++ FileDefaults.HEADER_FILE_EXTENSIONS
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
Expand All @@ -60,6 +65,29 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
}
}

/** Collects the files listed in the parsed compilation database and runs them through `SourceFiles.filterFiles`
  * with the default ignored folders and the ignore settings from the configuration.
  *
  * @param compilationDatabaseFile
  *   path of the compilation database file; only used for the warning that is logged when the database has no
  *   entries
  */
private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): Array[String] = {
  if (compilationDatabase.isEmpty) {
    logger.warn(s"'$compilationDatabaseFile' contains no source files. CPG will be empty.")
  }
  val compiledFiles = compilationDatabase.map(_.compiledFile())
  val remainingFiles = SourceFiles.filterFiles(
    compiledFiles,
    config.inputPath,
    ignoredDefaultRegex = Option(DefaultIgnoredFolders),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )
  remainingFiles.toArray
}

/** Determines the files this pass works on: the files taken from the compilation database if one is configured,
  * otherwise the source files found by scanning the input directory.
  */
override def generateParts(): Array[String] =
  config.compilationDatabase match {
    case Some(databaseFile) => sourceFilesFromCompilationDatabase(databaseFile)
    case None               => sourceFilesFromDirectory()
  }

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val path = Paths.get(filename).toAbsolutePath
val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath)
Expand Down
Loading

0 comments on commit eeadcb6

Please sign in to comment.