From 12703383a7a025a43abc1afb87730a25ab6a9a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20Leuth=C3=A4user?= <1417198+max-leuthaeuser@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:51:14 +0200 Subject: [PATCH 1/2] [c2cpg] Implemented cached header file loading --- .../io/joern/c2cpg/parser/CdtParser.scala | 12 ++++++++---- .../parser/CustomFileContentProvider.scala | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala index 8c4f051070dd..9da14ebcd7fa 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CdtParser.scala @@ -30,9 +30,13 @@ object CdtParser { failure: Option[Throwable] = None ) - def readFileAsFileContent(path: Path): FileContent = { + def loadLinesAsFileContent(path: Path, lines: Array[Char]): InternalFileContent = { + FileContent.create(path.toString, true, lines).asInstanceOf[InternalFileContent] + } + + def readFileAsFileContent(path: Path): InternalFileContent = { val lines = IOUtils.readLinesInFile(path).mkString("\n").toArray - FileContent.create(path.toString, true, lines) + loadLinesAsFileContent(path, lines) } } @@ -97,8 +101,8 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta try { val fileContent = readFileAsFileContent(realPath.path) val fileContentProvider = new CustomFileContentProvider(headerFileFinder) - val lang = createParseLanguage(realPath.path, fileContent.asInstanceOf[InternalFileContent].toString) - val scannerInfo = createScannerInfo(realPath.path) + val lang = createParseLanguage(realPath.path, fileContent.toString) + val scannerInfo = createScannerInfo(realPath.path) val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log) val problems = CPPVisitor.getProblems(translationUnit) if (parserConfig.logProblems) logProblems(problems.toList) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala index b22a21bc4c8d..5b90fd188444 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala @@ -1,14 +1,22 @@ package io.joern.c2cpg.parser +import io.shiftleft.utils.IOUtils import org.eclipse.cdt.core.index.IIndexFileLocation import org.eclipse.cdt.internal.core.parser.IMacroDictionary import org.eclipse.cdt.internal.core.parser.scanner.{InternalFileContent, InternalFileContentProvider} import org.slf4j.LoggerFactory import java.nio.file.Paths +import java.util.concurrent.ConcurrentHashMap + +object CustomFileContentProvider { + private val headerFileToLines: ConcurrentHashMap[String, Array[Char]] = new ConcurrentHashMap() +} class CustomFileContentProvider(headerFileFinder: HeaderFileFinder) extends InternalFileContentProvider { + import io.joern.c2cpg.parser.CustomFileContentProvider.headerFileToLines + private val logger = LoggerFactory.getLogger(classOf[CustomFileContentProvider]) private def loadContent(path: String): InternalFileContent = { @@ -19,8 +27,15 @@ class CustomFileContentProvider(headerFileFinder: HeaderFileFinder) extends Inte } maybeFullPath .map { foundPath => - logger.debug(s"Loading header file '$foundPath'") - CdtParser.readFileAsFileContent(Paths.get(foundPath)).asInstanceOf[InternalFileContent] + val p = Paths.get(foundPath) + val content = headerFileToLines.computeIfAbsent( + foundPath, + _ => { + logger.debug(s"Loading header file '$foundPath'") + IOUtils.readLinesInFile(p).mkString("\n").toArray + } + ) + CdtParser.loadLinesAsFileContent(p, content) } .getOrElse { logger.debug(s"Cannot find header file for '$path'") From 38b8698d0293271ce1d72d9035b662d53a30da93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20Leuth=C3=A4user?= <1417198+max-leuthaeuser@users.noreply.github.com> Date: Thu, 10 Oct 2024 19:29:14 +0200 Subject: [PATCH 2/2] Also only log missing headers once --- .../joern/c2cpg/parser/CustomFileContentProvider.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala index 5b90fd188444..e30846d86f68 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/parser/CustomFileContentProvider.scala @@ -1,5 +1,6 @@ package io.joern.c2cpg.parser +import io.joern.c2cpg.parser.CustomFileContentProvider.missingHeaderFiles import io.shiftleft.utils.IOUtils import org.eclipse.cdt.core.index.IIndexFileLocation import org.eclipse.cdt.internal.core.parser.IMacroDictionary @@ -11,6 +12,7 @@ import java.util.concurrent.ConcurrentHashMap object CustomFileContentProvider { private val headerFileToLines: ConcurrentHashMap[String, Array[Char]] = new ConcurrentHashMap() + private val missingHeaderFiles: ConcurrentHashMap[String, Boolean] = new ConcurrentHashMap() } class CustomFileContentProvider(headerFileFinder: HeaderFileFinder) extends InternalFileContentProvider { @@ -38,7 +40,13 @@ class CustomFileContentProvider(headerFileFinder: HeaderFileFinder) extends Inte CdtParser.loadLinesAsFileContent(p, content) } .getOrElse { - logger.debug(s"Cannot find header file for '$path'") + missingHeaderFiles.computeIfAbsent( + path, + _ => { + logger.debug(s"Cannot find header file for '$path'") + true + } + ) null }