Skip to content

Commit

Permalink
[c2cpg] Handle preprocessed files (joernio#4466)
Browse files Browse the repository at this point in the history
cpp/gcc/g++ offer the -E switch. The -E switch forces the compiler to stop after the preprocessing phase and emit the result, in which all preprocessor directives are processed, all macros are expanded, and all header files are included. Typically, this result is stored as a *.i file.

With this PR we process such files and, if `--with-preprocessed-files` is used, exclude all other source files that have the same name apart from the file extension.

Fixes: joernio#4462
  • Loading branch information
max-leuthaeuser authored Apr 22, 2024
1 parent abf5a8d commit 1b252c9
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ final case class Config(
printIfDefsOnly: Boolean = false,
includePathsAutoDiscovery: Boolean = false,
skipFunctionBodies: Boolean = false,
noImageLocations: Boolean = false
noImageLocations: Boolean = false,
withPreprocessedFiles: Boolean = false
) extends X2CpgConfig[Config] {
def withIncludePaths(includePaths: Set[String]): Config = {
this.copy(includePaths = includePaths).withInheritedFields(this)
Expand Down Expand Up @@ -53,6 +54,10 @@ final case class Config(
/** Returns a copy of this configuration with `noImageLocations` set to `value`.
  *
  * The copy is passed through `withInheritedFields(this)`; presumably this carries over the settings inherited from
  * `X2CpgConfig` (confirm against its definition).
  *
  * @param value
  *   the new value for `noImageLocations`
  */
def withNoImageLocations(value: Boolean): Config =
  copy(noImageLocations = value).withInheritedFields(this)

/** Returns a copy of this configuration with `withPreprocessedFiles` set to `value`.
  *
  * The copy is passed through `withInheritedFields(this)`; presumably this carries over the settings inherited from
  * `X2CpgConfig` (confirm against its definition).
  *
  * @param value
  *   the new value for `withPreprocessedFiles`
  */
def withPreprocessedFiles(value: Boolean): Config =
  copy(withPreprocessedFiles = value).withInheritedFields(this)
}

private object Frontend {
Expand Down Expand Up @@ -93,6 +98,9 @@ private object Frontend {
"performance optimization, allows the parser not to create image-locations. An image location explains how a name made it into the translation unit. Eg: via macro expansion or preprocessor."
)
.action((_, c) => c.withNoImageLocations(true)),
opt[Unit]("with-preprocessed-files")
.text("includes *.i files and gives them priority over their unprocessed origin source files.")
.action((_, c) => c.withPreprocessedFiles(true)),
opt[String]("define")
.unbounded()
.text("define a name")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import org.eclipse.cdt.core.model.ILanguage
import org.eclipse.cdt.core.parser.{DefaultLogService, ScannerInfo}
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor
import org.eclipse.cdt.internal.core.parser.scanner.InternalFileContent
import org.slf4j.LoggerFactory

import java.nio.file.{NoSuchFileException, Path}
Expand Down Expand Up @@ -53,8 +54,18 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
// performance optimization, allows the parser not to create image-locations
if (config.noImageLocations) opts |= ILanguage.OPTION_NO_IMAGE_LOCATIONS

private def createParseLanguage(file: Path): ILanguage = {
if (FileDefaults.isCPPFile(file.toString)) {
/** Checks whether a preprocessed file (`*.i`) appears to originate from a C++ source file.
  *
  * The check is only performed if `config.withPreprocessedFiles` is set and `file` ends with
  * `FileDefaults.PREPROCESSED_EXT`. It searches `code` for a quoted file name whose last path segment is the name of
  * `file` with its `.i` suffix replaced by one of `FileDefaults.CPP_FILE_EXTENSIONS`; for `b.i` this matches e.g.
  * `# 1 "b.cpp"` as well as `# 1 "src/b.cpp"` or `# 1 "/abs/path/b.cpp"`.
  *
  * @param file
  *   path of the file that is about to be parsed
  * @param code
  *   content of that file
  * @return
  *   `true` if such a reference was found; `false` otherwise (including when the feature is disabled or `file` is not
  *   a preprocessed file)
  */
private def preprocessedFileIsFromCPPFile(file: Path, code: String): Boolean = {
  if (config.withPreprocessedFiles && file.toString.endsWith(FileDefaults.PREPROCESSED_EXT)) {
    val fileWithoutExt = file.toString.stripSuffix(FileDefaults.PREPROCESSED_EXT)
    val cppFileNames   = FileDefaults.CPP_FILE_EXTENSIONS.map(ext => File(s"$fileWithoutExt$ext").name)
    // Preprocessor line markers carry the file name as it was passed to the compiler, which may be
    // directory-qualified. Hence we accept the bare quoted name as well as any quoted path that ends with a
    // path separator ('/' or '\') followed by the name.
    val quotedNameEndings = cppFileNames.flatMap { name =>
      Seq(s"\"$name\"", s"/$name\"", s"\\$name\"")
    }
    code.linesIterator.exists(line => quotedNameEndings.exists(line.contains))
  } else {
    false
  }
}

private def createParseLanguage(file: Path, code: String): ILanguage = {
if (FileDefaults.isCPPFile(file.toString) || preprocessedFileIsFromCPPFile(file, code)) {
GPPLanguage.getDefault
} else {
GCCLanguage.getDefault
Expand All @@ -71,7 +82,7 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
private def parseInternal(code: String, inFile: File): IASTTranslationUnit = {
val fileContent = FileContent.create(inFile.toString, true, code.toCharArray)
val fileContentProvider = new CustomFileContentProvider(headerFileFinder)
val lang = createParseLanguage(inFile.path)
val lang = createParseLanguage(inFile.path, code)
val scannerInfo = createScannerInfo(inFile.path)
val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log)
val problems = CPPVisitor.getProblems(translationUnit)
Expand All @@ -86,8 +97,8 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
try {
val fileContent = readFileAsFileContent(realPath.path)
val fileContentProvider = new CustomFileContentProvider(headerFileFinder)
val lang = createParseLanguage(realPath.path)
val scannerInfo = createScannerInfo(realPath.path)
val lang = createParseLanguage(realPath.path, fileContent.asInstanceOf[InternalFileContent].toString)
val scannerInfo = createScannerInfo(realPath.path)
val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log)
val problems = CPPVisitor.getProblems(translationUnit)
if (parserConfig.logProblems) logProblems(problems.toList)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ package io.joern.c2cpg.parser

object FileDefaults {

val C_EXT: String = ".c"
val CPP_EXT: String = ".cpp"
val C_EXT: String = ".c"
val CPP_EXT: String = ".cpp"
val PREPROCESSED_EXT: String = ".i"

private val CC_EXT = ".cc"
private val C_HEADER_EXT = ".h"
Expand All @@ -14,7 +15,7 @@ object FileDefaults {

val HEADER_FILE_EXTENSIONS: Set[String] = Set(C_HEADER_EXT, CPP_HEADER_EXT, OTHER_HEADER_EXT)

private val CPP_FILE_EXTENSIONS = Set(CC_EXT, CPP_EXT, CPP_HEADER_EXT)
val CPP_FILE_EXTENSIONS: Set[String] = Set(CC_EXT, CPP_EXT, CPP_HEADER_EXT)

def isHeaderFile(filePath: String): Boolean =
HEADER_FILE_EXTENSIONS.exists(filePath.endsWith)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,30 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())

def typesSeen(): List[String] = global.usedTypes.keys().asScala.filterNot(_ == Defines.anyTypeName).toList

/** Determines the files this pass works on.
  *
  * Asks `SourceFiles.determine` for the files below `config.inputPath` that carry one of the source or header file
  * extensions from `FileDefaults`, handing over the default ignored folders and the ignore settings of the
  * configuration.
  *
  * @return
  *   the determined file names as an array
  */
override def generateParts(): Array[String] = {
  val extensions = FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS
  val files = SourceFiles.determine(
    config.inputPath,
    extensions,
    ignoredDefaultRegex = Option(DefaultIgnoredFolders),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )
  files.toArray
}
/** Determines the files this pass works on.
  *
  * Asks `SourceFiles.determine` for the files below `config.inputPath` that carry one of the source or header file
  * extensions from `FileDefaults`. If `config.withPreprocessedFiles` is set, files ending with
  * `FileDefaults.PREPROCESSED_EXT` are requested as well and take priority: every other file for which a preprocessed
  * file with the same path up to the last `.` exists is dropped (e.g. `main.c`, `main.cpp` and `main.h` are all
  * dropped in favor of `main.i`).
  *
  * @return
  *   the file names to process as an array
  */
override def generateParts(): Array[String] = {
  val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
    ++ FileDefaults.HEADER_FILE_EXTENSIONS
    ++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
  val allSourceFiles = SourceFiles
    .determine(
      config.inputPath,
      sourceFileExtensions,
      ignoredDefaultRegex = Option(DefaultIgnoredFolders),
      ignoredFilesRegex = Option(config.ignoredFilesRegex),
      ignoredFilesPath = Option(config.ignoredFiles)
    )
    .toArray
  if (config.withPreprocessedFiles) {
    // Collect the preprocessed files once so that each lookup below is a constant-time set membership test
    // instead of a scan over all source files.
    val preprocessedFiles = allSourceFiles.filter(_.endsWith(FileDefaults.PREPROCESSED_EXT)).toSet
    allSourceFiles.filter { f =>
      f.endsWith(FileDefaults.PREPROCESSED_EXT) || {
        // swap the extension of f for the preprocessed one and drop f if such a file exists
        val fAsPreprocessedFile = s"${f.substring(0, f.lastIndexOf("."))}${FileDefaults.PREPROCESSED_EXT}"
        !preprocessedFiles.contains(fAsPreprocessedFile)
      }
    }
  } else {
    allSourceFiles
  }
}

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val path = Paths.get(filename).toAbsolutePath
Expand Down
Original file line number Diff line number Diff line change
@@ -1,64 +1,109 @@
package io.joern.c2cpg.passes.ast

import io.joern.c2cpg.testfixtures.C2CpgSuite
import io.shiftleft.semanticcpg.language._
import io.joern.c2cpg.Config
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.types.structure.FileTraversal
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

class FileTests extends C2CpgSuite {

private val cpg = code("""
| int foo() {}
| int bar() {}
| struct my_struct { int x; };
|""".stripMargin)

"should contain the correct file nodes" in {
val List(fileTest, fileUnknown) = cpg.file.nameNot("<includes>").l
fileTest.name shouldBe "Test0.c"
fileTest.order shouldBe 0
fileUnknown.name shouldBe FileTraversal.UNKNOWN
fileUnknown.order shouldBe 0
}
"File test for single file" should {

"should contain exactly one placeholder file node with `name=\"<unknown>\"/order=0`" in {
cpg.file(FileTraversal.UNKNOWN).order.l shouldBe List(0)
cpg.file(FileTraversal.UNKNOWN).hash.l shouldBe List()
}
val cpg = code("""
| int foo() {}
| int bar() {}
| struct my_struct { int x; };
|""".stripMargin)

"should allow traversing from file to its namespace blocks" in {
cpg.file.nameNot(FileTraversal.UNKNOWN).namespaceBlock.name.toSetMutable shouldBe Set(
NamespaceTraversal.globalNamespaceName
)
}
"contain the correct file nodes" in {
val List(fileTest, fileUnknown) = cpg.file.nameNot("<includes>").l
fileTest.name shouldBe "Test0.c"
fileTest.order shouldBe 0
fileUnknown.name shouldBe FileTraversal.UNKNOWN
fileUnknown.order shouldBe 0
}

"contain exactly one placeholder file node with `name=\"<unknown>\"/order=0`" in {
cpg.file(FileTraversal.UNKNOWN).order.l shouldBe List(0)
cpg.file(FileTraversal.UNKNOWN).hash.l shouldBe List()
}

"allow traversing from file to its namespace blocks" in {
cpg.file.nameNot(FileTraversal.UNKNOWN).namespaceBlock.name.toSetMutable shouldBe Set(
NamespaceTraversal.globalNamespaceName
)
}

"allow traversing from file to its methods via namespace block" in {
cpg.file.nameNot(FileTraversal.UNKNOWN).method.name.toSetMutable shouldBe Set(
NamespaceTraversal.globalNamespaceName,
"foo",
"bar"
)
}

"allow traversing from file to its type declarations via namespace block" in {
cpg.file
.nameNot(FileTraversal.UNKNOWN)
.typeDecl
.nameNot(NamespaceTraversal.globalNamespaceName)
.name
.l
.sorted shouldBe List("ANY", "int", "my_struct", "void")
}

"should allow traversing from file to its methods via namespace block" in {
cpg.file.nameNot(FileTraversal.UNKNOWN).method.name.toSetMutable shouldBe Set(
NamespaceTraversal.globalNamespaceName,
"foo",
"bar"
)
"allow traversing to namespaces" in {
val List(ns1, ns2, ns3) = cpg.file.namespaceBlock.l
ns1.filename shouldBe "Test0.c"
ns1.fullName shouldBe "Test0.c:<global>"
ns2.filename shouldBe "<includes>"
ns2.fullName shouldBe "<includes>:<global>"
ns3.filename shouldBe "<unknown>"
ns3.fullName shouldBe "<global>"
cpg.file.namespace.name(NamespaceTraversal.globalNamespaceName).l.size shouldBe 3
}
}

"should allow traversing from file to its type declarations via namespace block" in {
cpg.file
.nameNot(FileTraversal.UNKNOWN)
.typeDecl
.nameNot(NamespaceTraversal.globalNamespaceName)
.name
.l
.sorted shouldBe List("ANY", "int", "my_struct", "void")
"File test for multiple source files and preprocessed files" should {

val cpg = code("int foo() {}", "main.c")
.moreCode("int bar() {}", "main.cpp")
.moreCode("int foo() {}", "main.i")
.moreCode("int baz() {}", "main.h")
.moreCode("int other() {}", "other.h")
.moreCode("int other() {}", "other.i")
.withConfig(Config(withPreprocessedFiles = true))

"contain the correct file nodes" in {
cpg.method.nameNot("<global>").internal.name.sorted.l shouldBe List("foo", "other")
cpg.file.nameNot("<includes>", "<unknown>").name.sorted.l shouldBe List("main.i", "other.i")
}

}

"should allow traversing to namespaces" in {
val List(ns1, ns2, ns3) = cpg.file.namespaceBlock.l
ns1.filename shouldBe "Test0.c"
ns1.fullName shouldBe "Test0.c:<global>"
ns2.filename shouldBe "<includes>"
ns2.fullName shouldBe "<includes>:<global>"
ns3.filename shouldBe "<unknown>"
ns3.fullName shouldBe "<global>"
cpg.file.namespace.name(NamespaceTraversal.globalNamespaceName).l.size shouldBe 3
"File test for preprocessed files from C and CPP files" should {

val cpg = code(
"""
|# 1 "a.c" 1
|int bar() {}
|""".stripMargin,
"a.i"
).moreCode(
"""
|# 1 "b.cpp" 1
|class B {};
|""".stripMargin,
"b.i"
).withConfig(Config(withPreprocessedFiles = true))

"be parsed correctly" in {
cpg.file.nameNot("<includes>", "<unknown>").name.sorted.l shouldBe List("a.i", "b.i")
cpg.method.nameExact("bar").file.name.l shouldBe List("a.i")
cpg.typeDecl.nameExact("B").file.name.l shouldBe List("b.i")
}

}

}

0 comments on commit 1b252c9

Please sign in to comment.