Skip to content

Commit

Permalink
[ruby] Download & Summarize Dependencies (joernio#4327)
Browse files Browse the repository at this point in the history
* Unified the download dependency option as a trait under `X2Cpg` to be readily added to configurations
* Dependencies are parsed from `Gemfile` and `Gemfile.lock`
* Downloads, unarchives, and parses dependencies for symbol information
* Library symbols successfully resolved against `RubyScope` and the `RubyProgramSummary` classes
* Made sure all relative paths in `fullName` properties are Unix delimited
  • Loading branch information
DavidBakerEffendi authored Mar 13, 2024
1 parent e02af03 commit 7e81351
Show file tree
Hide file tree
Showing 18 changed files with 638 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@ import io.joern.csharpsrc2cpg.Frontend.{cmdLineParser, defaultConfig}
import io.joern.x2cpg.astgen.AstGenConfig
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery}
import io.joern.x2cpg.utils.Environment
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.{DependencyDownloadConfig, X2CpgConfig, X2CpgMain}
import org.slf4j.LoggerFactory
import scopt.OParser

import java.nio.file.Paths

final case class Config(downloadDependencies: Boolean = false)
extends X2CpgConfig[Config]
with DependencyDownloadConfig[Config]
with TypeRecoveryParserConfig[Config]
with AstGenConfig[Config] {

override val astGenProgramName: String = "dotnetastgen"
override val astGenConfigPrefix: String = "csharpsrc2cpg"

def withDownloadDependencies(value: Boolean): Config = {
this.copy(downloadDependencies = value).withInheritedFields(this)
/** Returns a copy of this configuration with `downloadDependencies` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
override def withDownloadDependencies(value: Boolean): Config = {
  val updated = this.copy(downloadDependencies = value)
  updated.withInheritedFields(this)
}

}
Expand All @@ -30,13 +31,7 @@ object Frontend {
val cmdLineParser: OParser[Unit, Config] = {
val builder = OParser.builder[Config]
import builder.*
OParser.sequence(
programName("csharpsrc2cpg"),
opt[Unit]("download-dependencies")
.text("Download the dependencies of the target project and use their symbols to resolve types.")
.action((_, c) => c.withDownloadDependencies(true)),
XTypeRecovery.parserOptions
)
OParser.sequence(programName("csharpsrc2cpg"), DependencyDownloadConfig.parserOptions, XTypeRecovery.parserOptions)
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ class DependencyDownloader(cpg: Cpg, config: Config, internalProgramSummary: CSh
s"Exception occurred while downloading $fileName (${dependency.name}:${dependency.version})",
exception
)
None
case Success(_) =>
logger.info(s"Successfully downloaded dependency ${dependency.name}:${dependency.version}")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
package io.joern.kotlin2cpg

import io.joern.kotlin2cpg.Frontend._
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.kotlin2cpg.Frontend.*
import io.joern.x2cpg.{DependencyDownloadConfig, X2CpgConfig, X2CpgMain}
import scopt.OParser

case class DefaultContentRootJarPath(path: String, isResource: Boolean)

final case class Config(
classpath: Set[String] = Set.empty,
withStdlibJarsInClassPath: Boolean = true,
downloadDependencies: Boolean = false,
gradleProjectName: Option[String] = None,
gradleConfigurationName: Option[String] = None,
jar4importServiceUrl: Option[String] = None,
includeJavaSourceFiles: Boolean = false,
generateNodesForDependencies: Boolean = false
) extends X2CpgConfig[Config] {
generateNodesForDependencies: Boolean = false,
downloadDependencies: Boolean = false
) extends X2CpgConfig[Config]
with DependencyDownloadConfig[Config] {

def withClasspath(classpath: Set[String]): Config = {
this.copy(classpath = classpath).withInheritedFields(this)
Expand All @@ -25,10 +26,6 @@ final case class Config(
this.copy(withStdlibJarsInClassPath = value).withInheritedFields(this)
}

def withDownloadDependencies(value: Boolean): Config = {
this.copy(downloadDependencies = value).withInheritedFields(this)
}

/** Returns a copy of this configuration whose `gradleProjectName` is `Some(name)`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
def withGradleProjectName(name: String): Config = {
  val projectName = Some(name)
  val updated     = copy(gradleProjectName = projectName)
  updated.withInheritedFields(this)
}
Expand All @@ -48,6 +45,10 @@ final case class Config(
/** Returns a copy of this configuration with `generateNodesForDependencies` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
def withGenerateNodesForDependencies(value: Boolean): Config = {
  val updated = copy(generateNodesForDependencies = value)
  updated.withInheritedFields(this)
}

/** Returns a copy of this configuration with `downloadDependencies` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
override def withDownloadDependencies(value: Boolean): Config = {
  val updated = copy(downloadDependencies = value)
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand All @@ -69,9 +70,6 @@ private object Frontend {
opt[String]("jar4import-url")
.text("Set URL of service which fetches necessary dependency jars for import names found in the project")
.action((value, c) => c.withJar4ImportServiceUrl(value)),
opt[Unit]("download-dependencies")
.text("Download the dependencies of the target project and add them to the classpath")
.action((_, c) => c.withDownloadDependencies(true)),
opt[String]("gradle-project-name")
.text("Name of the Gradle project used to download dependencies")
.action((value, c) => c.withGradleProjectName(value)),
Expand All @@ -83,7 +81,8 @@ private object Frontend {
.action((_, c) => c.withIncludeJavaSourceFiles(true)),
opt[Unit]("generate-nodes-for-dependencies")
.text("Generate nodes for the dependencies of the target project")
.action((_, c) => c.withGenerateNodesForDependencies(true))
.action((_, c) => c.withGenerateNodesForDependencies(true)),
DependencyDownloadConfig.parserOptions
)
}
}
Expand Down
1 change: 1 addition & 0 deletions joern-cli/frontends/rubysrc2cpg/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dependsOn(Projects.dataflowengineoss % "compile->compile;test->test", Projects.x

libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"org.apache.commons" % "commons-compress" % "1.26.1", // For unpacking Gems with `--download-dependencies`
"org.scalatest" %% "scalatest" % Versions.scalatest % Test,
"org.antlr" % "antlr4-runtime" % Versions.antlr
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,32 @@ package io.joern.rubysrc2cpg

import io.joern.rubysrc2cpg.Frontend.*
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery}
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.{DependencyDownloadConfig, X2CpgConfig, X2CpgMain}
import scopt.OParser

final case class Config(
enableDependencyDownload: Boolean = false,
antlrCacheMemLimit: Double = 0.6d,
useDeprecatedFrontend: Boolean = false
useDeprecatedFrontend: Boolean = false,
downloadDependencies: Boolean = false
) extends X2CpgConfig[Config]
with DependencyDownloadConfig[Config]
with TypeRecoveryParserConfig[Config] {

// By default, ignore any path that contains a segment named exactly `spec` or `test`. Two patterns are built per
// directory name: one where the segment is delimited by backslashes and one where it is delimited by forward
// slashes. Both are unanchored, so the segment may occur anywhere in the path.
this.defaultIgnoredFilesRegex = List("spec", "test").flatMap { directory =>
List(s"(^|\\\\)$directory($$|\\\\)".r.unanchored, s"(^|/)$directory($$|/)".r.unanchored)
}

def withEnableDependencyDownload(value: Boolean): Config = {
copy(enableDependencyDownload = value).withInheritedFields(this)
}

/** Returns a copy of this configuration with `antlrCacheMemLimit` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
def withAntlrCacheMemoryLimit(value: Double): Config = {
  val updated = this.copy(antlrCacheMemLimit = value)
  updated.withInheritedFields(this)
}

/** Returns a copy of this configuration with `useDeprecatedFrontend` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
def withUseDeprecatedFrontend(value: Boolean): Config = {
  val updated = this.copy(useDeprecatedFrontend = value)
  updated.withInheritedFields(this)
}

/** Returns a copy of this configuration with `downloadDependencies` set to `value`.
  *
  * `withInheritedFields(this)` is invoked on the copy; presumably this carries over settings that are not part of the
  * case-class constructor (verify against `X2CpgConfig`).
  */
override def withDownloadDependencies(value: Boolean): Config = {
  val updated = this.copy(downloadDependencies = value)
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand All @@ -38,10 +39,6 @@ private object Frontend {
import builder.*
OParser.sequence(
programName("rubysrc2cpg"),
opt[Unit]("enableDependencyDownload")
.hidden()
.action((_, c) => c.withEnableDependencyDownload(true))
.text("enable dependency download for Unix System only"),
opt[Double]("antlrCacheMemLimit")
.hidden()
.action((x, c) => c.withAntlrCacheMemoryLimit(x))
Expand All @@ -57,6 +54,7 @@ private object Frontend {
opt[Unit]("useDeprecatedFrontend")
.action((_, c) => c.withUseDeprecatedFrontend(true))
.text("uses the original (but deprecated) Ruby frontend (default false)"),
DependencyDownloadConfig.parserOptions,
XTypeRecovery.parserOptions
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ package io.joern.rubysrc2cpg

import better.files.File
import io.joern.rubysrc2cpg.astcreation.AstCreator
import io.joern.rubysrc2cpg.astcreation.GlobalTypes
import io.joern.rubysrc2cpg.datastructures.RubyProgramSummary
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser
import io.joern.rubysrc2cpg.deprecated.parser.DeprecatedRubyParser.*
import io.joern.rubysrc2cpg.parser.RubyParser
import io.joern.rubysrc2cpg.passes.{AstCreationPass, ConfigFileCreationPass}
import io.joern.rubysrc2cpg.passes.{AstCreationPass, ConfigFileCreationPass, DependencyPass, ImportsPass}
import io.joern.rubysrc2cpg.utils.DependencyDownloader
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.passes.base.AstLinkerPass
import io.joern.x2cpg.passes.callgraph.NaiveCallLinker
Expand All @@ -23,17 +23,16 @@ import org.slf4j.LoggerFactory
import java.nio.file.{Files, Paths}
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try, Using}
import io.joern.rubysrc2cpg.passes.ImportsPass

class RubySrc2Cpg extends X2CpgFrontend[Config] {

private val logger = LoggerFactory.getLogger(this.getClass)
private val RubySourceFileExtensions: Set[String] = Set(".rb")
private val logger = LoggerFactory.getLogger(this.getClass)

override def createCpg(config: Config): Try[Cpg] = {
withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) =>
new MetaDataPass(cpg, Languages.RUBYSRC, config.inputPath).createAndApply()
new ConfigFileCreationPass(cpg).createAndApply()
new DependencyPass(cpg).createAndApply()
if (config.useDeprecatedFrontend) {
deprecatedCreateCpgAction(cpg, config)
} else {
Expand All @@ -44,22 +43,28 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {

private def newCreateCpgAction(cpg: Cpg, config: Config): Unit = {
Using.resource(new parser.ResourceManagedParser(config.antlrCacheMemLimit)) { parser =>
// TODO: enableDependencyDownload
val astCreators = ConcurrentTaskUtil
.runUsingThreadPool(generateParserTasks(parser, config, cpg.metaData.root.headOption))
.runUsingThreadPool(RubySrc2Cpg.generateParserTasks(parser, config, cpg.metaData.root.headOption))
.flatMap {
case Failure(exception) => logger.warn(s"Could not parse file, skipping - ", exception); None
case Success(astCreator) => Option(astCreator)
}
// Pre-parse the AST creators for high level structures
val programSummary = ConcurrentTaskUtil
val internalProgramSummary = ConcurrentTaskUtil
.runUsingThreadPool(astCreators.map(x => () => x.summarize()).iterator)
.flatMap {
case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
case Success(summary) => Option(summary)
}
.reduceOption((a, b) => a ++ b)
.getOrElse(RubyProgramSummary())

val programSummary = if (config.downloadDependencies) {
DependencyDownloader(cpg, internalProgramSummary).download()
} else {
internalProgramSummary
}

val astCreationPass = new AstCreationPass(cpg, astCreators.map(_.withSummary(programSummary)))
astCreationPass.createAndApply()
val importsPass = new ImportsPass(cpg)
Expand All @@ -68,31 +73,9 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
}
}

private def generateParserTasks(
resourceManagedParser: parser.ResourceManagedParser,
config: Config,
projectRoot: Option[String]
): Iterator[() => AstCreator] = {
SourceFiles
.determine(
config.inputPath,
RubySourceFileExtensions,
ignoredDefaultRegex = Option(config.defaultIgnoredFilesRegex),
ignoredFilesRegex = Option(config.ignoredFilesRegex),
ignoredFilesPath = Option(config.ignoredFiles)
)
.map { fileName => () =>
resourceManagedParser.parse(fileName) match {
case Failure(exception) => throw exception
case Success(ctx) => new AstCreator(fileName, ctx, projectRoot)(config.schemaValidation)
}
}
.iterator
}

private def deprecatedCreateCpgAction(cpg: Cpg, config: Config): Unit = try {
Using.resource(new deprecated.astcreation.ResourceManagedParser(config.antlrCacheMemLimit)) { parser =>
if (config.enableDependencyDownload && !scala.util.Properties.isWin) {
if (config.downloadDependencies && !scala.util.Properties.isWin) {
val tempDir = File.newTemporaryDirectory()
try {
downloadDependency(config.inputPath, tempDir.toString())
Expand All @@ -111,7 +94,7 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
val tasks = SourceFiles
.determine(
config.inputPath,
RubySourceFileExtensions,
RubySrc2Cpg.RubySourceFileExtensions,
ignoredFilesRegex = Option(config.ignoredFilesRegex),
ignoredFilesPath = Option(config.ignoredFiles)
)
Expand Down Expand Up @@ -158,7 +141,8 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
object RubySrc2Cpg {

// TODO: Global mutable state is bad and should be avoided in the next iteration of the Ruby frontend
val packageTableInfo = new deprecated.utils.PackageTable()
val packageTableInfo = new deprecated.utils.PackageTable()
private val RubySourceFileExtensions: Set[String] = Set(".rb")

def postProcessingPasses(cpg: Cpg, config: Config): List[CpgPassBase] = {
if (config.useDeprecatedFrontend) {
Expand All @@ -176,4 +160,26 @@ object RubySrc2Cpg {
}
}

/** Builds one deferred parsing task per Ruby source file found under `config.inputPath`.
  *
  * Files are selected by `SourceFiles.determine` using `RubySourceFileExtensions` together with the default-ignore
  * regexes, the user-supplied ignore regex and the ignored file paths taken from `config`.
  *
  * @param resourceManagedParser
  *   the parser used to parse each file when its task is invoked.
  * @param config
  *   supplies the input path, the ignore settings and the schema validation mode.
  * @param projectRoot
  *   passed through unchanged to every created `AstCreator`.
  * @return
  *   an iterator of thunks; invoking a thunk parses its file and returns an `AstCreator` for it, or throws the
  *   exception reported by the parser if parsing failed.
  */
def generateParserTasks(
  resourceManagedParser: parser.ResourceManagedParser,
  config: Config,
  projectRoot: Option[String]
): Iterator[() => AstCreator] = {
  val sourceFileNames = SourceFiles.determine(
    config.inputPath,
    RubySourceFileExtensions,
    ignoredDefaultRegex = Option(config.defaultIgnoredFilesRegex),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )

  // Nothing is parsed here: the work happens only when the returned thunk is invoked.
  def parseTask(fileName: String): () => AstCreator = () =>
    resourceManagedParser.parse(fileName) match {
      case Success(ctx)       => new AstCreator(fileName, ctx, projectRoot)(config.schemaValidation)
      case Failure(exception) => throw exception
    }

  sourceFileNames.map(fileName => parseTask(fileName)).iterator
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate
import overflowdb.BatchedUpdate.DiffGraphBuilder

import java.util.regex.Matcher

class AstCreator(
val fileName: String,
protected val programCtx: RubyParser.ProgramContext,
Expand All @@ -38,7 +40,15 @@ class AstCreator(
protected var parseLevel: AstParseLevel = AstParseLevel.FULL_AST

protected val relativeFileName: String =
projectRoot.map(fileName.stripPrefix).map(_.stripPrefix(java.io.File.separator)).getOrElse(fileName)
projectRoot
.map(fileName.stripPrefix)
.map(_.stripPrefix(java.io.File.separator))
.getOrElse(fileName)

/** The relative file name, in a unix path delimited format: every occurrence of the platform's
  * `java.io.File.separator` in `relativeFileName` is replaced by `/`.
  *
  * A literal `String.replace` is used rather than `replaceAll`, because the separator is plain text and not a regular
  * expression (`\` on Windows would otherwise need pattern quoting).
  */
private def relativeUnixStyleFileName: String =
  relativeFileName.replace(java.io.File.separator, "/")

override def createAst(): BatchedUpdate.DiffGraphBuilder = {
val rootNode = new RubyNodeCreator().visit(programCtx).asInstanceOf[StatementList]
Expand All @@ -53,7 +63,7 @@ class AstCreator(
*/
protected def astForRubyFile(rootStatements: StatementList): Ast = {
val fileNode = NewFile().name(relativeFileName)
val fullName = s"$relativeFileName:${NamespaceTraversal.globalNamespaceName}"
val fullName = s"$relativeUnixStyleFileName:${NamespaceTraversal.globalNamespaceName}"
val namespaceBlock = NewNamespaceBlock()
.filename(relativeFileName)
.name(NamespaceTraversal.globalNamespaceName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) {
// If this is a simple object instantiation assignment, we can give the LHS variable a type hint
if (node.rhs.isInstanceOf[ObjectInstantiation] && lhsAst.root.exists(_.isInstanceOf[NewIdentifier])) {
rhsAst.nodes.collectFirst {
case tmp: NewIdentifier if tmp.name.startsWith("<tmp") =>
case tmp: NewIdentifier if tmp.name.startsWith("<tmp") && tmp.typeFullName != Defines.Any =>
lhsAst.root.collectFirst { case i: NewIdentifier =>
scope.lookupVariable(i.name).foreach {
case x: NewLocal =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import io.shiftleft.codepropertygraph.generated.nodes.{Local, Member, Method, Ty
import io.shiftleft.semanticcpg.language.*
import overflowdb.{BatchedUpdate, Config}

import java.io.File as JavaFile
import java.util.regex.Matcher
import scala.util.Using

trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
Expand Down Expand Up @@ -67,7 +69,9 @@ trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: A

val mappings =
cpg.namespaceBlock.flatMap { namespace =>
val path = namespace.filename.stripSuffix(".rb")
val path = namespace.filename
.replaceAll(Matcher.quoteReplacement(JavaFile.separator), "/") // handle Windows paths
.stripSuffix(".rb")
// Map module functions/variables
val moduleEntry = (path, namespace.fullName) -> namespace.method.map { module =>
val moduleTypeMap =
Expand Down
Loading

0 comments on commit 7e81351

Please sign in to comment.