Skip to content

Commit

Permalink
Graph Read Benchmarks (#257)
Browse files Browse the repository at this point in the history
* Implemented read benchmarks for ODB

* Implemented read benchmarks for TinkerGraph

* Fixed infinity ops issue

* Process commit

* Draft Neo4j implemented

* Neo4j working & started benchmarking SBT task

* Running benchmarking via SBT and Scala Scripts + updated readme

* Added full dataset

* Pushed other drivers to benchmarking set
  • Loading branch information
DavidBakerEffendi authored Jul 15, 2024
1 parent 5c97e7f commit 8e904e0
Show file tree
Hide file tree
Showing 13 changed files with 834 additions and 115 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ graph.xml
gsql_client.*
*.txt
*.csv
/workspace
/results

# Ignore Gradle GUI config
gradle-app.setting
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ database backends. While the binary explains the available functions, the execut
Jmh/runMain com.github.plume.oss.Benchmark overflowdb testprogram -o output -r results --storage-location test.cpg
```

An automated script that runs the benchmarks against programs from the `defects4j` dataset is available in
`runBenchmarks.sc`, which can be executed with:
```bash
scala runBenchmarks.sc
```

## Logging

Plume uses [SLF4J](http://www.slf4j.org/) as the logging facade.
Expand Down
40 changes: 38 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ libraryDependencies ++= Seq(
"org.openjdk.jmh" % "jmh-generator-reflection" % Versions.jmh,
"org.openjdk.jmh" % "jmh-generator-asm" % Versions.jmh,
"org.slf4j" % "slf4j-api" % Versions.slf4j,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-core" % Versions.log4j % Test,
"org.apache.logging.log4j" % "log4j-slf4j-impl" % Versions.log4j % Test,
"org.scalatest" %% "scalatest" % Versions.scalatest % Test
)

Expand All @@ -64,3 +64,39 @@ developers := List(
Global / onChangedBuildSource := ReloadOnSourceChanges

publishMavenStyle := true

// Benchmark Tasks

// Static configuration values for the benchmark tasks. Declared as settings rather
// than tasks: a settingKey is evaluated once at project load, whereas a taskKey is
// re-evaluated on every invocation — unnecessary for constant values like these.
lazy val datasetDir = settingKey[File]("Dataset directory")
datasetDir := baseDirectory.value / "workspace" / "defects4j"
lazy val driversToBenchmark = settingKey[Seq[String]]("Drivers to benchmark")
driversToBenchmark := Seq("overflowdb", "tinkergraph", "neo4j-embedded")

// Maven Central JAR coordinates for the `defects4j` projects used as benchmark inputs.
// Declared as a setting (not a task): the list is constant, so there is no reason to
// re-compute it on every invocation.
lazy val defect4jDataset = settingKey[Seq[(String, String)]]("JARs for projects used in `defects4j`")
defect4jDataset :=
  Seq(
    "Chart" -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar",
    "Cli" -> "https://repo1.maven.org/maven2/commons-cli/commons-cli/1.8.0/commons-cli-1.8.0.jar",
    "Closure" -> "https://repo1.maven.org/maven2/com/google/javascript/closure-compiler/v20240317/closure-compiler-v20240317.jar",
    "Codec" -> "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar",
    "Collections" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.4/commons-collections4-4.4.jar",
    "Compress" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar",
    "Csv" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-csv/1.11.0/commons-csv-1.11.0.jar",
    "Gson" -> "https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar",
    "JacksonCore" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.17.2/jackson-core-2.17.2.jar",
    "JacksonDatabind" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.17.2/jackson-databind-2.17.2.jar",
    "JacksonXml" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/2.17.2/jackson-dataformat-xml-2.17.2.jar",
    "Jsoup" -> "https://repo1.maven.org/maven2/org/jsoup/jsoup/1.18.1/jsoup-1.18.1.jar",
    "JxPath" -> "https://repo1.maven.org/maven2/commons-jxpath/commons-jxpath/1.3/commons-jxpath-1.3.jar",
    "Lang" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar",
    "Math" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar",
    "Mockito" -> "https://repo1.maven.org/maven2/org/mockito/mockito-core/5.12.0/mockito-core-5.12.0.jar",
    "Time" -> "https://repo1.maven.org/maven2/joda-time/joda-time/2.12.7/joda-time-2.12.7.jar"
  )

// Fetches each benchmark JAR into the dataset directory; JARs already downloaded from
// the same URL are skipped (see DownloadHelper.ensureIsAvailable).
lazy val benchmarkDownloadTask = taskKey[Unit]("Download `defects4j` candidates for benchmarking")
benchmarkDownloadTask := {
  // Hoist `.value` calls out of the lambda: sbt's task macro rejects (or mis-handles)
  // dynamic `.value` references inside anonymous functions, and this evaluates the key once.
  val targetDir = datasetDir.value
  defect4jDataset.value.foreach { case (name, url) =>
    DownloadHelper.ensureIsAvailable(url, targetDir / s"$name.jar")
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ abstract class GremlinDriver(txMax: Int = 50) extends IDriver {
* @return
* a Gremlin graph traversal source.
*/
protected def g(): GraphTraversalSource = {
def g(): GraphTraversalSource = {
traversalSource match {
case Some(conn) => conn
case None =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import com.github.plume.oss.util.BatchedUpdateUtil.*
import io.shiftleft.codepropertygraph.generated.nodes.StoredNode
import org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME
import org.neo4j.dbms.api.{DatabaseManagementService, DatabaseManagementServiceBuilder}
import org.neo4j.graphdb.{Label, Transaction}
import org.neo4j.graphdb.{GraphDatabaseService, Label, Transaction}
import org.slf4j.LoggerFactory
import overflowdb.BatchedUpdate.{CreateEdge, DiffOrBuilder, SetNodeProperty}
import overflowdb.{BatchedUpdate, DetachedNodeData}
Expand Down Expand Up @@ -41,6 +41,8 @@ final class Neo4jEmbeddedDriver(
})
}

/** The underlying embedded Neo4j `GraphDatabaseService` backing this driver. */
def graph: GraphDatabaseService = graphDb

private def connect(): Unit = {
managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build()
graphDb = managementService.database(databaseName)
Expand Down
48 changes: 48 additions & 0 deletions project/DownloadHelper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import java.io.File
import java.net.URI
import java.nio.file.{Files, Path, Paths}

object DownloadHelper {
  /** Directory (relative to the project root) where we remember which URL each
    * previously downloaded file came from.
    */
  val LocalStorageDir = Paths.get(".local/source-urls")

  /** Downloads the remote file from the given url if either
    * - the localFile is not available,
    * - or the url is different from the previously downloaded file
    * - or we don't have the original url from the previously downloaded file
    * We store the information about the previously downloaded urls and the localFile in `.local`
    */
  def ensureIsAvailable(url: String, localFile: File): Unit = {
    val alreadyCurrent = localFile.exists() && previousUrlForLocalFile(localFile) == Option(url)
    if (!alreadyCurrent) {
      Files.deleteIfExists(localFile.toPath)

      println(s"[INFO] downloading $url to $localFile")
      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
        sbt.IO.transfer(inputStream, localFile)
      }

      // Record which URL produced this file so later runs can detect URL changes.
      val infoFile = storageInfoFileFor(localFile)
      Files.createDirectories(infoFile.getParent)
      Files.writeString(infoFile, url)
    }
  }

  // `path` expressed relative to the directory the build was launched from.
  private def relativePathToProjectRoot(path: Path): String = {
    val projectRoot = Paths.get("").toAbsolutePath.normalize()
    projectRoot.relativize(path.toAbsolutePath).toString
  }

  // The URL recorded by a previous download of `localFile`, if one was stored.
  private def previousUrlForLocalFile(localFile: File): Option[String] = {
    val infoFile = storageInfoFileFor(localFile)
    if (Files.exists(infoFile)) Option(Files.readString(infoFile)).filter(_.nonEmpty)
    else None
  }

  // Bookkeeping file in LocalStorageDir that mirrors `localFile`'s project-relative path.
  private def storageInfoFileFor(localFile: File): Path =
    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
}
38 changes: 38 additions & 0 deletions runBenchmarks.sc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import scala.sys.process.*
import java.nio.file.Files
import java.nio.file.Path
import scala.jdk.CollectionConverters.*

/** Compiles the project, downloads the `defects4j` benchmark JARs, then runs the JMH
  * benchmark entry point once per (driver, project) pair via sbt, writing results and
  * output files into `results/`.
  */
@main def main(): Unit = {
  println("[info] Ensuring compilation status and benchmark dataset availability...")
  "sbt compile benchmarkDownloadTask".!

  val datasetDir = Path.of("workspace", "defects4j")
  val resultsDir = Path.of("results")

  if (!Files.exists(resultsDir)) Files.createDirectory(resultsDir)

  // Builds the sbt command line that benchmarks `driver` against `project` (a JAR file name).
  def benchmarkArgs(driver: String, project: String): String = {
    val projectDir = Path.of(datasetDir.toString, project)
    val projectName = project.toLowerCase.stripSuffix(".jar")
    val resultsPath = Path.of(resultsDir.toString, s"results-$driver-$projectName")
    val outputPath = Path.of(resultsDir.toString, s"output-$driver-$projectName")
    s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath}"
  }

  println("[info] Available projects:")
  // Files.list holds a directory handle open until the stream is closed — close it
  // explicitly instead of leaking it for the lifetime of the script.
  val dirStream = Files.list(datasetDir)
  val projects =
    try dirStream.filter(_.toString.endsWith(".jar")).toList.asScala.toList
    finally dirStream.close()
  projects.foreach(p => println(s" - ${p.getFileName.toString}"))

  println("[info] Available drivers:")
  // NOTE(review): build.sbt declares three drivers; only overflowdb is exercised here — confirm intent.
  val drivers = Seq("overflowdb")
  drivers.foreach(d => println(s" - $d"))

  drivers.foreach { driver =>
    projects.foreach { project =>
      val cmd = benchmarkArgs(driver, project.getFileName.toString)
      println(s"[info] Benchmarking '$driver' on project '$project'")
      s"sbt \"$cmd\"".!
    }
  }
}
178 changes: 67 additions & 111 deletions src/main/scala/com/github/plume/oss/Benchmark.scala
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
package com.github.plume.oss

import com.github.plume.oss.Benchmark.BenchmarkType.WRITE
import com.github.plume.oss.drivers.IDriver
import io.joern.jimple2cpg.Config
import better.files.File
import com.github.plume.oss.Benchmark.BenchmarkType.*
import com.github.plume.oss.benchmarking.{
GraphWriteBenchmark,
Neo4jEmbedReadBenchmark,
OverflowDbReadBenchmark,
TinkerGraphReadBenchmark
}
import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler
import org.openjdk.jmh.annotations.{Benchmark, Level, Mode, Param, Scope, Setup, State, TearDown}
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue}
import upickle.default.*

import java.util
import java.util.concurrent.TimeUnit
import scala.compiletime.uninitialized

object Benchmark {

Expand All @@ -28,13 +33,36 @@ object Benchmark {
s"Finished WRITE JMH benchmarks. Results: ${config.jmhResultFile}-WRITE.csv; Output: ${config.jmhOutputFile}-WRITE.csv"
)

// val readOptsBenchmark = createOptionsBoilerPlate(config, READ)
// .include(classOf[OverflowDbBenchmark].getSimpleName)
// .build()
// new Runner(readOptsBenchmark).run()
// println(
// s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
// )
val readOptsBenchmark = config.dbConfig match {
case _: TinkerGraphConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[TinkerGraphReadBenchmark].getSimpleName)
.build()
)
case _: OverflowDbConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[OverflowDbReadBenchmark].getSimpleName)
.build()
)
case _: Neo4jEmbeddedConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[Neo4jEmbedReadBenchmark].getSimpleName)
.build()
)
case x =>
println(s"Read benchmarks are not available for ${x.getClass.getSimpleName}, skipping...")
Option.empty
}
readOptsBenchmark.foreach { opts =>
new Runner(opts).run()
println(
s"Finished READ JMH benchmarks. Results: ${config.jmhResultFile}-READ.csv; Output: ${config.jmhOutputFile}-READ.csv"
)
}

}
}

Expand All @@ -58,105 +86,33 @@ object Benchmark {
case READ, WRITE
}

}

@State(Scope.Benchmark)
class GraphWriteBenchmark {

@Param(Array(""))
var configStr: String = ""
var config: PlumeConfig =
if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
var driver: IDriver = uninitialized

@Setup
def setupBenchmark(params: BenchmarkParams): Unit = {
config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
driver = config.dbConfig.toDriver
}

@Setup(Level.Iteration)
def clearDriver(params: BenchmarkParams): Unit = {
driver.clear()
}

@Benchmark
def createAst(blackhole: Blackhole): Unit = {
JimpleAst2Database(driver).createAst(Config().withInputPath(config.inputDir))
Option(blackhole).foreach(_.consume(driver))
}

@TearDown
def cleanupBenchmark(): Unit = {
driver.clear()
driver.close()
}

}

sealed trait GraphReadBenchmark[D <: IDriver](protected val driver: D) {

private var nodeStart: Array[Long] = new Array[Long](0)
private var fullNames: Array[String] = uninitialized
def initializeDriverAndInputDir(configStr: String, useCachedGraph: Boolean): (IDriver, PlumeConfig) = {
val config = if (!configStr.isBlank) read[PlumeConfig](configStr) else PlumeConfig()
if (!useCachedGraph) {
config.dbConfig match {
case OverflowDbConfig(storageLocation, _, _) if !useCachedGraph =>
File(storageLocation).delete(swallowIOExceptions = true)
case TinkerGraphConfig(Some(importPath), _) if !useCachedGraph =>
File(importPath).delete(swallowIOExceptions = true)
case Neo4jEmbeddedConfig(_, databaseDir, _) if !useCachedGraph =>
File(databaseDir).delete(swallowIOExceptions = true)
case _ =>
}
}

val driver = if (useCachedGraph) {
config.dbConfig match {
case TinkerGraphConfig(Some(importPath), _) if File(importPath).exists =>
val driver = config.dbConfig.toDriver.asInstanceOf[TinkerGraphDriver]
driver.importGraph(importPath)
driver
case _ => config.dbConfig.toDriver
}
} else {
config.dbConfig.toDriver
}

@Setup
def setupFun(params: BenchmarkParams): Unit = {
params.getBenchmark
driver -> config
}

@Benchmark
def astDFS(blackhole: Blackhole): Int

@Benchmark
def astUp(blackhole: Blackhole): Int

@Benchmark
def orderSumChecked(blackhole: Blackhole): Int

@Benchmark
def orderSumUnchecked(blackhole: Blackhole): Int

@Benchmark
def orderSumExplicit(blackhole: Blackhole): Int

@Benchmark
def callOrderTrav(blackhole: Blackhole): Int

@Benchmark
def callOrderExplicit(blackhole: Blackhole): Int

@Benchmark
def indexedMethodFullName(bh: Blackhole): Unit

@Benchmark
def unindexedMethodFullName(bh: Blackhole): Unit

}

//@State(Scope.Benchmark)
//class OverflowDbBenchmark(config: OverflowDbConfig)
// extends GraphReadBenchmark(
// ) {
//
// override def createAst(blackhole: Blackhole): Int = {
// 0
// }
//
// override def astDFS(blackhole: Blackhole): Int = ???
//
// override def astUp(blackhole: Blackhole): Int = ???
//
// override def orderSumChecked(blackhole: Blackhole): Int = ???
//
// override def orderSumUnchecked(blackhole: Blackhole): Int = ???
//
// override def orderSumExplicit(blackhole: Blackhole): Int = ???
//
// override def callOrderTrav(blackhole: Blackhole): Int = ???
//
// override def callOrderExplicit(blackhole: Blackhole): Int = ???
//
// override def indexedMethodFullName(bh: Blackhole): Unit = ???
//
// override def unindexedMethodFullName(bh: Blackhole): Unit = ???
//}
Loading

0 comments on commit 8e904e0

Please sign in to comment.