Skip to content

Commit

Permalink
🔖 Release v1.2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBakerEffendi committed Apr 5, 2022
1 parent cad90ee commit 19685b5
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 116 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [1.2.3] - 2022-04-05

### Changed

- Added ability to disable cache sharing on `flowsBetween`.
- Simplified `EngineContext` and `Semantics` on `OverflowDbDriver` startup.
- `OverflowDbDriver::methodSemantics` now public.

## [1.2.2] - 2022-04-05

### Changed

- Upgraded Joern version to include configurations that disable cache sharing.

## [1.2.1] - 2022-04-01

### Changed
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name := "Plume"
inThisBuild(
List(
organization := "com.github.plume-oss",
version := "1.2.2",
version := "1.2.3",
scalaVersion := "2.13.8",
crossScalaVersions := Seq("2.13.8", "3.1.1"),
resolvers ++= Seq(
Expand Down
18 changes: 6 additions & 12 deletions src/main/scala/com/github/plume/oss/Plume.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
package com.github.plume.oss

import better.files.File
import com.github.plume.oss.drivers.{
IDriver,
Neo4jDriver,
NeptuneDriver,
OverflowDbDriver,
TigerGraphDriver,
TinkerGraphDriver
}
import com.github.plume.oss.drivers._
import com.github.plume.oss.util.DataFlowCacheConfig
import io.circe.Json
import io.joern.x2cpg.{X2Cpg, X2CpgConfig}
import scopt.OParser
Expand Down Expand Up @@ -60,14 +54,14 @@ object Plume extends App {
private def createDriver(conf: DriverConfig): IDriver = {
conf match {
case _ if conf.database == "OverflowDB" =>
val d = new OverflowDbDriver(
new OverflowDbDriver(
storageLocation = Option(conf.params.getOrElse("storageLocation", "cpg.odb")),
heapPercentageThreshold = conf.params.getOrElse("heapPercentageThreshold", "80").toInt,
serializationStatsEnabled =
conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean
conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean,
cacheConfig =
DataFlowCacheConfig(maxCallDepth = conf.params.getOrElse("maxCallDepth", "2").toInt)
)
d.setDataflowContext(conf.params.getOrElse("maxCallDepth", "2").toInt)
d
case _ if conf.database == "TinkerGraph" => new TinkerGraphDriver()
case _ if conf.database == "Neo4j" =>
new Neo4jDriver(
Expand Down
8 changes: 0 additions & 8 deletions src/main/scala/com/github/plume/oss/domain/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,6 @@ package object domain {
.addModule(DefaultScalaModule)
.build()

case class DataFlowCacheConfig(
dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
compressDataFlowCache: Boolean = true,
maxCallDepth: Int = 2,
maxCachedPaths: Int = 1_000,
shareCacheBetweenTasks: Boolean = false
)

/** Given an object and a path, will serialize the object to the given path.
* @param o object to serialize.
* @param p path to write serialized data to.
Expand Down
115 changes: 38 additions & 77 deletions src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import com.github.plume.oss.domain._
import com.github.plume.oss.drivers.OverflowDbDriver.newOverflowGraph
import com.github.plume.oss.passes.callgraph.PlumeDynamicCallLinker
import com.github.plume.oss.util.BatchedUpdateUtil._
import com.github.plume.oss.util.DataFlowCacheConfig
import com.github.plume.oss.util.DataFlowEngineUtil.setDataflowContext
import io.joern.dataflowengineoss.language.toExtendedCfgNode
import io.joern.dataflowengineoss.queryengine._
import io.joern.dataflowengineoss.semanticsloader.{Parser, Semantics}
Expand Down Expand Up @@ -58,10 +60,24 @@ final case class OverflowDbDriver(
val cpg: Cpg =
PlumeStatistics.time(PlumeStatistics.TIME_OPEN_DRIVER, { newOverflowGraph(odbConfig) })

private val semanticsParser = new Parser()
private val defaultSemantics: Try[BufferedSource] = Try(
Source.fromInputStream(getClass.getClassLoader.getResourceAsStream("default.semantics"))
)
private val defaultSemanticsFile = "default.semantics"
val methodSemantics: Semantics = cacheConfig.methodSemantics match {
case Some(semantics) => Semantics.fromList(semantics)
case None =>
logger.info("No specified method semantics file given. Using default semantics.")
Try(
Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(defaultSemanticsFile))
) match {
case Failure(e) =>
logger.warn(
"No 'default.semantics' file found under resources - data flow tracking will over-taint.",
e
)
Semantics.fromList(List())
case Success(input: BufferedSource) =>
Semantics.fromList(new Parser().parse(input.getLines().mkString("\n")))
}
}

/** Reads the saved cache on the disk and retrieves it as a serializable object
*/
Expand All @@ -85,16 +101,6 @@ final case class OverflowDbDriver(
{ deserializeResultTable(fetchCacheFromDisk, cpg) }
)

private implicit var context: EngineContext =
EngineContext(
Semantics.fromList(List()),
EngineConfig(
maxCallDepth = cacheConfig.maxCallDepth,
initialTable = resultTable,
shareCacheBetweenTasks = cacheConfig.shareCacheBetweenTasks
)
)

private def saveDataflowCache(): Unit = cacheConfig.dataFlowCacheFile match {
case Some(filePath) if resultTable.isDefined && resultTable.get.table.nonEmpty =>
PlumeStatistics.time(
Expand All @@ -112,59 +118,6 @@ final case class OverflowDbDriver(
case _ => // Do nothing
}

/** Sets the context for the data-flow engine when performing [[flowsBetween]] queries.
*
* @param maxCallDepth the new method call depth.
* @param methodSemantics the file containing method semantics for external methods.
* @param initialCache an initializer for the data-flow cache containing pre-calculated paths.
*/
def setDataflowContext(
maxCallDepth: Int,
methodSemantics: Option[BufferedSource] = None,
initialCache: Option[ResultTable] = None,
shareCacheBetweenTasks: Boolean = false
): EngineContext = {
val cache =
if (initialCache.isDefined) initialCache else resultTable

if (methodSemantics.isDefined) {
setDataflowContext(
maxCallDepth,
Semantics.fromList(semanticsParser.parse(methodSemantics.get.getLines().mkString("\n"))),
cache,
shareCacheBetweenTasks
)
} else if (defaultSemantics.isSuccess) {
logger.info(
"No specified method semantics file given. Using default semantics."
)
setDataflowContext(
maxCallDepth,
Semantics.fromList(semanticsParser.parse(defaultSemantics.get.getLines().mkString("\n"))),
cache,
shareCacheBetweenTasks
)
} else {
logger.warn(
"No \"default.semantics\" file found under resources - data flow tracking may not perform correctly."
)
setDataflowContext(maxCallDepth, Semantics.fromList(List()), cache, shareCacheBetweenTasks)
}
}

private def setDataflowContext(
maxCallDepth: Int,
methodSemantics: Semantics,
cache: Option[ResultTable],
shareCacheBetweenTasks: Boolean
): EngineContext = {
context = EngineContext(
methodSemantics,
EngineConfig(maxCallDepth, cache, shareCacheBetweenTasks)
)
context
}

override def isConnected: Boolean = !cpg.graph.isClosed

override def close(): Unit = PlumeStatistics.time(
Expand Down Expand Up @@ -380,19 +333,21 @@ final case class OverflowDbDriver(
* @param source the source query to match.
* @param sink the sink query to match.
* @param sanitizers a set of full method names to filter paths out with.
* @param noCacheSharing specifies if this run should not share cache results between tasks.
* @return the source nodes whose data flows to the given sinks uninterrupted.
*/
def flowsBetween(
source: Traversal[CfgNode],
sink: Traversal[CfgNode],
sanitizers: Set[String] = Set.empty[String]
sanitizers: Set[String] = Set.empty[String],
noCacheSharing: Boolean = false
): List[ReachableByResult] =
PlumeStatistics.time(
PlumeStatistics.TIME_REACHABLE_BY_QUERYING, {
import io.shiftleft.semanticcpg.language._

prepareInitialTable()
val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context)
val engineContext = prepareInitialTable(noCacheSharing)
val results: List[ReachableByResult] = sink.reachableByDetailed(source)(engineContext)
captureDataflowCache(results)

results
Expand All @@ -412,16 +367,16 @@ final case class OverflowDbDriver(
}
)

private def prepareInitialTable(): Unit = {
private def prepareInitialTable(noCacheSharing: Boolean): EngineContext = {
cacheConfig.dataFlowCacheFile match {
case Some(_) =>
val oldCache = resultTable.getOrElse(new ResultTable)
if (oldCache.table.map(_._2.size).sum <= cacheConfig.maxCachedPaths) {
setDataflowContext(
context.config.maxCallDepth,
context.semantics,
cacheConfig.maxCallDepth,
methodSemantics,
Some(oldCache),
cacheConfig.shareCacheBetweenTasks
shareCacheBetweenTasks = !noCacheSharing
)
} else {
val newCache = new ResultTable
Expand All @@ -439,13 +394,19 @@ final case class OverflowDbDriver(
oldCache.table.clear()
resultTable = Some(newCache)
setDataflowContext(
context.config.maxCallDepth,
context.semantics,
cacheConfig.maxCallDepth,
methodSemantics,
resultTable,
cacheConfig.shareCacheBetweenTasks
shareCacheBetweenTasks = !noCacheSharing
)
}
case _ =>
setDataflowContext(
cacheConfig.maxCallDepth,
methodSemantics,
None,
shareCacheBetweenTasks = !noCacheSharing
)
}
}

Expand Down
40 changes: 40 additions & 0 deletions src/main/scala/com/github/plume/oss/util/DataFlowEngineUtil.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package com.github.plume.oss.util

import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext, ResultTable}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}

import java.nio.file.{Path, Paths}

/** Helper methods for setting up the data flow engine execution context.
*/
object DataFlowEngineUtil {

  /** Builds the execution context used by the data-flow engine when performing
    * [[com.github.plume.oss.drivers.OverflowDbDriver.flowsBetween()]] queries.
    *
    * @param maxCallDepth the new method call depth.
    * @param methodSemantics the file containing method semantics for external methods.
    * @param initialCache an initializer for the data-flow cache containing pre-calculated paths.
    * @param shareCacheBetweenTasks enables the sharing of cache between data flow tasks.
    */
  def setDataflowContext(
    maxCallDepth: Int,
    methodSemantics: Semantics,
    initialCache: Option[ResultTable],
    shareCacheBetweenTasks: Boolean
  ): EngineContext = {
    // Assemble the engine configuration first, then wrap it with the semantics
    // into the context handed to reachableByDetailed queries.
    val engineConfig = EngineConfig(maxCallDepth, initialCache, shareCacheBetweenTasks)
    EngineContext(methodSemantics, engineConfig)
  }
}

/** Configuration for the data-flow engine's result caching on the OverflowDB driver.
  *
  * @param methodSemantics pre-parsed semantics for external methods; when `None` the driver
  *   falls back to the bundled `default.semantics` resource.
  * @param dataFlowCacheFile path where the data-flow cache is persisted between runs; `None`
  *   disables saving/loading the cache to disk.
  * @param compressDataFlowCache presumably toggles compression of the serialized cache file —
  *   confirm against the serialization code in the `domain` package.
  * @param maxCallDepth maximum method call depth passed to the engine's `EngineConfig`.
  * @param maxCachedPaths threshold on the total number of cached paths; once exceeded the
  *   driver rebuilds the cache (see `OverflowDbDriver.prepareInitialTable`).
  * @param shareCacheBetweenTasks enables sharing of the cache between data-flow tasks.
  */
case class DataFlowCacheConfig(
  methodSemantics: Option[List[FlowSemantic]] = None,
  dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
  compressDataFlowCache: Boolean = true,
  maxCallDepth: Int = 2,
  maxCachedPaths: Int = 1_000,
  shareCacheBetweenTasks: Boolean = false
)
29 changes: 12 additions & 17 deletions src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.github.plume.oss.drivers

import com.github.plume.oss.testfixtures.PlumeDriverFixture
import com.github.plume.oss.testfixtures.PlumeDriverFixture.{b1, m1}
import com.github.plume.oss.util.DataFlowCacheConfig
import io.joern.dataflowengineoss.semanticsloader.Parser
import io.shiftleft.codepropertygraph.generated.{Cpg, EdgeTypes}
import io.shiftleft.passes.IntervalKeyPool
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
Expand All @@ -22,27 +24,20 @@ class OverflowDbTests extends PlumeDriverFixture(new OverflowDbDriver()) {
Files.write(methodSemanticsPath, "\"Foo.bar\" 1->-1\n".getBytes(StandardCharsets.UTF_8))

"should allow for custom method semantics to be defined" in {
driver match {
case x: OverflowDbDriver =>
x.setDataflowContext(
2,
Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath)))
)
}
val parser = new Parser()
val rawSemantics = Source
.fromInputStream(Files.newInputStream(methodSemanticsPath))
.getLines()
.mkString("\n")
val config = DataFlowCacheConfig(methodSemantics = Some(parser.parse(rawSemantics)))
new OverflowDbDriver(cacheConfig = config).close()
}

"should handle the case where no default semantics can be retrieved" in {
val field: Field = driver.getClass.getDeclaredField("defaultSemantics")
val field: Field = driver.getClass.getDeclaredField("defaultSemanticsFile")
field.setAccessible(true)
field.set(driver, Try.apply(throw new Exception("Foo")))

driver match {
case x: OverflowDbDriver =>
x.setDataflowContext(
2,
Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath)))
)
}
field.set(driver, null)
new OverflowDbDriver().close()
}

"should be able to serialize and deserialize XML graphs without throwing an exception" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package com.github.plume.oss.testfixtures
import com.github.plume.oss.{Jimple2Cpg, PlumeStatistics}
import com.github.plume.oss.drivers.OverflowDbDriver
import com.github.plume.oss.JavaCompiler.compileJava
import com.github.plume.oss.domain.DataFlowCacheConfig
import com.github.plume.oss.util.DataFlowCacheConfig
import io.joern.x2cpg.testfixtures.{CodeToCpgFixture, LanguageFrontend}
import io.shiftleft.codepropertygraph.Cpg
import org.slf4j.LoggerFactory
Expand Down

0 comments on commit 19685b5

Please sign in to comment.