Skip to content

Commit

Permalink
🔖 Release v1.2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBakerEffendi committed Apr 5, 2022
1 parent cad90ee commit 19685b5
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 116 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [1.2.3] - 2022-04-05

### Changed

- Added ability to disable cache sharing on `flowsBetween`.
- Simplified `EngineContext` and `Semantics` on `OverflowDbDriver` startup.
- `OverflowDbDriver::methodSemantics` now public.

## [1.2.2] - 2022-04-05

### Changed

- Upgraded Joern version to include configurations that disable cache sharing.

## [1.2.1] - 2022-04-01

### Changed
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name := "Plume"
inThisBuild(
List(
organization := "com.github.plume-oss",
version := "1.2.2",
version := "1.2.3",
scalaVersion := "2.13.8",
crossScalaVersions := Seq("2.13.8", "3.1.1"),
resolvers ++= Seq(
Expand Down
18 changes: 6 additions & 12 deletions src/main/scala/com/github/plume/oss/Plume.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
package com.github.plume.oss

import better.files.File
import com.github.plume.oss.drivers.{
IDriver,
Neo4jDriver,
NeptuneDriver,
OverflowDbDriver,
TigerGraphDriver,
TinkerGraphDriver
}
import com.github.plume.oss.drivers._
import com.github.plume.oss.util.DataFlowCacheConfig
import io.circe.Json
import io.joern.x2cpg.{X2Cpg, X2CpgConfig}
import scopt.OParser
Expand Down Expand Up @@ -60,14 +54,14 @@ object Plume extends App {
private def createDriver(conf: DriverConfig): IDriver = {
conf match {
case _ if conf.database == "OverflowDB" =>
val d = new OverflowDbDriver(
new OverflowDbDriver(
storageLocation = Option(conf.params.getOrElse("storageLocation", "cpg.odb")),
heapPercentageThreshold = conf.params.getOrElse("heapPercentageThreshold", "80").toInt,
serializationStatsEnabled =
conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean
conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean,
cacheConfig =
DataFlowCacheConfig(maxCallDepth = conf.params.getOrElse("maxCallDepth", "2").toInt)
)
d.setDataflowContext(conf.params.getOrElse("maxCallDepth", "2").toInt)
d
case _ if conf.database == "TinkerGraph" => new TinkerGraphDriver()
case _ if conf.database == "Neo4j" =>
new Neo4jDriver(
Expand Down
8 changes: 0 additions & 8 deletions src/main/scala/com/github/plume/oss/domain/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,6 @@ package object domain {
.addModule(DefaultScalaModule)
.build()

case class DataFlowCacheConfig(
dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
compressDataFlowCache: Boolean = true,
maxCallDepth: Int = 2,
maxCachedPaths: Int = 1_000,
shareCacheBetweenTasks: Boolean = false
)

/** Given an object and a path, will serialize the object to the given path.
* @param o object to serialize.
* @param p path to write serialized data to.
Expand Down
115 changes: 38 additions & 77 deletions src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import com.github.plume.oss.domain._
import com.github.plume.oss.drivers.OverflowDbDriver.newOverflowGraph
import com.github.plume.oss.passes.callgraph.PlumeDynamicCallLinker
import com.github.plume.oss.util.BatchedUpdateUtil._
import com.github.plume.oss.util.DataFlowCacheConfig
import com.github.plume.oss.util.DataFlowEngineUtil.setDataflowContext
import io.joern.dataflowengineoss.language.toExtendedCfgNode
import io.joern.dataflowengineoss.queryengine._
import io.joern.dataflowengineoss.semanticsloader.{Parser, Semantics}
Expand Down Expand Up @@ -58,10 +60,24 @@ final case class OverflowDbDriver(
val cpg: Cpg =
PlumeStatistics.time(PlumeStatistics.TIME_OPEN_DRIVER, { newOverflowGraph(odbConfig) })

private val semanticsParser = new Parser()
private val defaultSemantics: Try[BufferedSource] = Try(
Source.fromInputStream(getClass.getClassLoader.getResourceAsStream("default.semantics"))
)
private val defaultSemanticsFile = "default.semantics"
val methodSemantics: Semantics = cacheConfig.methodSemantics match {
case Some(semantics) => Semantics.fromList(semantics)
case None =>
logger.info("No specified method semantics file given. Using default semantics.")
Try(
Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(defaultSemanticsFile))
) match {
case Failure(e) =>
logger.warn(
"No 'default.semantics' file found under resources - data flow tracking will over-taint.",
e
)
Semantics.fromList(List())
case Success(input: BufferedSource) =>
Semantics.fromList(new Parser().parse(input.getLines().mkString("\n")))
}
}

/** Reads the saved cache on the disk and retrieves it as a serializable object
*/
Expand All @@ -85,16 +101,6 @@ final case class OverflowDbDriver(
{ deserializeResultTable(fetchCacheFromDisk, cpg) }
)

private implicit var context: EngineContext =
EngineContext(
Semantics.fromList(List()),
EngineConfig(
maxCallDepth = cacheConfig.maxCallDepth,
initialTable = resultTable,
shareCacheBetweenTasks = cacheConfig.shareCacheBetweenTasks
)
)

private def saveDataflowCache(): Unit = cacheConfig.dataFlowCacheFile match {
case Some(filePath) if resultTable.isDefined && resultTable.get.table.nonEmpty =>
PlumeStatistics.time(
Expand All @@ -112,59 +118,6 @@ final case class OverflowDbDriver(
case _ => // Do nothing
}

/** Sets the context for the data-flow engine when performing [[flowsBetween]] queries.
*
* @param maxCallDepth the new method call depth.
* @param methodSemantics the file containing method semantics for external methods.
* @param initialCache an initializer for the data-flow cache containing pre-calculated paths.
*/
def setDataflowContext(
maxCallDepth: Int,
methodSemantics: Option[BufferedSource] = None,
initialCache: Option[ResultTable] = None,
shareCacheBetweenTasks: Boolean = false
): EngineContext = {
val cache =
if (initialCache.isDefined) initialCache else resultTable

if (methodSemantics.isDefined) {
setDataflowContext(
maxCallDepth,
Semantics.fromList(semanticsParser.parse(methodSemantics.get.getLines().mkString("\n"))),
cache,
shareCacheBetweenTasks
)
} else if (defaultSemantics.isSuccess) {
logger.info(
"No specified method semantics file given. Using default semantics."
)
setDataflowContext(
maxCallDepth,
Semantics.fromList(semanticsParser.parse(defaultSemantics.get.getLines().mkString("\n"))),
cache,
shareCacheBetweenTasks
)
} else {
logger.warn(
"No \"default.semantics\" file found under resources - data flow tracking may not perform correctly."
)
setDataflowContext(maxCallDepth, Semantics.fromList(List()), cache, shareCacheBetweenTasks)
}
}

private def setDataflowContext(
maxCallDepth: Int,
methodSemantics: Semantics,
cache: Option[ResultTable],
shareCacheBetweenTasks: Boolean
): EngineContext = {
context = EngineContext(
methodSemantics,
EngineConfig(maxCallDepth, cache, shareCacheBetweenTasks)
)
context
}

override def isConnected: Boolean = !cpg.graph.isClosed

override def close(): Unit = PlumeStatistics.time(
Expand Down Expand Up @@ -380,19 +333,21 @@ final case class OverflowDbDriver(
* @param source the source query to match.
* @param sink the sink query to match.
* @param sanitizers a set of full method names to filter paths out with.
* @param noCacheSharing specifies if this run should not share cache results between tasks.
* @return the source nodes whose data flows to the given sinks uninterrupted.
*/
def flowsBetween(
source: Traversal[CfgNode],
sink: Traversal[CfgNode],
sanitizers: Set[String] = Set.empty[String]
sanitizers: Set[String] = Set.empty[String],
noCacheSharing: Boolean = false
): List[ReachableByResult] =
PlumeStatistics.time(
PlumeStatistics.TIME_REACHABLE_BY_QUERYING, {
import io.shiftleft.semanticcpg.language._

prepareInitialTable()
val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context)
val engineContext = prepareInitialTable(noCacheSharing)
val results: List[ReachableByResult] = sink.reachableByDetailed(source)(engineContext)
captureDataflowCache(results)

results
Expand All @@ -412,16 +367,16 @@ final case class OverflowDbDriver(
}
)

private def prepareInitialTable(): Unit = {
private def prepareInitialTable(noCacheSharing: Boolean): EngineContext = {
cacheConfig.dataFlowCacheFile match {
case Some(_) =>
val oldCache = resultTable.getOrElse(new ResultTable)
if (oldCache.table.map(_._2.size).sum <= cacheConfig.maxCachedPaths) {
setDataflowContext(
context.config.maxCallDepth,
context.semantics,
cacheConfig.maxCallDepth,
methodSemantics,
Some(oldCache),
cacheConfig.shareCacheBetweenTasks
shareCacheBetweenTasks = !noCacheSharing
)
} else {
val newCache = new ResultTable
Expand All @@ -439,13 +394,19 @@ final case class OverflowDbDriver(
oldCache.table.clear()
resultTable = Some(newCache)
setDataflowContext(
context.config.maxCallDepth,
context.semantics,
cacheConfig.maxCallDepth,
methodSemantics,
resultTable,
cacheConfig.shareCacheBetweenTasks
shareCacheBetweenTasks = !noCacheSharing
)
}
case _ =>
setDataflowContext(
cacheConfig.maxCallDepth,
methodSemantics,
None,
shareCacheBetweenTasks = !noCacheSharing
)
}
}

Expand Down
40 changes: 40 additions & 0 deletions src/main/scala/com/github/plume/oss/util/DataFlowEngineUtil.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package com.github.plume.oss.util

import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext, ResultTable}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}

import java.nio.file.{Path, Paths}

/** Helper methods for setting up the data flow engine execution context.
*/
object DataFlowEngineUtil {

  /** Builds the execution context used by the data-flow engine when performing
    * [[com.github.plume.oss.drivers.OverflowDbDriver.flowsBetween()]] queries.
    *
    * @param maxCallDepth the new method call depth.
    * @param methodSemantics the file containing method semantics for external methods.
    * @param initialCache an initializer for the data-flow cache containing pre-calculated paths.
    * @param shareCacheBetweenTasks enables the sharing of cache between data flow tasks.
    */
  def setDataflowContext(
    maxCallDepth: Int,
    methodSemantics: Semantics,
    initialCache: Option[ResultTable],
    shareCacheBetweenTasks: Boolean
  ): EngineContext = {
    // Assemble the engine configuration first, then wrap it with the semantics
    // into the context handed to reachableByDetailed queries.
    val engineConfig = EngineConfig(maxCallDepth, initialCache, shareCacheBetweenTasks)
    EngineContext(methodSemantics, engineConfig)
  }
}

/** Configuration for the data-flow engine's result caching on the OverflowDB driver.
  *
  * @param methodSemantics pre-parsed semantics for external methods; when `None` the driver
  *   falls back to the bundled `default.semantics` resource.
  * @param dataFlowCacheFile path where the data-flow cache is persisted between runs; `None`
  *   disables saving/loading the cache to disk.
  * @param compressDataFlowCache presumably toggles compression of the serialized cache file —
  *   confirm against the serialization code in the `domain` package.
  * @param maxCallDepth maximum method call depth passed to the engine's `EngineConfig`.
  * @param maxCachedPaths threshold on the total number of cached paths; once exceeded the
  *   driver rebuilds the cache (see `OverflowDbDriver.prepareInitialTable`).
  * @param shareCacheBetweenTasks enables sharing of the cache between data-flow tasks.
  */
case class DataFlowCacheConfig(
  methodSemantics: Option[List[FlowSemantic]] = None,
  dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")),
  compressDataFlowCache: Boolean = true,
  maxCallDepth: Int = 2,
  maxCachedPaths: Int = 1_000,
  shareCacheBetweenTasks: Boolean = false
)
29 changes: 12 additions & 17 deletions src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.github.plume.oss.drivers

import com.github.plume.oss.testfixtures.PlumeDriverFixture
import com.github.plume.oss.testfixtures.PlumeDriverFixture.{b1, m1}
import com.github.plume.oss.util.DataFlowCacheConfig
import io.joern.dataflowengineoss.semanticsloader.Parser
import io.shiftleft.codepropertygraph.generated.{Cpg, EdgeTypes}
import io.shiftleft.passes.IntervalKeyPool
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
Expand All @@ -22,27 +24,20 @@ class OverflowDbTests extends PlumeDriverFixture(new OverflowDbDriver()) {
Files.write(methodSemanticsPath, "\"Foo.bar\" 1->-1\n".getBytes(StandardCharsets.UTF_8))

"should allow for custom method semantics to be defined" in {
driver match {
case x: OverflowDbDriver =>
x.setDataflowContext(
2,
Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath)))
)
}
val parser = new Parser()
val rawSemantics = Source
.fromInputStream(Files.newInputStream(methodSemanticsPath))
.getLines()
.mkString("\n")
val config = DataFlowCacheConfig(methodSemantics = Some(parser.parse(rawSemantics)))
new OverflowDbDriver(cacheConfig = config).close()
}

"should handle the case where no default semantics can be retrieved" in {
val field: Field = driver.getClass.getDeclaredField("defaultSemantics")
val field: Field = driver.getClass.getDeclaredField("defaultSemanticsFile")
field.setAccessible(true)
field.set(driver, Try.apply(throw new Exception("Foo")))

driver match {
case x: OverflowDbDriver =>
x.setDataflowContext(
2,
Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath)))
)
}
field.set(driver, null)
new OverflowDbDriver().close()
}

"should be able to serialize and deserialize XML graphs without throwing an exception" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package com.github.plume.oss.testfixtures
import com.github.plume.oss.{Jimple2Cpg, PlumeStatistics}
import com.github.plume.oss.drivers.OverflowDbDriver
import com.github.plume.oss.JavaCompiler.compileJava
import com.github.plume.oss.domain.DataFlowCacheConfig
import com.github.plume.oss.util.DataFlowCacheConfig
import io.joern.x2cpg.testfixtures.{CodeToCpgFixture, LanguageFrontend}
import io.shiftleft.codepropertygraph.Cpg
import org.slf4j.LoggerFactory
Expand Down

0 comments on commit 19685b5

Please sign in to comment.