Skip to content

Commit

Permalink
🔖 Release v1.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBakerEffendi committed Apr 1, 2022
1 parent e0398c5 commit a3d5ea1
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 47 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [1.2.1] - 2022-04-01

### Changed

- `OverflowDbDriver::flowsBetween` performance improvement on initial cache preparation.

## [1.2.0] - 2022-03-31

### Changed
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name := "Plume"
inThisBuild(
List(
organization := "com.github.plume-oss",
version := "1.2.0",
version := "1.2.1",
scalaVersion := "2.13.8",
crossScalaVersions := Seq("2.13.8", "3.1.1"),
resolvers ++= Seq(
Expand Down
79 changes: 40 additions & 39 deletions src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable
import scala.io.{BufferedSource, Source}
import scala.jdk.CollectionConverters.{IteratorHasAsScala, MapHasAsScala}
import scala.util.{Failure, Success, Try, Using}
import scala.util._

/** Driver to create an OverflowDB database file.
* @param storageLocation where the database will serialize to and deserialize from.
Expand Down Expand Up @@ -378,42 +378,18 @@ final case class OverflowDbDriver(
* @return the source nodes whose data flows to the given sinks uninterrupted.
*/
def flowsBetween(
source: () => Traversal[CfgNode],
sink: () => Traversal[CfgNode],
source: Traversal[CfgNode],
sink: Traversal[CfgNode],
sanitizers: Set[String] = Set.empty[String]
): List[ReachableByResult] =
PlumeStatistics.time(
PlumeStatistics.TIME_REACHABLE_BY_QUERYING, {
import io.shiftleft.semanticcpg.language._
// Strip the cache of only nodes that will be used the most in this query to get fast starts/finishes
cacheConfig.dataFlowCacheFile match {
case Some(_) =>
val newCache = new ResultTable
val oldCache = resultTable.getOrElse(new ResultTable)
var currPathsInCache = 0
scala.util.Random
.shuffle(source().l ++ sink().l)
.flatMap { x =>
oldCache.get(x) match {
case Some(paths) => Some((x, paths))
case None => None
}
}
.foreach { case (startOrEndNode, paths) =>
if (currPathsInCache + paths.size <= cacheConfig.maxCachedPaths) {
currPathsInCache += paths.size
newCache.add(startOrEndNode, paths)
}
}
oldCache.table.clear()
resultTable = Some(newCache)
setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable)
case _ =>
}

val results: List[ReachableByResult] = sink()
.reachableByDetailed(source())(context)
prepareInitialTable()
val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context)
captureDataflowCache(results)

results
// Remove a source/sink arguments referring to itself
.filter(x => x.path.head.node.astParent != x.path.last.node.astParent)
Expand All @@ -431,12 +407,40 @@ final case class OverflowDbDriver(
}
)

/** Trims the persisted data-flow result cache down to at most `cacheConfig.maxCachedPaths`
  * paths before a reachability query runs, and installs the surviving table as the active
  * dataflow context.
  *
  * Only does work when a cache file is configured (`cacheConfig.dataFlowCacheFile` is
  * `Some`); otherwise this is a no-op. If the existing cache already fits within the path
  * budget it is reused as-is; only when it exceeds the budget are entries dropped.
  *
  * NOTE(review): mutates the enclosing driver's `resultTable` field and clears the old
  * cache's backing table in the oversized branch — not safe to call concurrently with a
  * query that is reading `resultTable`; confirm callers serialize access.
  */
private def prepareInitialTable(): Unit = {
  cacheConfig.dataFlowCacheFile match {
    case Some(_) =>
      // Fall back to an empty table if nothing has been cached yet.
      val oldCache = resultTable.getOrElse(new ResultTable)
      // Total number of cached paths = sum of path-vector sizes across all entries.
      if (oldCache.table.map(_._2.size).sum <= cacheConfig.maxCachedPaths) {
        // Whole cache fits in the budget: reuse it untouched.
        setDataflowContext(context.config.maxCallDepth, context.semantics, Some(oldCache))
      } else {
        val newCache = new ResultTable
        var currPathsInCache = 0
        // Over budget: keep a random subset of entries. Shuffling means no entry is
        // systematically favored ("let the gods decide" which ones survive).
        //
        // IMPORTANT: this pipeline relies on Iterator laziness. `takeWhile`'s predicate
        // reads `currPathsInCache`, which is only incremented in the `foreach` below;
        // because the shuffled iterator is consumed one element at a time, each predicate
        // check sees the count updated by the previous element. Converting this to a
        // strict collection would check every predicate against 0 and overfill the cache.
        Random
          .shuffle(oldCache.table.iterator)
          .takeWhile { case (_, paths) =>
            // Admit the entry only if its paths still fit within the remaining budget;
            // stops at the first entry that would overflow (later smaller entries are
            // not considered — intentional, keeps the pass single-scan).
            currPathsInCache + paths.size <= cacheConfig.maxCachedPaths
          }
          .foreach { case (startOrEndNode, paths) =>
            currPathsInCache += paths.size
            newCache.add(startOrEndNode, paths)
          }
        // Release the old entries and make the trimmed table the active cache.
        oldCache.table.clear()
        resultTable = Some(newCache)
        setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable)
      }
    case _ => // No cache file configured: nothing to prepare.
  }
}

private def captureDataflowCache(results: List[ReachableByResult]): Unit = {
cacheConfig.dataFlowCacheFile match {
case Some(_) =>
// Capture latest results
resultTable = (results
.map(_.table) ++ List(resultTable).flatten).distinct
.map(_.table)
.distinct ++ List(resultTable).flatten)
.reduceOption((a: ResultTable, b: ResultTable) => {
b.table.foreach { case (k, v) => a.add(k, v) }
a
Expand Down Expand Up @@ -467,9 +471,11 @@ final case class OverflowDbDriver(

val newTab = oldTab.table
.filter { case (k: StoredNode, _) => isNodeUnderTypes(k, unchangedTypes) }
.map { case (k: StoredNode, v: Vector[ReachableByResult]) =>
val filteredPaths = v.filterNot(isResultExpired)
(k, filteredPaths)
.flatMap { case (k: StoredNode, v: Vector[ReachableByResult]) =>
v.collectFirst { case v: ReachableByResult if isResultExpired(v) => v } match {
case Some(_) => None // discard entry
case None => Some((k, v))
}
}
.toMap
// Refresh old table and add new entries
Expand All @@ -482,11 +488,6 @@ final case class OverflowDbDriver(
s"Able to re-use ${(leftOverPSize.toDouble / startPSize) * 100.0}% of the saved paths. " +
s"Removed ${startPSize - leftOverPSize} expired paths from $startPSize saved paths."
)
setDataflowContext(
context.config.maxCallDepth,
context.semantics,
Some(oldTab)
)
}
)
case None => // Do nothing
Expand Down
4 changes: 2 additions & 2 deletions src/test/scala/com/github/plume/oss/DiffTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
val sinkNodesId1 = driver.cpg.call(Operators.addition).id.l

val r1 = driver
.flowsBetween( () => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition))
.flowsBetween(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)
Expand All @@ -119,7 +119,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
val sinkNodesId2 = driver.cpg.call(Operators.addition).id.l

val r2 = driver
.flowsBetween(() => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition))
.flowsBetween(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)
Expand Down
10 changes: 5 additions & 5 deletions src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("<operator>.*"))
.flowsBetween(cpg.parameter("a"), cpg.call("<operator>.*"))
val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code)))

v1.head shouldBe ("foo", "int a")
Expand All @@ -58,7 +58,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("bar"))
.flowsBetween(cpg.parameter("a"), cpg.call("bar"))
val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code)))

v1.head shouldBe ("foo", "int a")
Expand All @@ -69,7 +69,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
val cpg = CPG(driver.cpg.graph)

val r = driver
.flowsBetween(() => cpg.parameter("a"), () => cpg.call("println"))
.flowsBetween(cpg.parameter("a"), cpg.call("println"))

r.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println)

Expand All @@ -89,11 +89,11 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) {
def source = cpg.call("taint").argument
def sink = cpg.call("baz")

val r1 = driver.flowsBetween(() => source, () => sink)
val r1 = driver.flowsBetween(source, sink)
r1.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println)
r1.size shouldBe 1

val r2 = driver.flowsBetween(() => source, () => sink, Set("Foo.falseClean:int(int)"))
val r2 = driver.flowsBetween(source, sink, Set("Foo.falseClean:int(int)"))
r2.size shouldBe 0
}

Expand Down

0 comments on commit a3d5ea1

Please sign in to comment.