From a3d5ea1e928db815b5343660b345a061d80a2a98 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Fri, 1 Apr 2022 14:56:12 +0200 Subject: [PATCH] :bookmark: Release v1.2.1 --- CHANGELOG.md | 6 ++ build.sbt | 2 +- .../plume/oss/drivers/OverflowDbDriver.scala | 79 ++++++++++--------- .../com/github/plume/oss/DiffTests.scala | 4 +- .../plume/oss/querying/DataFlowTests.scala | 10 +-- 5 files changed, 54 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fddcfb4e..574d895f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [1.2.1] - 2022-04-01 + +### Changed + +- `OverflowDbDriver::flowsBetween` performance improvement on initial cache preparation. + ## [1.2.0] - 2022-03-31 ### Changed diff --git a/build.sbt b/build.sbt index c447d2d7..671a8efc 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ name := "Plume" inThisBuild( List( organization := "com.github.plume-oss", - version := "1.2.0", + version := "1.2.1", scalaVersion := "2.13.8", crossScalaVersions := Seq("2.13.8", "3.1.1"), resolvers ++= Seq( diff --git a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala index 74a6141a..cb7a023a 100644 --- a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala +++ b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala @@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap import scala.collection.mutable import scala.io.{BufferedSource, Source} import scala.jdk.CollectionConverters.{IteratorHasAsScala, MapHasAsScala} -import scala.util.{Failure, Success, Try, Using} +import scala.util._ /** Driver to create an OverflowDB database file. * @param storageLocation where the database will serialize to and deserialize from. @@ -378,42 +378,18 @@ final case class OverflowDbDriver( * @return the source nodes whose data flows to the given sinks uninterrupted. */ def flowsBetween( - source: () => Traversal[CfgNode], - sink: () => Traversal[CfgNode], + source: Traversal[CfgNode], + sink: Traversal[CfgNode], sanitizers: Set[String] = Set.empty[String] ): List[ReachableByResult] = PlumeStatistics.time( PlumeStatistics.TIME_REACHABLE_BY_QUERYING, { import io.shiftleft.semanticcpg.language._ - // Strip the cache of only nodes that will be used the most in this query to get fast starts/finishes - cacheConfig.dataFlowCacheFile match { - case Some(_) => - val newCache = new ResultTable - val oldCache = resultTable.getOrElse(new ResultTable) - var currPathsInCache = 0 - scala.util.Random - .shuffle(source().l ++ sink().l) - .flatMap { x => - oldCache.get(x) match { - case Some(paths) => Some((x, paths)) - case None => None - } - } - .foreach { case (startOrEndNode, paths) => - if (currPathsInCache + paths.size <= cacheConfig.maxCachedPaths) { - currPathsInCache += paths.size - newCache.add(startOrEndNode, paths) - } - } - oldCache.table.clear() - resultTable = Some(newCache) - setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable) - case _ => - } - val results: List[ReachableByResult] = sink() - .reachableByDetailed(source())(context) + prepareInitialTable() + val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context) captureDataflowCache(results) + results // Remove a source/sink arguments referring to itself .filter(x => x.path.head.node.astParent != x.path.last.node.astParent) @@ -431,12 +407,40 @@ final case class OverflowDbDriver( } ) + private def prepareInitialTable(): Unit = { + cacheConfig.dataFlowCacheFile match { + case Some(_) => + val oldCache = resultTable.getOrElse(new ResultTable) + if (oldCache.table.map(_._2.size).sum <= cacheConfig.maxCachedPaths) { + setDataflowContext(context.config.maxCallDepth, context.semantics, Some(oldCache)) + } else { + val newCache = new ResultTable + var currPathsInCache = 0 + // let the gods decide which entries will go through the maxCachedPaths limit + Random + .shuffle(oldCache.table.iterator) + .takeWhile { case (_, paths) => + currPathsInCache + paths.size <= cacheConfig.maxCachedPaths + } + .foreach { case (startOrEndNode, paths) => + currPathsInCache += paths.size + newCache.add(startOrEndNode, paths) + } + oldCache.table.clear() + resultTable = Some(newCache) + setDataflowContext(context.config.maxCallDepth, context.semantics, resultTable) + } + case _ => + } + } + private def captureDataflowCache(results: List[ReachableByResult]): Unit = { cacheConfig.dataFlowCacheFile match { case Some(_) => // Capture latest results resultTable = (results - .map(_.table) ++ List(resultTable).flatten).distinct + .map(_.table) + .distinct ++ List(resultTable).flatten) .reduceOption((a: ResultTable, b: ResultTable) => { b.table.foreach { case (k, v) => a.add(k, v) } a @@ -467,9 +471,11 @@ final case class OverflowDbDriver( val newTab = oldTab.table .filter { case (k: StoredNode, _) => isNodeUnderTypes(k, unchangedTypes) } - .map { case (k: StoredNode, v: Vector[ReachableByResult]) => - val filteredPaths = v.filterNot(isResultExpired) - (k, filteredPaths) + .flatMap { case (k: StoredNode, v: Vector[ReachableByResult]) => + v.collectFirst { case v: ReachableByResult if isResultExpired(v) => v } match { + case Some(_) => None // discard entry + case None => Some((k, v)) + } } .toMap // Refresh old table and add new entries @@ -482,11 +488,6 @@ final case class OverflowDbDriver( s"Able to re-use ${(leftOverPSize.toDouble / startPSize) * 100.0}% of the saved paths. " + s"Removed ${startPSize - leftOverPSize} expired paths from $startPSize saved paths." ) - setDataflowContext( - context.config.maxCallDepth, - context.semantics, - Some(oldTab) - ) } ) case None => // Do nothing diff --git a/src/test/scala/com/github/plume/oss/DiffTests.scala b/src/test/scala/com/github/plume/oss/DiffTests.scala index e322cbc3..77e98bd6 100644 --- a/src/test/scala/com/github/plume/oss/DiffTests.scala +++ b/src/test/scala/com/github/plume/oss/DiffTests.scala @@ -102,7 +102,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { val sinkNodesId1 = driver.cpg.call(Operators.addition).id.l val r1 = driver - .flowsBetween( () => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition)) + .flowsBetween(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition)) .map(_.path.map(_.node.id())) val cH1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS) val cM1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES) @@ -119,7 +119,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { val sinkNodesId2 = driver.cpg.call(Operators.addition).id.l val r2 = driver - .flowsBetween(() => driver.cpg.parameter("a"), () => driver.cpg.call(Operators.addition)) + .flowsBetween(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition)) .map(_.path.map(_.node.id())) val cH2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS) val cM2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES) diff --git a/src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala b/src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala index 92e9e79c..43f4a312 100644 --- a/src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala +++ b/src/test/scala/com/github/plume/oss/querying/DataFlowTests.scala @@ -47,7 +47,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) { val cpg = CPG(driver.cpg.graph) val r = driver - .flowsBetween(() => cpg.parameter("a"), () => cpg.call(".*")) + .flowsBetween(cpg.parameter("a"), cpg.call(".*")) val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code))) v1.head shouldBe ("foo", "int a") @@ -58,7 +58,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) { val cpg = CPG(driver.cpg.graph) val r = driver - .flowsBetween(() => cpg.parameter("a"), () => cpg.call("bar")) + .flowsBetween(cpg.parameter("a"), cpg.call("bar")) val List(v1) = r.map(r => r.path.map(x => (x.node.method.name, x.node.code))) v1.head shouldBe ("foo", "int a") @@ -69,7 +69,7 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) { val cpg = CPG(driver.cpg.graph) val r = driver - .flowsBetween(() => cpg.parameter("a"), () => cpg.call("println")) + .flowsBetween(cpg.parameter("a"), cpg.call("println")) r.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println) @@ -89,11 +89,11 @@ class DataFlowTests extends Jimple2CpgFixture(Some(new OverflowDbDriver())) { def source = cpg.call("taint").argument def sink = cpg.call("baz") - val r1 = driver.flowsBetween(() => source, () => sink) + val r1 = driver.flowsBetween(source, sink) r1.map(r => r.path.map(x => (x.node.method.name, x.node.code))).foreach(println) r1.size shouldBe 1 - val r2 = driver.flowsBetween(() => source, () => sink, Set("Foo.falseClean:int(int)")) + val r2 = driver.flowsBetween(source, sink, Set("Foo.falseClean:int(int)")) r2.size shouldBe 0 }