🔖 Release v1.1.12
DavidBakerEffendi committed Mar 24, 2022
1 parent 856a55b commit a2b466e
Showing 7 changed files with 90 additions and 79 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [1.1.12] - 2022-03-24

### Fixed

- Do not write a data-flow cache file if no results were saved.
- Data flow engine context regenerated with latest cache after each query.

## [1.1.11] - 2022-03-24

### Fixed
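The first of the two new CHANGELOG entries for 1.1.12 above maps to the `saveDataflowCache` guard changed in `OverflowDbDriver` further down in this diff; the second entry is sketched after the OverflowDbDriver hunks. Below is a minimal, self-contained sketch of the empty-cache guard, assuming simplified stand-ins (`SerializedCache`, `compressToFile`, the cache path) rather than Plume's real types:

```scala
import java.nio.file.{Path, Paths}

// Sketch only: the names below are simplified stand-ins for the OverflowDbDriver
// members shown later in this diff, not Plume's actual implementation.
object CacheWriteSketch {

  // Stand-in for the engine's serializable data-flow result table.
  type SerializedCache = Map[Long, Vector[String]]

  def saveDataflowCache(dataFlowCacheFile: Option[Path], table: Option[SerializedCache]): Unit =
    dataFlowCacheFile match {
      // Only write the cache file when a result table exists and actually holds results.
      case Some(filePath) if table.exists(_.nonEmpty) => compressToFile(table.get, filePath)
      case _                                          => // Nothing worth persisting
    }

  // Placeholder for the driver's real compressing serializer.
  private def compressToFile(cache: SerializedCache, path: Path): Unit =
    println(s"writing ${cache.size} cached flows to $path")

  def main(args: Array[String]): Unit = {
    val target = Paths.get("dataflow-cache.bin") // hypothetical local path
    saveDataflowCache(Some(target), Some(Map.empty))                           // skipped: empty table
    saveDataflowCache(Some(target), Some(Map(1L -> Vector("source -> sink")))) // written
  }
}
```

In the real driver the guard is the `table.isDefined && !table.get.isEmpty` condition visible in the first `OverflowDbDriver` hunk below.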
6 changes: 3 additions & 3 deletions build.sbt
@@ -3,7 +3,7 @@ name := "Plume"
inThisBuild(
List(
organization := "com.github.plume-oss",
version := "1.1.11",
version := "1.1.12",
scalaVersion := "2.13.8",
crossScalaVersions := Seq("2.13.8", "3.1.1"),
resolvers ++= Seq(
@@ -14,8 +14,8 @@ inThisBuild(
)
)

- val cpgVersion = "1.3.519"
- val joernVersion = "1.1.643"
+ val cpgVersion = "1.3.521"
+ val joernVersion = "1.1.653"
val sootVersion = "4.3.0"
val tinkerGraphVersion = "3.4.11"
val neo4jVersion = "4.4.3"
@@ -88,8 +88,9 @@ final case class OverflowDbDriver(
)

private def saveDataflowCache(): Unit = dataFlowCacheFile match {
- case Some(filePath) if table.isDefined => compressToFile(table.get, filePath)
+ case Some(filePath) if table.isDefined && !table.get.isEmpty =>
+ compressToFile(table.get, filePath)
case _ => // Do nothing
}

/** Sets the context for the data-flow engine when performing [[nodesReachableBy]] queries.
@@ -360,7 +361,7 @@
): List[ReachableByResult] =
PlumeStatistics.time(
PlumeStatistics.TIME_REACHABLE_BY_QUERYING, {
- val results: List[ReachableByResult] = sink.reachableByDetailed(source)
+ val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context)
// TODO: Right now the results are saved in a serializable format ready for a binary blob. We should look into
// storing these reliably on the graph.
captureResultsBlob(results)
Expand Down Expand Up @@ -391,6 +392,10 @@ final case class OverflowDbDriver(
}
case None => // Do nothing
}
+ // Reload latest results to the query engine context
+ results.map(_.table).collectFirst { resultTable =>
+ setDataflowContext(context.config.maxCallDepth, context.semantics, Some(resultTable))
+ }
}

/** Will traverse the deserialized cache and remove nodes belonging to types not in the given set of unchanged types.
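The second CHANGELOG entry corresponds to the `// Reload latest results to the query engine context` block added in the last hunk above: after every `nodesReachableBy` query the driver rebuilds the engine context via `setDataflowContext` with the freshest result table, so follow-up queries start from the paths already cached. A rough sketch of that pattern, using placeholder types rather than Joern's real `EngineContext`, `Semantics` and `ResultTable`:

```scala
// Sketch only: every type here is a placeholder standing in for Joern's
// dataflowengineoss classes, which have different shapes and constructors.
object ContextRefreshSketch {

  final case class Semantics(rules: List[String] = Nil)
  final case class EngineConfig(maxCallDepth: Int = 2)
  final case class ResultTable(paths: Map[Long, Vector[Long]] = Map.empty)
  final case class EngineContext(semantics: Semantics, config: EngineConfig, cache: Option[ResultTable])
  final case class ReachableByResult(pathNodeIds: Vector[Long], table: ResultTable)

  @volatile private var context: EngineContext =
    EngineContext(Semantics(), EngineConfig(), cache = None)

  private def setDataflowContext(maxCallDepth: Int, semantics: Semantics, cache: Option[ResultTable]): Unit =
    context = EngineContext(semantics, EngineConfig(maxCallDepth), cache)

  // Stand-in for sink.reachableByDetailed(source)(context) in the real driver.
  private def runQuery(): List[ReachableByResult] =
    List(ReachableByResult(Vector(1L, 2L, 3L), ResultTable(Map(3L -> Vector(1L, 2L)))))

  def nodesReachableBy(): List[ReachableByResult] = {
    val results = runQuery()
    // Feed the freshest result table back into the engine context so the next
    // query re-uses the paths discovered (and cached) by this one.
    results.map(_.table).collectFirst { case resultTable =>
      setDataflowContext(context.config.maxCallDepth, context.semantics, Some(resultTable))
    }
    results
  }

  def main(args: Array[String]): Unit = {
    nodesReachableBy()
    println(s"context now carries ${context.cache.map(_.paths.size).getOrElse(0)} cached path entries")
  }
}
```

The new `DiffTests` case further down ("should have more cache misses than hits on the first data-flow query than the second") exercises exactly this behaviour by comparing `QueryEngineStatistics` hit/miss counters across two identical queries.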
7 changes: 3 additions & 4 deletions src/test/resources/diff/Foo1.java
@@ -2,10 +2,9 @@

class Foo {

- public static void foo() {
- var x = 1;
- var y = 2;
- Bar.bar(x, y);
+ public static void foo(int a) {
+ var b = 1;
+ Bar.bar(a, b);
}

}
7 changes: 3 additions & 4 deletions src/test/resources/diff/Foo2.java
@@ -2,10 +2,9 @@

class Foo {

- public static void foo() {
- var x = 3;
- var y = 5;
- Bar.bar(x, y);
+ public static void foo(int a) {
+ var b = 3;
+ Bar.bar(a, b);
}

}
129 changes: 65 additions & 64 deletions src/test/scala/com/github/plume/oss/DiffTests.scala
@@ -1,12 +1,14 @@
package com.github.plume.oss

import com.github.plume.oss.drivers.OverflowDbDriver
- import io.shiftleft.codepropertygraph.generated.{Cpg, NodeTypes, PropertyNames}
+ import io.joern.dataflowengineoss.queryengine.QueryEngineStatistics
+ import io.shiftleft.codepropertygraph.generated.{Cpg, NodeTypes, Operators, PropertyNames}
import io.shiftleft.codepropertygraph.{Cpg => CPG}
import io.shiftleft.semanticcpg.language._
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
+ import org.slf4j.LoggerFactory

import java.io.{File, FileInputStream, FileOutputStream}
import java.nio.file.{Files, Paths}
@@ -16,6 +18,8 @@ import scala.util.Using
*/
class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {

+ private val logger = LoggerFactory.getLogger(classOf[DiffTests])

private def getTestResource(dir: String): File = {
val resourceURL = getClass.getResource(dir)
if (resourceURL == null) throw new NullPointerException("Unable to obtain test resource")
@@ -43,7 +47,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll {
private val bar = new File(s"${sandboxDir.getAbsolutePath}${File.separator}Bar.java")
private var driver = new OverflowDbDriver()
private var cpg: Option[Cpg] = None
private val storage = Some("./cpg_test.odb")

override def beforeAll(): Unit = {
rewriteFileContents(foo, foo1)
@@ -58,6 +62,7 @@

override def afterAll(): Unit = {
driver.clear()
+ driver.close()
Paths.get(storage.get).toFile.delete()
}

@@ -66,8 +71,10 @@
driver.propertyFromNodes(NodeTypes.METHOD).size shouldBe 7
driver.propertyFromNodes(NodeTypes.TYPE, PropertyNames.FULL_NAME).size shouldBe 7
driver.propertyFromNodes(NodeTypes.NAMESPACE_BLOCK, PropertyNames.FULL_NAME).size shouldBe 3
- driver.propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME).size shouldBe 9
- driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 5
+ driver
+ .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME)
+ .size shouldBe 10
+ driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 4

val List(barM, fooM) = driver
.propertyFromNodes(NodeTypes.TYPE_DECL, PropertyNames.FULL_NAME, PropertyNames.IS_EXTERNAL)
@@ -77,13 +84,51 @@
}
fooM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Foo")
barM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Bar")
- val List(lx, ly) = driver
+ val List(lb) = driver
.propertyFromNodes(NodeTypes.LITERAL, PropertyNames.CODE)
- .sortWith { case (x, y) =>
- x(PropertyNames.CODE).toString < y(PropertyNames.CODE).toString
- }
- lx.get(PropertyNames.CODE) shouldBe Some("1")
- ly.get(PropertyNames.CODE) shouldBe Some("2")
+ lb.get(PropertyNames.CODE) shouldBe Some("1")
+ val paramA = driver
+ .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.CODE)
+ .filter(_(PropertyNames.CODE) == "int a")
+ paramA.size shouldBe 1
}

"should have more cache misses than hits on the first data-flow query than the second" in {
val sourceNodesId1 = driver.cpg.parameter("a").id.l
val sinkNodesId1 = driver.cpg.call(Operators.addition).id.l

val r1 = driver
.nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)
val hitRatio1 = cH1.toDouble / (cH1 + cM1) * 100
logger.info(s"Cache hit ratio $hitRatio1%")
cH1 should be <= cM1
QueryEngineStatistics.reset()

logger.info("Closing driver")
driver.close()
driver = new OverflowDbDriver(storage)

val sourceNodesId2 = driver.cpg.parameter("a").id.l
val sinkNodesId2 = driver.cpg.call(Operators.addition).id.l

val r2 = driver
.nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition))
.map(_.path.map(_.node.id()))
val cH2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS)
val cM2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES)

// Assert ODB deserialized the graph correctly
sourceNodesId1 shouldBe sourceNodesId2
sinkNodesId1 shouldBe sinkNodesId2
// The same path should be picked up
r1 shouldBe r2
// The cache should have a higher number of hits now from re-using the first query
val hitRatio2 = cH2.toDouble / (cH2 + cM2) * 100
logger.info(s"Cache hit ratio $hitRatio2%")
hitRatio2 should be >= hitRatio1
}

"should update Foo on file changes" in {
@@ -97,8 +142,10 @@
driver.propertyFromNodes(NodeTypes.METHOD).size shouldBe 7
driver.propertyFromNodes(NodeTypes.TYPE, PropertyNames.FULL_NAME).size shouldBe 7
driver.propertyFromNodes(NodeTypes.NAMESPACE_BLOCK, PropertyNames.FULL_NAME).size shouldBe 3
- driver.propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME).size shouldBe 9
- driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 5
+ driver
+ .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME)
+ .size shouldBe 10
+ driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 4

val List(barM, fooM) = driver
.propertyFromNodes(NodeTypes.TYPE_DECL, PropertyNames.FULL_NAME, PropertyNames.IS_EXTERNAL)
@@ -108,59 +155,13 @@
}
fooM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Foo")
barM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Bar")
- val List(lx, ly) = driver
+ val List(lb) = driver
.propertyFromNodes(NodeTypes.LITERAL, PropertyNames.CODE)
- .sortWith { case (x, y) =>
- x(PropertyNames.CODE).toString < y(PropertyNames.CODE).toString
- }
- lx.get(PropertyNames.CODE) shouldBe Some("3")
- ly.get(PropertyNames.CODE) shouldBe Some("5")
- }

"should succeed to re-use some data-flow results and take shorter time than the first" in {
var cpg = CPG(driver.cpg.graph)

val t1 = System.nanoTime
driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar"))
val d1 = System.nanoTime - t1

rewriteFileContents(bar, bar1)
rewriteFileContents(foo, foo2)
JavaCompiler.compileJava(foo, bar)

driver.close()
driver = new OverflowDbDriver(storage)
cpg = CPG(new Jimple2Cpg().createCpg(sandboxDir.getAbsolutePath, driver = driver).graph)

val t2 = System.nanoTime
driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar"))
val d2 = System.nanoTime - t2
val speedup = 100.0 - d2 / (d1 + 0.0)

d1 should be >= d2
speedup should be < 99.999
speedup should be >= 99.700
}

"should succeed to re-use all data-flow results and take shorter time than the first" in {
var cpg = CPG(driver.cpg.graph)

val t1 = System.nanoTime
driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar"))
val d1 = System.nanoTime - t1

driver.close()
driver = new OverflowDbDriver(storage)
cpg = CPG(new Jimple2Cpg().createCpg(sandboxDir.getAbsolutePath, driver = driver).graph)

val t2 = System.nanoTime
driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar"))
val d2 = System.nanoTime - t2
val speedup = 100.0 - d2 / (d1 + 0.0)

d1 should be >= d2
speedup should be < 99.999
speedup should be >= 99.800
lb.get(PropertyNames.CODE) shouldBe Some("3")
val paramA = driver
.propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.CODE)
.filter(_(PropertyNames.CODE) == "int a")
paramA.size shouldBe 1
}

}
@@ -33,7 +33,7 @@ class Jimple2CpgFixture(_driver: Option[OverflowDbDriver] = None)

val driver: OverflowDbDriver = frontend.asInstanceOf[PlumeFrontend].driver

- override def createEnhancements(cpg: Cpg): Unit = {}
+ override def passes(cpg: Cpg): Unit = {}

override def writeCodeToFile(sourceCode: String): File = {
val tmpDir = Files.createTempDirectory("semanticcpgtest").toFile
