diff --git a/CHANGELOG.md b/CHANGELOG.md index b404788b..19083c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [1.1.12] - 2022-03-24 + +### Fixed + +- Do not write a data-flow cache file if no results were saved. +- Data flow engine context regenerated with latest cache after each query. + ## [1.1.11] - 2022-03-24 ### Fixed diff --git a/build.sbt b/build.sbt index 10809b58..d4296dc7 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ name := "Plume" inThisBuild( List( organization := "com.github.plume-oss", - version := "1.1.11", + version := "1.1.12", scalaVersion := "2.13.8", crossScalaVersions := Seq("2.13.8", "3.1.1"), resolvers ++= Seq( @@ -14,8 +14,8 @@ inThisBuild( ) ) -val cpgVersion = "1.3.519" -val joernVersion = "1.1.643" +val cpgVersion = "1.3.521" +val joernVersion = "1.1.653" val sootVersion = "4.3.0" val tinkerGraphVersion = "3.4.11" val neo4jVersion = "4.4.3" diff --git a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala index 73479baa..d768e864 100644 --- a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala +++ b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala @@ -88,8 +88,9 @@ final case class OverflowDbDriver( ) private def saveDataflowCache(): Unit = dataFlowCacheFile match { - case Some(filePath) if table.isDefined => compressToFile(table.get, filePath) - case _ => // Do nothing + case Some(filePath) if table.isDefined && !table.get.isEmpty => + compressToFile(table.get, filePath) + case _ => // Do nothing } /** Sets the context for the data-flow engine when performing [[nodesReachableBy]] queries. @@ -360,7 +361,7 @@ final case class OverflowDbDriver( ): List[ReachableByResult] = PlumeStatistics.time( PlumeStatistics.TIME_REACHABLE_BY_QUERYING, { - val results: List[ReachableByResult] = sink.reachableByDetailed(source) + val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context) // TODO: Right now the results are saved in a serializable format ready for a binary blob. We should look into // storing these reliably on the graph. captureResultsBlob(results) @@ -391,6 +392,10 @@ final case class OverflowDbDriver( } case None => // Do nothing } + // Reload latest results to the query engine context + results.map(_.table).collectFirst { resultTable => + setDataflowContext(context.config.maxCallDepth, context.semantics, Some(resultTable)) + } } /** Will traverse the deserialized cache and remove nodes belonging to types not in the given set of unchanged types. diff --git a/src/test/resources/diff/Foo1.java b/src/test/resources/diff/Foo1.java index ccfab149..0a492de9 100644 --- a/src/test/resources/diff/Foo1.java +++ b/src/test/resources/diff/Foo1.java @@ -2,10 +2,9 @@ class Foo { - public static void foo() { - var x = 1; - var y = 2; - Bar.bar(x, y); + public static void foo(int a) { + var b = 1; + Bar.bar(a, b); } } \ No newline at end of file diff --git a/src/test/resources/diff/Foo2.java b/src/test/resources/diff/Foo2.java index 66609ef8..ec2404f3 100644 --- a/src/test/resources/diff/Foo2.java +++ b/src/test/resources/diff/Foo2.java @@ -2,10 +2,9 @@ class Foo { - public static void foo() { - var x = 3; - var y = 5; - Bar.bar(x, y); + public static void foo(int a) { + var b = 3; + Bar.bar(a, b); } } \ No newline at end of file diff --git a/src/test/scala/com/github/plume/oss/DiffTests.scala b/src/test/scala/com/github/plume/oss/DiffTests.scala index 518f83fb..b97ddba9 100644 --- a/src/test/scala/com/github/plume/oss/DiffTests.scala +++ b/src/test/scala/com/github/plume/oss/DiffTests.scala @@ -1,12 +1,14 @@ package com.github.plume.oss import com.github.plume.oss.drivers.OverflowDbDriver -import io.shiftleft.codepropertygraph.generated.{Cpg, NodeTypes, PropertyNames} +import io.joern.dataflowengineoss.queryengine.QueryEngineStatistics +import io.shiftleft.codepropertygraph.generated.{Cpg, NodeTypes, Operators, PropertyNames} import io.shiftleft.codepropertygraph.{Cpg => CPG} import io.shiftleft.semanticcpg.language._ import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import org.slf4j.LoggerFactory import java.io.{File, FileInputStream, FileOutputStream} import java.nio.file.{Files, Paths} @@ -16,6 +18,8 @@ import scala.util.Using */ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { + private val logger = LoggerFactory.getLogger(classOf[DiffTests]) + private def getTestResource(dir: String): File = { val resourceURL = getClass.getResource(dir) if (resourceURL == null) throw new NullPointerException("Unable to obtain test resource") @@ -43,7 +47,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { private val bar = new File(s"${sandboxDir.getAbsolutePath}${File.separator}Bar.java") private var driver = new OverflowDbDriver() private var cpg: Option[Cpg] = None - private val storage = Some("./cpg_test.odb") + private val storage = Some("./cpg_test.odb") override def beforeAll(): Unit = { rewriteFileContents(foo, foo1) @@ -58,6 +62,7 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { override def afterAll(): Unit = { driver.clear() + driver.close() Paths.get(storage.get).toFile.delete() } @@ -66,8 +71,10 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { driver.propertyFromNodes(NodeTypes.METHOD).size shouldBe 7 driver.propertyFromNodes(NodeTypes.TYPE, PropertyNames.FULL_NAME).size shouldBe 7 driver.propertyFromNodes(NodeTypes.NAMESPACE_BLOCK, PropertyNames.FULL_NAME).size shouldBe 3 - driver.propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME).size shouldBe 9 - driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 5 + driver + .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME) + .size shouldBe 10 + driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 4 val List(barM, fooM) = driver .propertyFromNodes(NodeTypes.TYPE_DECL, PropertyNames.FULL_NAME, PropertyNames.IS_EXTERNAL) @@ -77,13 +84,51 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { } fooM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Foo") barM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Bar") - val List(lx, ly) = driver + val List(lb) = driver .propertyFromNodes(NodeTypes.LITERAL, PropertyNames.CODE) - .sortWith { case (x, y) => - x(PropertyNames.CODE).toString < y(PropertyNames.CODE).toString - } - lx.get(PropertyNames.CODE) shouldBe Some("1") - ly.get(PropertyNames.CODE) shouldBe Some("2") + lb.get(PropertyNames.CODE) shouldBe Some("1") + val paramA = driver + .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.CODE) + .filter(_(PropertyNames.CODE) == "int a") + paramA.size shouldBe 1 + } + + "should have more cache misses than hits on the first data-flow query than the second" in { + val sourceNodesId1 = driver.cpg.parameter("a").id.l + val sinkNodesId1 = driver.cpg.call(Operators.addition).id.l + + val r1 = driver + .nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition)) + .map(_.path.map(_.node.id())) + val cH1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS) + val cM1 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES) + val hitRatio1 = cH1.toDouble / (cH1 + cM1) * 100 + logger.info(s"Cache hit ratio $hitRatio1%") + cH1 should be <= cM1 + QueryEngineStatistics.reset() + + logger.info("Closing driver") + driver.close() + driver = new OverflowDbDriver(storage) + + val sourceNodesId2 = driver.cpg.parameter("a").id.l + val sinkNodesId2 = driver.cpg.call(Operators.addition).id.l + + val r2 = driver + .nodesReachableBy(driver.cpg.parameter("a"), driver.cpg.call(Operators.addition)) + .map(_.path.map(_.node.id())) + val cH2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_HITS) + val cM2 = QueryEngineStatistics.results()(QueryEngineStatistics.PATH_CACHE_MISSES) + + // Assert ODB deserialized the graph correctly + sourceNodesId1 shouldBe sourceNodesId2 + sinkNodesId1 shouldBe sinkNodesId2 + // The same path should be picked up + r1 shouldBe r2 + // The cache should have a higher number of hits now from re-using the first query + val hitRatio2 = cH2.toDouble / (cH2 + cM2) * 100 + logger.info(s"Cache hit ratio $hitRatio2%") + hitRatio2 should be >= hitRatio1 } "should update Foo on file changes" in { @@ -97,8 +142,10 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { driver.propertyFromNodes(NodeTypes.METHOD).size shouldBe 7 driver.propertyFromNodes(NodeTypes.TYPE, PropertyNames.FULL_NAME).size shouldBe 7 driver.propertyFromNodes(NodeTypes.NAMESPACE_BLOCK, PropertyNames.FULL_NAME).size shouldBe 3 - driver.propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME).size shouldBe 9 - driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 5 + driver + .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.FULL_NAME) + .size shouldBe 10 + driver.propertyFromNodes(NodeTypes.LOCAL, PropertyNames.FULL_NAME).size shouldBe 4 val List(barM, fooM) = driver .propertyFromNodes(NodeTypes.TYPE_DECL, PropertyNames.FULL_NAME, PropertyNames.IS_EXTERNAL) @@ -108,59 +155,13 @@ class DiffTests extends AnyWordSpec with Matchers with BeforeAndAfterAll { } fooM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Foo") barM.get(PropertyNames.FULL_NAME) shouldBe Some("diff.Bar") - val List(lx, ly) = driver + val List(lb) = driver .propertyFromNodes(NodeTypes.LITERAL, PropertyNames.CODE) - .sortWith { case (x, y) => - x(PropertyNames.CODE).toString < y(PropertyNames.CODE).toString - } - lx.get(PropertyNames.CODE) shouldBe Some("3") - ly.get(PropertyNames.CODE) shouldBe Some("5") - } - - "should succeed to re-use some data-flow results and take shorter time than the first" in { - var cpg = CPG(driver.cpg.graph) - - val t1 = System.nanoTime - driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar")) - val d1 = System.nanoTime - t1 - - rewriteFileContents(bar, bar1) - rewriteFileContents(foo, foo2) - JavaCompiler.compileJava(foo, bar) - - driver.close() - driver = new OverflowDbDriver(storage) - cpg = CPG(new Jimple2Cpg().createCpg(sandboxDir.getAbsolutePath, driver = driver).graph) - - val t2 = System.nanoTime - driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar")) - val d2 = System.nanoTime - t2 - val speedup = 100.0 - d2 / (d1 + 0.0) - - d1 should be >= d2 - speedup should be < 99.999 - speedup should be >= 99.700 - } - - "should succeed to re-use all data-flow results and take shorter time than the first" in { - var cpg = CPG(driver.cpg.graph) - - val t1 = System.nanoTime - driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar")) - val d1 = System.nanoTime - t1 - - driver.close() - driver = new OverflowDbDriver(storage) - cpg = CPG(new Jimple2Cpg().createCpg(sandboxDir.getAbsolutePath, driver = driver).graph) - - val t2 = System.nanoTime - driver.nodesReachableBy(cpg.identifier("x"), cpg.call("bar")) - val d2 = System.nanoTime - t2 - val speedup = 100.0 - d2 / (d1 + 0.0) - - d1 should be >= d2 - speedup should be < 99.999 - speedup should be >= 99.800 + lb.get(PropertyNames.CODE) shouldBe Some("3") + val paramA = driver + .propertyFromNodes(NodeTypes.METHOD_PARAMETER_IN, PropertyNames.CODE) + .filter(_(PropertyNames.CODE) == "int a") + paramA.size shouldBe 1 } } diff --git a/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala b/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala index b8ef5858..30bf2044 100644 --- a/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala +++ b/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala @@ -33,7 +33,7 @@ class Jimple2CpgFixture(_driver: Option[OverflowDbDriver] = None) val driver: OverflowDbDriver = frontend.asInstanceOf[PlumeFrontend].driver - override def createEnhancements(cpg: Cpg): Unit = {} + override def passes(cpg: Cpg): Unit = {} override def writeCodeToFile(sourceCode: String): File = { val tmpDir = Files.createTempDirectory("semanticcpgtest").toFile