diff --git a/CHANGELOG.md b/CHANGELOG.md index 574d895f..ac749901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [1.2.3] - 2022-04-05 + +### Changed + +- Added ability to disable cache sharing on `flowsBetween`. +- Simplified `EngineContext` and `Semantics` on `OverflowDbDriver` startup. +- `OverflowDbDriver::methodSemantics` now public. + +## [1.2.2] - 2022-04-05 + +### Changed + +- Upgraded Joern version to include configurations that disable cache sharing. + ## [1.2.1] - 2022-04-01 ### Changed diff --git a/build.sbt b/build.sbt index f041219f..f720e4ab 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ name := "Plume" inThisBuild( List( organization := "com.github.plume-oss", - version := "1.2.2", + version := "1.2.3", scalaVersion := "2.13.8", crossScalaVersions := Seq("2.13.8", "3.1.1"), resolvers ++= Seq( diff --git a/src/main/scala/com/github/plume/oss/Plume.scala b/src/main/scala/com/github/plume/oss/Plume.scala index 2c0c8e8d..f3b0bd3f 100644 --- a/src/main/scala/com/github/plume/oss/Plume.scala +++ b/src/main/scala/com/github/plume/oss/Plume.scala @@ -1,14 +1,8 @@ package com.github.plume.oss import better.files.File -import com.github.plume.oss.drivers.{ - IDriver, - Neo4jDriver, - NeptuneDriver, - OverflowDbDriver, - TigerGraphDriver, - TinkerGraphDriver -} +import com.github.plume.oss.drivers._ +import com.github.plume.oss.util.DataFlowCacheConfig import io.circe.Json import io.joern.x2cpg.{X2Cpg, X2CpgConfig} import scopt.OParser @@ -60,14 +54,14 @@ object Plume extends App { private def createDriver(conf: DriverConfig): IDriver = { conf match { case _ if conf.database == "OverflowDB" => - val d = new OverflowDbDriver( + new OverflowDbDriver( storageLocation = Option(conf.params.getOrElse("storageLocation", "cpg.odb")), heapPercentageThreshold = conf.params.getOrElse("heapPercentageThreshold", "80").toInt, serializationStatsEnabled = - conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean + conf.params.getOrElse("serializationStatsEnabled", "false").toBoolean, + cacheConfig = + DataFlowCacheConfig(maxCallDepth = conf.params.getOrElse("maxCallDepth", "2").toInt) ) - d.setDataflowContext(conf.params.getOrElse("maxCallDepth", "2").toInt) - d case _ if conf.database == "TinkerGraph" => new TinkerGraphDriver() case _ if conf.database == "Neo4j" => new Neo4jDriver( diff --git a/src/main/scala/com/github/plume/oss/domain/package.scala b/src/main/scala/com/github/plume/oss/domain/package.scala index 319ca757..0b5a93d3 100644 --- a/src/main/scala/com/github/plume/oss/domain/package.scala +++ b/src/main/scala/com/github/plume/oss/domain/package.scala @@ -28,14 +28,6 @@ package object domain { .addModule(DefaultScalaModule) .build() - case class DataFlowCacheConfig( - dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")), - compressDataFlowCache: Boolean = true, - maxCallDepth: Int = 2, - maxCachedPaths: Int = 1_000, - shareCacheBetweenTasks: Boolean = false - ) - /** Given an object and a path, will serialize the object to the given path. * @param o object to serialize. * @param p path to write serialized data to. diff --git a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala index bcc3a364..53040d87 100644 --- a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala +++ b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala @@ -5,6 +5,8 @@ import com.github.plume.oss.domain._ import com.github.plume.oss.drivers.OverflowDbDriver.newOverflowGraph import com.github.plume.oss.passes.callgraph.PlumeDynamicCallLinker import com.github.plume.oss.util.BatchedUpdateUtil._ +import com.github.plume.oss.util.DataFlowCacheConfig +import com.github.plume.oss.util.DataFlowEngineUtil.setDataflowContext import io.joern.dataflowengineoss.language.toExtendedCfgNode import io.joern.dataflowengineoss.queryengine._ import io.joern.dataflowengineoss.semanticsloader.{Parser, Semantics} @@ -58,10 +60,24 @@ final case class OverflowDbDriver( val cpg: Cpg = PlumeStatistics.time(PlumeStatistics.TIME_OPEN_DRIVER, { newOverflowGraph(odbConfig) }) - private val semanticsParser = new Parser() - private val defaultSemantics: Try[BufferedSource] = Try( - Source.fromInputStream(getClass.getClassLoader.getResourceAsStream("default.semantics")) - ) + private val defaultSemanticsFile = "default.semantics" + val methodSemantics: Semantics = cacheConfig.methodSemantics match { + case Some(semantics) => Semantics.fromList(semantics) + case None => + logger.info("No specified method semantics file given. Using default semantics.") + Try( + Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(defaultSemanticsFile)) + ) match { + case Failure(e) => + logger.warn( + "No 'default.semantics' file found under resources - data flow tracking will over-taint.", + e + ) + Semantics.fromList(List()) + case Success(input: BufferedSource) => + Semantics.fromList(new Parser().parse(input.getLines().mkString("\n"))) + } + } /** Reads the saved cache on the disk and retrieves it as a serializable object */ @@ -85,16 +101,6 @@ final case class OverflowDbDriver( { deserializeResultTable(fetchCacheFromDisk, cpg) } ) - private implicit var context: EngineContext = - EngineContext( - Semantics.fromList(List()), - EngineConfig( - maxCallDepth = cacheConfig.maxCallDepth, - initialTable = resultTable, - shareCacheBetweenTasks = cacheConfig.shareCacheBetweenTasks - ) - ) - private def saveDataflowCache(): Unit = cacheConfig.dataFlowCacheFile match { case Some(filePath) if resultTable.isDefined && resultTable.get.table.nonEmpty => PlumeStatistics.time( @@ -112,59 +118,6 @@ final case class OverflowDbDriver( case _ => // Do nothing } - /** Sets the context for the data-flow engine when performing [[flowsBetween]] queries. - * - * @param maxCallDepth the new method call depth. - * @param methodSemantics the file containing method semantics for external methods. - * @param initialCache an initializer for the data-flow cache containing pre-calculated paths. - */ - def setDataflowContext( - maxCallDepth: Int, - methodSemantics: Option[BufferedSource] = None, - initialCache: Option[ResultTable] = None, - shareCacheBetweenTasks: Boolean = false - ): EngineContext = { - val cache = - if (initialCache.isDefined) initialCache else resultTable - - if (methodSemantics.isDefined) { - setDataflowContext( - maxCallDepth, - Semantics.fromList(semanticsParser.parse(methodSemantics.get.getLines().mkString("\n"))), - cache, - shareCacheBetweenTasks - ) - } else if (defaultSemantics.isSuccess) { - logger.info( - "No specified method semantics file given. Using default semantics." - ) - setDataflowContext( - maxCallDepth, - Semantics.fromList(semanticsParser.parse(defaultSemantics.get.getLines().mkString("\n"))), - cache, - shareCacheBetweenTasks - ) - } else { - logger.warn( - "No \"default.semantics\" file found under resources - data flow tracking may not perform correctly." - ) - setDataflowContext(maxCallDepth, Semantics.fromList(List()), cache, shareCacheBetweenTasks) - } - } - - private def setDataflowContext( - maxCallDepth: Int, - methodSemantics: Semantics, - cache: Option[ResultTable], - shareCacheBetweenTasks: Boolean - ): EngineContext = { - context = EngineContext( - methodSemantics, - EngineConfig(maxCallDepth, cache, shareCacheBetweenTasks) - ) - context - } - override def isConnected: Boolean = !cpg.graph.isClosed override def close(): Unit = PlumeStatistics.time( @@ -380,19 +333,21 @@ final case class OverflowDbDriver( * @param source the source query to match. * @param sink the sink query to match. * @param sanitizers a set of full method names to filter paths out with. + * @param noCacheSharing specifies if this run should not share cache results between tasks. * @return the source nodes whose data flows to the given sinks uninterrupted. */ def flowsBetween( source: Traversal[CfgNode], sink: Traversal[CfgNode], - sanitizers: Set[String] = Set.empty[String] + sanitizers: Set[String] = Set.empty[String], + noCacheSharing: Boolean = false ): List[ReachableByResult] = PlumeStatistics.time( PlumeStatistics.TIME_REACHABLE_BY_QUERYING, { import io.shiftleft.semanticcpg.language._ - prepareInitialTable() - val results: List[ReachableByResult] = sink.reachableByDetailed(source)(context) + val engineContext = prepareInitialTable(noCacheSharing) + val results: List[ReachableByResult] = sink.reachableByDetailed(source)(engineContext) captureDataflowCache(results) results @@ -412,16 +367,16 @@ final case class OverflowDbDriver( } ) - private def prepareInitialTable(): Unit = { + private def prepareInitialTable(noCacheSharing: Boolean): EngineContext = { cacheConfig.dataFlowCacheFile match { case Some(_) => val oldCache = resultTable.getOrElse(new ResultTable) if (oldCache.table.map(_._2.size).sum <= cacheConfig.maxCachedPaths) { setDataflowContext( - context.config.maxCallDepth, - context.semantics, + cacheConfig.maxCallDepth, + methodSemantics, Some(oldCache), - cacheConfig.shareCacheBetweenTasks + shareCacheBetweenTasks = !noCacheSharing ) } else { val newCache = new ResultTable @@ -439,13 +394,19 @@ final case class OverflowDbDriver( oldCache.table.clear() resultTable = Some(newCache) setDataflowContext( - context.config.maxCallDepth, - context.semantics, + cacheConfig.maxCallDepth, + methodSemantics, resultTable, - cacheConfig.shareCacheBetweenTasks + shareCacheBetweenTasks = !noCacheSharing ) } case _ => + setDataflowContext( + cacheConfig.maxCallDepth, + methodSemantics, + None, + shareCacheBetweenTasks = !noCacheSharing + ) } } diff --git a/src/main/scala/com/github/plume/oss/util/DataFlowEngineUtil.scala b/src/main/scala/com/github/plume/oss/util/DataFlowEngineUtil.scala new file mode 100644 index 00000000..581b1730 --- /dev/null +++ b/src/main/scala/com/github/plume/oss/util/DataFlowEngineUtil.scala @@ -0,0 +1,40 @@ +package com.github.plume.oss.util + +import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext, ResultTable} +import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics} + +import java.nio.file.{Path, Paths} + +/** Helper methods for setting up the data flow engine execution context. + */ +object DataFlowEngineUtil { + + /** Sets the context for the data-flow engine when performing + * [[com.github.plume.oss.drivers.OverflowDbDriver.flowsBetween()]] queries. + * + * @param maxCallDepth the new method call depth. + * @param methodSemantics the file containing method semantics for external methods. + * @param initialCache an initializer for the data-flow cache containing pre-calculated paths. + * @param shareCacheBetweenTasks enables the sharing of cache between data flow tasks. + */ + def setDataflowContext( + maxCallDepth: Int, + methodSemantics: Semantics, + initialCache: Option[ResultTable], + shareCacheBetweenTasks: Boolean + ): EngineContext = { + EngineContext( + methodSemantics, + EngineConfig(maxCallDepth, initialCache, shareCacheBetweenTasks) + ) + } +} + +case class DataFlowCacheConfig( + methodSemantics: Option[List[FlowSemantic]] = None, + dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")), + compressDataFlowCache: Boolean = true, + maxCallDepth: Int = 2, + maxCachedPaths: Int = 1_000, + shareCacheBetweenTasks: Boolean = false +) diff --git a/src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala b/src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala index d0dafe5d..63fb9994 100644 --- a/src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala +++ b/src/test/scala/com/github/plume/oss/drivers/OverflowDbTests.scala @@ -2,6 +2,8 @@ package com.github.plume.oss.drivers import com.github.plume.oss.testfixtures.PlumeDriverFixture import com.github.plume.oss.testfixtures.PlumeDriverFixture.{b1, m1} +import com.github.plume.oss.util.DataFlowCacheConfig +import io.joern.dataflowengineoss.semanticsloader.Parser import io.shiftleft.codepropertygraph.generated.{Cpg, EdgeTypes} import io.shiftleft.passes.IntervalKeyPool import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph @@ -22,27 +24,20 @@ class OverflowDbTests extends PlumeDriverFixture(new OverflowDbDriver()) { Files.write(methodSemanticsPath, "\"Foo.bar\" 1->-1\n".getBytes(StandardCharsets.UTF_8)) "should allow for custom method semantics to be defined" in { - driver match { - case x: OverflowDbDriver => - x.setDataflowContext( - 2, - Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath))) - ) - } + val parser = new Parser() + val rawSemantics = Source + .fromInputStream(Files.newInputStream(methodSemanticsPath)) + .getLines() + .mkString("\n") + val config = DataFlowCacheConfig(methodSemantics = Some(parser.parse(rawSemantics))) + new OverflowDbDriver(cacheConfig = config).close() } "should handle the case where no default semantics can be retrieved" in { - val field: Field = driver.getClass.getDeclaredField("defaultSemantics") + val field: Field = driver.getClass.getDeclaredField("defaultSemanticsFile") field.setAccessible(true) - field.set(driver, Try.apply(throw new Exception("Foo"))) - - driver match { - case x: OverflowDbDriver => - x.setDataflowContext( - 2, - Some(Source.fromInputStream(Files.newInputStream(methodSemanticsPath))) - ) - } + field.set(driver, null) + new OverflowDbDriver().close() } "should be able to serialize and deserialize XML graphs without throwing an exception" in { diff --git a/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala b/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala index aaa2d1b8..f1868dce 100644 --- a/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala +++ b/src/test/scala/com/github/plume/oss/testfixtures/Jimple2CpgFixture.scala @@ -3,7 +3,7 @@ package com.github.plume.oss.testfixtures import com.github.plume.oss.{Jimple2Cpg, PlumeStatistics} import com.github.plume.oss.drivers.OverflowDbDriver import com.github.plume.oss.JavaCompiler.compileJava -import com.github.plume.oss.domain.DataFlowCacheConfig +import com.github.plume.oss.util.DataFlowCacheConfig import io.joern.x2cpg.testfixtures.{CodeToCpgFixture, LanguageFrontend} import io.shiftleft.codepropertygraph.Cpg import org.slf4j.LoggerFactory