diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c602059..5adae195 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [1.1.18] - 2022-03-30 + +### Added + +- New `PlumeStatistics` entries related to measuring result re-use and fetching speeds. + +### Changed + +- Data flow cache now writes to `.cbor` instead of `.json` for improved I/O performance. + ## [1.1.17] - 2022-03-29 ### Changed diff --git a/build.sbt b/build.sbt index d753872e..dc93d5d2 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ name := "Plume" inThisBuild( List( organization := "com.github.plume-oss", - version := "1.1.15-SNAP", + version := "1.1.18", scalaVersion := "2.13.8", crossScalaVersions := Seq("2.13.8", "3.1.1"), resolvers ++= Seq( @@ -42,30 +42,31 @@ Test / fork := true Test / parallelExecution := false libraryDependencies ++= Seq( - "io.shiftleft" %% "codepropertygraph" % cpgVersion, - "io.shiftleft" %% "semanticcpg" % cpgVersion, - "io.joern" %% "dataflowengineoss" % joernVersion, - "io.joern" %% "x2cpg" % joernVersion, - "io.joern" %% "jimple2cpg" % joernVersion, - "io.joern" %% "x2cpg" % joernVersion % Test classifier "tests", - "org.soot-oss" % "soot" % sootVersion, - "org.apache.tinkerpop" % "tinkergraph-gremlin" % tinkerGraphVersion, - "org.apache.tinkerpop" % "gremlin-driver" % tinkerGraphVersion, - "org.neo4j.driver" % "neo4j-java-driver" % neo4jVersion, - "commons-codec" % "commons-codec" % apacheCodecVersion, - "commons-io" % "commons-io" % apacheIoVersion, - "org.apache.commons" % "commons-lang3" % apacheLangVersion, - "com.softwaremill.sttp.client3" %% "core" % sttpVersion, - "com.softwaremill.sttp.client3" %% "circe" % sttpVersion, - "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, - "com.fasterxml.jackson.module" %% 
"jackson-module-scala" % jacksonVersion, - "org.scalaj" % "scalaj-http_2.13" % scalajHttpVersion, - "org.lz4" % "lz4-java" % lz4Version, - "org.slf4j" % "slf4j-api" % slf4jVersion, - "org.scala-lang" % "scala-reflect" % scalaVersion.value, - "org.apache.logging.log4j" % "log4j-core" % log4jVersion % Test, - "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4jVersion % Test, - "org.scalatest" %% "scalatest" % scalatestVersion % Test + "io.shiftleft" %% "codepropertygraph" % cpgVersion, + "io.shiftleft" %% "semanticcpg" % cpgVersion, + "io.joern" %% "dataflowengineoss" % joernVersion, + "io.joern" %% "x2cpg" % joernVersion, + "io.joern" %% "jimple2cpg" % joernVersion, + "io.joern" %% "x2cpg" % joernVersion % Test classifier "tests", + "org.soot-oss" % "soot" % sootVersion, + "org.apache.tinkerpop" % "tinkergraph-gremlin" % tinkerGraphVersion, + "org.apache.tinkerpop" % "gremlin-driver" % tinkerGraphVersion, + "org.neo4j.driver" % "neo4j-java-driver" % neo4jVersion, + "commons-codec" % "commons-codec" % apacheCodecVersion, + "commons-io" % "commons-io" % apacheIoVersion, + "org.apache.commons" % "commons-lang3" % apacheLangVersion, + "com.softwaremill.sttp.client3" %% "core" % sttpVersion, + "com.softwaremill.sttp.client3" %% "circe" % sttpVersion, + "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, + "com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion, + "com.fasterxml.jackson.dataformat" % "jackson-dataformat-cbor" % jacksonVersion, + "org.scalaj" % "scalaj-http_2.13" % scalajHttpVersion, + "org.lz4" % "lz4-java" % lz4Version, + "org.slf4j" % "slf4j-api" % slf4jVersion, + "org.scala-lang" % "scala-reflect" % scalaVersion.value, + "org.apache.logging.log4j" % "log4j-core" % log4jVersion % Test, + "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4jVersion % Test, + "org.scalatest" %% "scalatest" % scalatestVersion % Test ) ++ Seq( "io.circe" %% "circe-core", "io.circe" %% "circe-generic", diff --git 
a/src/main/scala/com/github/plume/oss/PlumeStatistics.scala b/src/main/scala/com/github/plume/oss/PlumeStatistics.scala index 4f301990..546ea5fe 100644 --- a/src/main/scala/com/github/plume/oss/PlumeStatistics.scala +++ b/src/main/scala/com/github/plume/oss/PlumeStatistics.scala @@ -11,7 +11,8 @@ object PlumeStatistics extends Enumeration { type PlumeStatistic = Value val TIME_OPEN_DRIVER, TIME_CLOSE_DRIVER, TIME_EXTRACTION, TIME_REACHABLE_BY_QUERYING, - TIME_REMOVING_OUTDATED_GRAPH, PROGRAM_CLASSES, PROGRAM_METHODS = Value + TIME_REMOVING_OUTDATED_GRAPH, TIME_REMOVING_OUTDATED_CACHE, TIME_RETRIEVING_CACHE, + TIME_STORING_CACHE, PROGRAM_CLASSES, PROGRAM_METHODS = Value private val statistics: mutable.Map[PlumeStatistic, Long] = PlumeStatistics.values.map((_, 0L)).to(collection.mutable.Map) diff --git a/src/main/scala/com/github/plume/oss/domain/package.scala b/src/main/scala/com/github/plume/oss/domain/package.scala index 0693b9f5..f69e2675 100644 --- a/src/main/scala/com/github/plume/oss/domain/package.scala +++ b/src/main/scala/com/github/plume/oss/domain/package.scala @@ -2,8 +2,8 @@ package com.github.plume.oss import com.fasterxml.jackson.core.util.DefaultPrettyPrinter import com.fasterxml.jackson.databind.annotation.JsonDeserialize -import com.fasterxml.jackson.databind.json.JsonMapper -import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} +import com.fasterxml.jackson.dataformat.cbor.databind.CBORMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule import io.joern.dataflowengineoss.queryengine.{PathElement, ReachableByResult, ResultTable} import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.codepropertygraph.generated.nodes.{Call, CfgNode, StoredNode} @@ -23,10 +23,10 @@ import scala.util.Using package object domain { private val logger = LoggerFactory.getLogger("com.github.plume.oss.domain") - private val mapper = JsonMapper + private val mapper = CBORMapper .builder() 
.addModule(DefaultScalaModule) - .build() :: ClassTagExtensions + .build() /** Given an object and a path, will serialize the object to the given path. * @param o object to serialize. diff --git a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala index 2e624946..0eaae7ef 100644 --- a/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala +++ b/src/main/scala/com/github/plume/oss/drivers/OverflowDbDriver.scala @@ -46,7 +46,7 @@ final case class OverflowDbDriver( ), heapPercentageThreshold: Int = 80, serializationStatsEnabled: Boolean = false, - dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.json")), + dataFlowCacheFile: Option[Path] = Some(Paths.get("dataFlowCache.cbor")), compressDataFlowCache: Boolean = true ) extends IDriver { @@ -77,7 +77,12 @@ final case class OverflowDbDriver( dataFlowCacheFile match { case Some(filePath) => if (Files.isRegularFile(filePath)) - Some(deserializeCache(filePath, compressDataFlowCache)) + Some( + PlumeStatistics.time( + PlumeStatistics.TIME_RETRIEVING_CACHE, + { deserializeCache(filePath, compressDataFlowCache) } + ) + ) else Some(new ConcurrentHashMap[Long, Vector[SerialReachableByResult]]()) case None => None @@ -86,12 +91,21 @@ final case class OverflowDbDriver( private implicit var context: EngineContext = EngineContext( Semantics.fromList(List()), - EngineConfig(initialTable = deserializeResultTable(table, cpg)) + EngineConfig(initialTable = + PlumeStatistics.time( + PlumeStatistics.TIME_RETRIEVING_CACHE, + { deserializeResultTable(table, cpg) } + ) + ) ) private def saveDataflowCache(): Unit = dataFlowCacheFile match { case Some(filePath) if table.isDefined && !table.get.isEmpty => - serializeCache(table.get, filePath, compressDataFlowCache) + PlumeStatistics.time( + PlumeStatistics.TIME_STORING_CACHE, { + serializeCache(table.get, filePath, compressDataFlowCache) + } + ) case _ => // Do nothing } @@ -423,29 
+437,33 @@ final case class OverflowDbDriver( table match { case Some(oldTab) => - val startPSize = oldTab.asScala.flatMap(_._2).size - - val newTab = oldTab.asScala - .filter { case (k: Long, _) => isNodeUnderTypes(k, unchangedTypes) } - .map { case (k: Long, v: Vector[SerialReachableByResult]) => - val filteredPaths = v.filterNot(isResultExpired) - (k, filteredPaths) + PlumeStatistics.time( + PlumeStatistics.TIME_REMOVING_OUTDATED_CACHE, { + val startPSize = oldTab.asScala.flatMap(_._2).size + + val newTab = oldTab.asScala + .filter { case (k: Long, _) => isNodeUnderTypes(k, unchangedTypes) } + .map { case (k: Long, v: Vector[SerialReachableByResult]) => + val filteredPaths = v.filterNot(isResultExpired) + (k, filteredPaths) + } + .toMap + // Refresh old table and add new entries + oldTab.clear() + newTab.foreach { case (k, v) => oldTab.put(k, v) } + + val leftOverPSize = newTab.flatMap(_._2).size + if (startPSize > 0) + logger.info( + s"Able to re-use ${(leftOverPSize.toDouble / startPSize) * 100.0}% of the saved paths. " + + s"Removed ${startPSize - leftOverPSize} expired paths from $startPSize saved paths." + ) + setDataflowContext( + context.config.maxCallDepth, + context.semantics, + deserializeResultTable(Some(oldTab), cpg) + ) } - .toMap - // Refresh old table and add new entries - oldTab.clear() - newTab.foreach { case (k, v) => oldTab.put(k, v) } - - val leftOverPSize = newTab.flatMap(_._2).size - if (startPSize > 0) - logger.info( - s"Able to re-use ${(leftOverPSize.toDouble / startPSize) * 100.0}% of the saved paths. " + - s"Removed ${startPSize - leftOverPSize} expired paths from $startPSize saved paths." - ) - setDataflowContext( - context.config.maxCallDepth, - context.semantics, - deserializeResultTable(Some(oldTab), cpg) ) case None => // Do nothing }