Skip to content

Commit

Permalink
Fork Join Pass Update + New BulkTx(overflowdb.BatchedUpdate) API (#240)
Browse files Browse the repository at this point in the history
* Moved diff pass into parent since it runs in serial

* Basic infrastructure down, time to handle new applied diff graphs

* Made sure nDiffT mutates the correct variable

* OverflowDB new bulkTx handles create edges well

* OverflowDB new bulkTx handles the rest

* Handling method resolve failure on invoke expr

* Working new diff graph tests for overflowdb

* Working new diff for gremlin

* Styling

* Neo4j new bulkTx ready but untested

* TigerGraph new bulkTx ready but untested

* Updated changelog

* Added to changelog

* Fixed TG:bulkTx

* See if this fixes things

* See if this fixes things

* Moved batched update helpers to BatchedUpdateUtil

* Upgraded to latest CPG and Joern and accounted for those changes too

* Quick blacklist bugfix

* MethodStub blacklist fix
  • Loading branch information
DavidBakerEffendi authored Mar 4, 2022
1 parent 586b874 commit 94a40b2
Show file tree
Hide file tree
Showing 29 changed files with 1,339 additions and 441 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Added

- Overloaded `bulkTx` to handle new `overflowdb.BatchedUpdate` objects.

### Fixed

- Instance where dynamic `InvokeExpr::getMethod` would fail; `getMethodRef` is now used instead.

### Changed

- Updated passes to handle new `ForkJoinParallel` passes.
- Upgraded CPG and Joern versions to latest.

## [1.0.16] - 2022-03-01

### Added
Expand Down
6 changes: 4 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ inThisBuild(
)
)

val cpgVersion = "1.3.493"
val joernVersion = "1.1.502"
val cpgVersion = "1.3.509"
val joernVersion = "1.1.590"
val sootVersion = "4.2.1"
val tinkerGraphVersion = "3.4.8"
val neo4jVersion = "4.4.3"
Expand All @@ -36,11 +36,13 @@ lazy val NeptuneIntTest = config("nepTest") extend Test

trapExit := false
Test / fork := true
Test / parallelExecution := false

libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % cpgVersion,
"io.shiftleft" %% "semanticcpg" % cpgVersion,
"io.joern" %% "dataflowengineoss" % joernVersion,
"io.joern" %% "x2cpg" % joernVersion,
"io.shiftleft" %% "semanticcpg" % cpgVersion % Test classifier "tests",
"org.soot-oss" % "soot" % sootVersion,
"org.apache.tinkerpop" % "tinkergraph-gremlin" % tinkerGraphVersion,
Expand Down
6 changes: 3 additions & 3 deletions src/main/scala/com/github/plume/oss/Jimple2Cpg.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ import com.github.plume.oss.passes._
import com.github.plume.oss.passes.concurrent.{
PlumeCfgCreationPass,
PlumeContainsEdgePass,
PlumeDiffPass,
PlumeHashPass
}
import com.github.plume.oss.passes.forkjoin.{PlumeCdgPass, PlumeCfgDominatorPass}
import com.github.plume.oss.passes.parallel._
import com.github.plume.oss.util.ProgramHandlingUtil
import com.github.plume.oss.util.ProgramHandlingUtil.{extractSourceFilesFromArchive, moveClassFiles}
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.X2Cpg.newEmptyCpg
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.{NodeTypes, PropertyNames}
import io.shiftleft.passes.CpgPassBase
import io.shiftleft.x2cpg.SourceFiles
import io.shiftleft.x2cpg.X2Cpg.newEmptyCpg
import org.slf4j.LoggerFactory
import soot.options.Options
import soot.{G, PhaseOptions, Scene, SootClass}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/github/plume/oss/Plume.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import com.github.plume.oss.drivers.{
TinkerGraphDriver
}
import io.circe.Json
import io.shiftleft.x2cpg.{X2Cpg, X2CpgConfig}
import io.joern.x2cpg.{X2Cpg, X2CpgConfig}
import scopt.OParser

import java.io.InputStreamReader
Expand Down
78 changes: 78 additions & 0 deletions src/main/scala/com/github/plume/oss/drivers/GremlinDriver.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.github.plume.oss.drivers

import com.github.plume.oss.PlumeStatistics
import com.github.plume.oss.util.BatchedUpdateUtil._
import io.shiftleft.codepropertygraph.generated.nodes.{AbstractNode, NewNode, StoredNode}
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames}
import io.shiftleft.passes.AppliedDiffGraph
Expand All @@ -18,6 +19,8 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.{
import org.apache.tinkerpop.gremlin.structure.{Edge, Graph, T, Vertex}
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate.AppliedDiff
import overflowdb.{BatchedUpdate, DetachedNodeData}

import java.util.concurrent.atomic.AtomicBoolean
import scala.collection.mutable
Expand Down Expand Up @@ -90,6 +93,81 @@ abstract class GremlinDriver(txMax: Int = 50) extends IDriver {
.foreach { ops: Seq[Change] => bulkEdgeTx(g(), ops, dg) }
}

/** Executes the changes of an overflowdb [[AppliedDiff]] in batches of at most `txMax` changes.
  *
  * The diff graph is swept twice: the first sweep handles node creations, node removals and node
  * property updates; the second sweep handles edge creations. Within a sweep, the changes of a batch
  * are chained onto a single Gremlin traversal that is executed with `iterate()`.
  *
  * @param dg the applied diff whose changes are to be executed.
  */
override def bulkTx(dg: AppliedDiff): Unit = {
  // Sweep 1: node-level changes.
  dg.getDiffGraph.iterator.asScala
    .collect {
      case c: BatchedUpdate.RemoveNode      => c
      case c: BatchedUpdate.SetNodeProperty => c
      case c: DetachedNodeData              => c
    }
    .grouped(txMax)
    .foreach { changes =>
      // Traversal accumulated so far for this batch; None until the first change is chained.
      var ptr: Option[GraphTraversal[Vertex, Vertex]] = None
      changes.foreach {
        case node: DetachedNodeData =>
          val nodeId  = typedNodeId(idFromNodeData(node))
          val propMap = propertiesFromNodeData(node)
          val added: GraphTraversal[Vertex, Vertex] = ptr match {
            case Some(p) => p.addV(node.label).property(T.id, nodeId)
            case None    => g().addV(node.label).property(T.id, nodeId)
          }
          serializeLists(propMap).foreach { case (k, v) => added.property(k, v) }
          ptr = Some(added)
        case c: BatchedUpdate.RemoveNode =>
          val nodeId = typedNodeId(c.node.id())
          val dropping: GraphTraversal[Vertex, Vertex] = ptr match {
            case Some(p) => p.V(nodeId).drop()
            case None    => g().V(nodeId).drop()
          }
          // drop() filters out every traverser, so any step chained after it would never run.
          // Execute what has been accumulated (including the drop) now and start a fresh traversal
          // for the remaining changes of this batch.
          dropping.iterate()
          ptr = None
        case c: BatchedUpdate.SetNodeProperty =>
          // These two properties are written as an array obtained by splitting the value on ','.
          val v =
            if (
              c.label == PropertyNames.INHERITS_FROM_TYPE_FULL_NAME || c.label == PropertyNames.OVERLAYS
            )
              c.value.toString.split(",")
            else c.value
          val nodeId = typedNodeId(c.node.id())
          ptr match {
            case Some(p) => ptr = Some(p.V(nodeId).property(c.label, v))
            case None    => ptr = Some(g().V(nodeId).property(c.label, v))
          }
      }
      // Commit transaction
      ptr.foreach(_.iterate())
    }
  // Sweep 2: edge creations.
  dg.getDiffGraph.iterator.asScala
    .collect { case c: BatchedUpdate.CreateEdge => c }
    .grouped(txMax)
    .foreach { changes =>
      var ptr: Option[GraphTraversal[Vertex, Edge]] = None
      changes.foreach { c: BatchedUpdate.CreateEdge =>
        val srcId = typedNodeId(idFromNodeData(c.src))
        val dstId = typedNodeId(idFromNodeData(c.dst))
        val edgeTraversal: GraphTraversal[Vertex, Edge] = ptr match {
          case Some(p) => p.V(srcId).addE(c.label).to(__.V(dstId))
          case None    => g().V(srcId).addE(c.label).to(__.V(dstId))
        }
        // propertiesAndKeys may be null when the edge carries no properties.
        Option(c.propertiesAndKeys).foreach { edgeKeyValues =>
          propertiesFromObjectArray(edgeKeyValues).foreach { case (k, v) =>
            edgeTraversal.property(k, v)
          }
        }
        ptr = Some(edgeTraversal)
      }
      // Commit transaction
      ptr.foreach(_.iterate())
    }
}

private def bulkNodeTx(
g: GraphTraversalSource,
ops: Seq[Change],
Expand Down
39 changes: 8 additions & 31 deletions src/main/scala/com/github/plume/oss/drivers/IDriver.scala
Original file line number Diff line number Diff line change
@@ -1,39 +1,11 @@
package com.github.plume.oss.drivers

import io.shiftleft.codepropertygraph.generated.nodes.{
AbstractNode,
Block,
Call,
ControlStructure,
FieldIdentifier,
File,
Identifier,
JumpTarget,
Literal,
Local,
Member,
MetaData,
Method,
MethodParameterIn,
MethodParameterOut,
MethodRef,
MethodReturn,
Modifier,
Namespace,
NamespaceBlock,
NewNode,
Return,
StoredNode,
Type,
TypeArgument,
TypeDecl,
TypeParameter,
TypeRef,
Unknown
}
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.{EdgeTypes, NodeTypes, PropertyNames}
import io.shiftleft.passes.AppliedDiffGraph
import org.slf4j.LoggerFactory
import overflowdb.BatchedUpdate.AppliedDiff
import overflowdb.{DetachedNodeData, DetachedNodeGeneric, Node}

import scala.collection.mutable

Expand Down Expand Up @@ -64,6 +36,11 @@ trait IDriver extends AutoCloseable {
*/
def bulkTx(dg: AppliedDiffGraph): Unit

/** Executes every change contained in the given overflowdb.BatchedUpdate.AppliedDiff as one or more
 * bulk transactions.
 *
 * @param dg the applied diff whose changes are to be executed.
 */
def bulkTx(dg: AppliedDiff): Unit

/** Removes the TYPE, TYPE_DECL, METHOD (with AST children), and NAMESPACE_BLOCK related to the given
 * filenames.
 *
 * @param filenames the names of the source files whose related entries are to be removed.
 */
def removeSourceFiles(filenames: String*): Unit
Expand Down
Loading

0 comments on commit 94a40b2

Please sign in to comment.