From c677a03f4c58e81d863de1a524733275deb262b7 Mon Sep 17 00:00:00 2001 From: KuechA <31155350+KuechA@users.noreply.github.com> Date: Fri, 3 Mar 2023 17:01:35 +0100 Subject: [PATCH] Even faster Control Flow Sensitive DFG Pass (#1088) * Remove unnecessary nodes to terminate faster * Handle loops separately * Small cleanup --------- Co-authored-by: Konrad Weiss --- .../de/fraunhofer/aisec/cpg/helpers/Util.kt | 66 +++++++------------ .../cpg/passes/ControlFlowSensitiveDFGPass.kt | 48 ++++++++++---- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/cpg-core/src/main/java/de/fraunhofer/aisec/cpg/helpers/Util.kt b/cpg-core/src/main/java/de/fraunhofer/aisec/cpg/helpers/Util.kt index 986e47846c..5bfbd2c075 100644 --- a/cpg-core/src/main/java/de/fraunhofer/aisec/cpg/helpers/Util.kt +++ b/cpg-core/src/main/java/de/fraunhofer/aisec/cpg/helpers/Util.kt @@ -29,9 +29,7 @@ import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.ParamVariableDeclaration import de.fraunhofer.aisec.cpg.graph.edge.Properties -import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation import java.io.ByteArrayOutputStream @@ -40,10 +38,8 @@ import java.io.IOException import java.io.InputStream import java.nio.charset.StandardCharsets import java.util.* -import java.util.function.Consumer import java.util.function.Function import java.util.function.Predicate -import java.util.stream.Collectors import java.util.stream.IntStream import java.util.stream.Stream import org.slf4j.Logger @@ -57,10 +53,9 @@ object Util { * @return a list of nodes with the specified String. */ fun subnodesOfCode(node: Node?, searchCode: String): List { - return SubgraphWalker.flattenAST(node) - .stream() - .filter { n: Node -> n.code != null && n.code == searchCode } - .collect(Collectors.toList()) + return SubgraphWalker.flattenAST(node).filter { n: Node -> + n.code != null && n.code == searchCode + } } /** @@ -115,7 +110,7 @@ object Util { if (cn == Connect.SUBTREE) { val border = SubgraphWalker.getEOGPathEdges(n) if (en == Edge.ENTRIES) { - val pe = border.entries.flatMap { r: Node -> r.prevEOGEdges } + val pe = border.entries.flatMap { it.prevEOGEdges } if (Quantifier.ALL == q && pe.any { !it.containsProperties(props) }) return false pe.filter { it.containsProperties(props) }.map { it.start } @@ -132,11 +127,11 @@ object Util { } refSide = if (cr == Connect.SUBTREE) { - val borders = refs.map { n -> SubgraphWalker.getEOGPathEdges(n) } + val borders = refs.map { SubgraphWalker.getEOGPathEdges(it) } - borders.flatMap { border: SubgraphWalker.Border -> + borders.flatMap { border -> if (Edge.ENTRIES == er) { - val pe = border.entries.flatMap { r: Node -> r.prevEOGEdges } + val pe = border.entries.flatMap { it.prevEOGEdges } if (Quantifier.ALL == q && pe.any { !it.containsProperties(props) }) return false pe.filter { it.containsProperties(props) }.map { it.start } @@ -149,11 +144,11 @@ object Util { if (Quantifier.ALL == q && pe.any { !it.containsProperties(props) }) return false pe.filter { it.containsProperties(props) }.map { it.start } - } else java.util.List.of(node) + } else listOf(node) } } val refNodes = refSide - return if (Quantifier.ANY == q) nodeSide.any { o -> refNodes.contains(o) } + return if (Quantifier.ANY == q) nodeSide.any { it in refNodes } else refNodes.containsAll(nodeSide) } @@ -171,7 +166,7 @@ object Util { @JvmStatic fun distinctBy(by: Function): Predicate { - val seen: MutableSet = HashSet() + val seen = mutableSetOf() return Predicate { t: T -> seen.add(by.apply(t)) } } @@ -248,7 +243,7 @@ object Util { * @return A list of all parts of the input, as divided by any delimiter */ fun splitLeavingParenthesisContents(toSplit: String, delimiters: String): List { - val result: MutableList = ArrayList() + val result = mutableListOf() var openParentheses = 0 var currPart = StringBuilder() for (c in toSplit.toCharArray()) { @@ -263,7 +258,7 @@ object Util { } else if (delimiters.contains("" + c)) { if (openParentheses == 0) { val toAdd = currPart.toString().strip() - if (!toAdd.isEmpty()) { + if (toAdd.isNotEmpty()) { result.add(currPart.toString().strip()) } currPart = StringBuilder() @@ -274,7 +269,7 @@ object Util { currPart.append(c) } } - if (currPart.length > 0) { + if (currPart.isNotEmpty()) { result.add(currPart.toString().strip()) } return result @@ -339,11 +334,7 @@ object Util { * @param arguments The call's arguments to be connected to the target's parameters */ fun attachCallParameters(target: FunctionDeclaration, arguments: List) { - target.parameterEdges.sortWith( - Comparator.comparing { pe: PropertyEdge -> - pe.end.argumentIndex - } - ) + target.parameterEdges.sortWith(Comparator.comparing { it.end.argumentIndex }) var j = 0 while (j < arguments.size) { val parameters = target.parameters @@ -382,7 +373,7 @@ object Util { var name = name if (language != null) { val delimiter = language.namespaceDelimiter - if (name.contains(delimiter)) { + if (delimiter in name) { name = name.substring(0, name.lastIndexOf(delimiter)) } } @@ -398,18 +389,14 @@ object Util { fun detachCallParameters(target: FunctionDeclaration, arguments: List) { for (param in target.parameters) { // A param could be variadic, so multiple arguments could be set as incoming DFG - param.prevDFG - .stream() - .filter { o: Node? -> arguments.contains(o) } - .forEach { next: Node? -> param.removeNextDFG(next) } + param.prevDFG.filter { o: Node? -> o in arguments }.forEach { param.removeNextDFG(it) } } } @JvmStatic fun reverse(input: Stream): Stream { val temp = input.toArray() - return IntStream.range(0, temp.size).mapToObj { i: Int -> temp[temp.size - i - 1] } - as Stream + return IntStream.range(0, temp.size).mapToObj { i -> temp[temp.size - i - 1] } as Stream } /** @@ -421,18 +408,11 @@ object Util { */ fun getAdjacentDFGNodes(n: Node?, incoming: Boolean): MutableList { val subnodes = SubgraphWalker.getAstChildren(n) - val adjacentNodes: MutableList - adjacentNodes = + val adjacentNodes = if (incoming) { - subnodes - .stream() - .filter { prevCandidate: Node -> prevCandidate.nextDFG.contains(n) } - .collect(Collectors.toList()) + subnodes.filter { it.nextDFG.contains(n) }.toMutableList() } else { - subnodes - .stream() - .filter { nextCandidate: Node -> nextCandidate.prevDFG.contains(n) } - .collect(Collectors.toList()) + subnodes.filter { it.prevDFG.contains(n) }.toMutableList() } return adjacentNodes } @@ -451,14 +431,14 @@ object Util { branchingExp: Node?, branchingDecl: Node? ) { - var conditionNodes: MutableList = ArrayList() + var conditionNodes = mutableListOf() if (branchingExp != null) { - conditionNodes = ArrayList() + conditionNodes = mutableListOf() conditionNodes.add(branchingExp) } else if (branchingDecl != null) { conditionNodes = getAdjacentDFGNodes(branchingDecl, true) } - conditionNodes.forEach(Consumer { prev: Node? -> n.addPrevDFG(prev!!) }) + conditionNodes.forEach { n.addPrevDFG(it) } } enum class Connect { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt index 477e8deb31..72fd969122 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt @@ -257,7 +257,7 @@ open class ControlFlowSensitiveDFGPass : Pass() { } nodesOutsideTheLoop .map { it.end } - .forEach { worklist.add(Pair(it, copyMap(previousWrites))) } + .forEach { worklist.add(Pair(it, copyMap(previousWrites, it))) } } iterable?.let { writtenTo?.addPrevDFG(it) } @@ -282,9 +282,8 @@ open class ControlFlowSensitiveDFGPass : Pass() { .filter { it.getProperty(Properties.UNREACHABLE) != true } .map { it.end } .forEach { - val newPair = Pair(it, copyMap(previousWrites)) - if (!worklistHasSimilarPair(worklist, alreadyProcessed, newPair)) - worklist.add(newPair) + val newPair = Pair(it, copyMap(previousWrites, it)) + if (!worklistHasSimilarPair(worklist, newPair)) worklist.add(newPair) } } } @@ -318,7 +317,6 @@ open class ControlFlowSensitiveDFGPass : Pass() { */ private fun worklistHasSimilarPair( worklist: MutableList>>>, - alreadyProcessed: MutableSet>>, newPair: Pair>> ): Boolean { // We collect all states in the worklist which are only a subset of the new pair. We will @@ -330,7 +328,7 @@ open class ControlFlowSensitiveDFGPass : Pass() { // The next nodes match. Now check the last writes for each declaration. var allWritesMatch = true var allExistingWritesMatch = true - for ((lastWriteDecl, lastWriteList) in newPairLastMap) { + for ((lastWriteDecl, lastWrite) in newPairLastMap) { // We ignore FieldDeclarations because we cannot be sure how interprocedural // data flows affect the field. Handling them in the state would only blow up @@ -340,19 +338,19 @@ open class ControlFlowSensitiveDFGPass : Pass() { // Will we generate the same "prev DFG" with the item that is already in the // list? allWritesMatch = - allWritesMatch && - existingPair.second[lastWriteDecl]?.last() == lastWriteList + allWritesMatch && existingPair.second[lastWriteDecl]?.last() == lastWrite // All last writes which exist in the "existing pair" match but we have new // declarations in the current one allExistingWritesMatch = allExistingWritesMatch && (lastWriteDecl !in existingPair.second || - existingPair.second[lastWriteDecl]?.last() == lastWriteList) + existingPair.second[lastWriteDecl]?.last() == lastWrite) } // We found a matching pair in the worklist? Done. Otherwise, maybe there's another // pair... if (allWritesMatch) return true - // The new state is a superset of the old one? We delete the old one. + // The new state is a superset of the old one? We will add the missing pieces to the + // old one. if (allExistingWritesMatch) { subsets.add(existingPair) } @@ -438,15 +436,37 @@ open class ControlFlowSensitiveDFGPass : Pass() { ((previousWrites[writtenDecl]?.filter { it == currentWritten }?.size ?: 0) >= 2) } - /** Copies the map */ + /** + * Copies the map. We remove all the declarations which are no longer relevant because they are + * in a child scope of the next hop. + */ private fun copyMap( - map: Map> + map: Map>, + nextNode: Node ): MutableMap> { val result = mutableMapOf>() for ((k, v) in map) { - result[k] = mutableListOf() - result[k]?.addAll(v) + if ( + nextNode.scope == k.scope || + !nextNode.hasOuterScopeOf(k) || + ((nextNode is ForStatement || nextNode is ForEachStatement) && + k.scope?.parent == nextNode.scope) + ) { + result[k] = mutableListOf() + result[k]?.addAll(v) + } } return result } + + private fun Node.hasOuterScopeOf(node: Node): Boolean { + var parentScope = node.scope?.parent + while (parentScope != null) { + if (this.scope == parentScope) { + return true + } + parentScope = parentScope.parent + } + return false + } }