From 2176c9adc0b015346eb1e3674b7a711acce6687a Mon Sep 17 00:00:00 2001 From: Alexander Kuechler Date: Wed, 6 Dec 2023 10:36:54 +0100 Subject: [PATCH] Try to remove some confusing edges from PDG --- .../de/fraunhofer/aisec/cpg/graph/Node.kt | 7 +- .../cpg/passes/ControlDependenceGraphPass.kt | 181 +++++++++++++++--- .../cpg/passes/ControlFlowSensitiveDFGPass.kt | 1 + .../cpg/passes/ProgramDependenceGraphPass.kt | 72 ++++++- .../passes/ProgramDependenceGraphPassTest.kt | 48 ++++- 5 files changed, 264 insertions(+), 45 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt index 129f9e03926..5b07b19f233 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt @@ -279,8 +279,11 @@ open class Node : IVisitable, Persistable, LanguageProvider, ScopeProvider prev.nextDFGEdges.add(edge) } - fun addPrevCDG(prev: Node) { - val edge = PropertyEdge(prev, this) + fun addPrevCDG( + prev: Node, + properties: MutableMap = EnumMap(Properties::class.java) + ) { + val edge = PropertyEdge(prev, this, properties) prevCDGEdges.add(edge) prev.nextCDGEdges.add(edge) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlDependenceGraphPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlDependenceGraphPass.kt index a1e12fb5df7..fdccb49c177 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlDependenceGraphPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlDependenceGraphPass.kt @@ -31,14 +31,17 @@ import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.allChildren import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.cyclomaticComplexity import de.fraunhofer.aisec.cpg.graph.edge.Properties import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge import de.fraunhofer.aisec.cpg.graph.functions import de.fraunhofer.aisec.cpg.graph.statements.IfStatement +import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConditionalExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.ShortCircuitOperator import de.fraunhofer.aisec.cpg.helpers.* import de.fraunhofer.aisec.cpg.passes.order.DependsOn +import java.util.* /** This pass builds the Control Dependence Graph (CDG) by iterating through the EOG. */ @DependsOn(EvaluationOrderGraphPass::class) @@ -63,14 +66,22 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : TranslationUnit * Repeat step 3) until you cannot move the node upwards in the CDG anymore. */ private fun handle(functionDeclaration: FunctionDeclaration) { + val max = passConfig()?.maxComplexity + val c = functionDeclaration.body?.cyclomaticComplexity ?: 0 + if (max != null && c > max) { + log.info( + "Ignoring function ${functionDeclaration.name} because its complexity (${c}) is greater than the configured maximum (${max})" + ) + return + } + // Maps nodes to their "cdg parent" (i.e. the dominator) and also has the information // through which path it is reached. If all outgoing paths of the node's dominator result in // the node, we use the dominator's state instead (i.e., we move the node one layer upwards) val startState = PrevEOGState() - startState.push( - functionDeclaration, - PrevEOGLattice(mapOf(Pair(functionDeclaration, setOf(functionDeclaration)))) - ) + val identityMap = IdentityHashMap>() + identityMap[functionDeclaration] = identitySetOf(functionDeclaration) + startState.push(functionDeclaration, PrevEOGLattice(identityMap)) val finalState = iterateEOG(functionDeclaration.nextEOGEdges, startState, ::handleEdge) ?: return @@ -83,15 +94,43 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : TranslationUnit .map { (k, v) -> Pair(k, v.toMutableSet()) } .toMutableList() val finalDominators = mutableListOf>>() + val conditionKeys = + dominatorPaths.elements.entries + .filter { (k, _) -> + (k as? BranchingNode)?.branchedBy == node || + node in + ((k as? BranchingNode)?.branchedBy?.allChildren() ?: listOf()) + } + .map { (k, _) -> k } + if (conditionKeys.isNotEmpty()) { + // The node is part of the condition. For loops, it happens that these nodes are + // somehow put in the CDG of the surrounding statement (e.g. the loop) but we don't + // want this. Move it one layer up. + for (k1 in conditionKeys) { + dominatorsList.removeIf { k1 == it.first } + finalState[k1]?.elements?.forEach { (newK, newV) -> + val entry = dominatorsList.firstOrNull { it.first == newK } + entry?.let { + dominatorsList.remove(entry) + val update = entry.second.addAll(newV) + if (update) dominatorsList.add(entry) else finalDominators.add(entry) + } + ?: dominatorsList.add(Pair(newK, newV.toMutableSet())) + } + } + } + val alreadySeen = mutableSetOf>>() + while (dominatorsList.isNotEmpty()) { val (k, v) = dominatorsList.removeFirst() + alreadySeen.add(Pair(k, v)) if ( k != functionDeclaration && v.containsAll(branchingNodeConditionals[k] ?: setOf()) ) { - // We are reachable from all the branches of branch. Add this parent to the - // worklist or update an existing entry. Also consider already existing entries - // in finalDominators list and update it (if necessary) + // We are reachable from all the branches of a branching node. Add this parent + // to the worklist or update an existing entry. Also consider already existing + // entries in finalDominators list and update it (if necessary) val newDominatorMap = finalState[k]?.elements newDominatorMap?.forEach { (newK, newV) -> if (dominatorsList.any { it.first == newK }) { @@ -103,10 +142,22 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : TranslationUnit val entry = finalDominators.first { it.first == newK } finalDominators.remove(entry) val update = entry.second.addAll(newV) - if (update) dominatorsList.add(entry) else finalDominators.add(entry) - } else { + if ( + update && + alreadySeen.none { + it.first == entry.first && it.second == entry.second + } + ) + dominatorsList.add(entry) + else finalDominators.add(entry) + } else if (alreadySeen.none { it.first == newK && it.second == newV }) { // We don't have an entry yet => add a new one - dominatorsList.add(Pair(newK, newV.toMutableSet())) + val newEntry = Pair(newK, newV.toMutableSet()) + dominatorsList.add(newEntry) + } else { + // Not sure what to do, there seems to be a cycle but this entry is not + // in finalDominators for some reason. Add to finalDominators now. + finalDominators.add(Pair(newK, newV.toMutableSet())) } } } else { @@ -115,9 +166,33 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : TranslationUnit finalDominators.add(Pair(k, v)) } } + // We have all the dominators of this node and potentially traversed the graph // "upwards". Add the CDG edges - finalDominators.filter { (k, _) -> k != node }.forEach { (k, _) -> node.addPrevCDG(k) } + finalDominators + .filter { (k, _) -> k != node } + .forEach { (k, v) -> + val properties = EnumMap(Properties::class.java) + val branchesSet = + k.nextEOGEdges + .filter { edge -> edge.end in v } + .mapNotNull { it.getProperty(Properties.BRANCH) } + .toSet() + if (branchesSet.size == 1) { + properties[Properties.BRANCH] = branchesSet.single() + } else if (branchesSet.isNotEmpty()) { + properties[Properties.BRANCH] = branchesSet + } else if ( + k is IfStatement && + branchesSet.isEmpty() && + (branchingNodeConditionals[k]?.size ?: 0) > 1 + ) { + // The if statement has only a then branch but there's a way to "jump out" + // of this branch. In this case, we want to set the false property here + properties[Properties.BRANCH] = setOf(false) + } + node.addPrevCDG(k, properties) + } } } @@ -176,19 +251,27 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : TranslationUnit */ fun handleEdge( currentEdge: PropertyEdge, - currentState: State>>, - currentWorklist: Worklist, Node, Map>> -): State>> { + currentState: State>>, + currentWorklist: Worklist, Node, IdentityHashMap>> +): State>> { // Check if we start in a branching node and if this edge leads to the conditional // branch. In this case, the next node will move "one layer downwards" in the CDG. if (currentEdge.start is BranchingNode) { // && currentEdge.isConditionalBranch()) { // We start in a branching node and end in one of the branches, so we have the // following state: // for the branching node "start", we have a path through "end". - currentState.push( - currentEdge.end, - PrevEOGLattice(mapOf(Pair(currentEdge.start, setOf(currentEdge.end)))) - ) + val prevPathLattice = + PrevEOGLattice( + IdentityHashMap( + currentState[currentEdge.start]?.elements?.filter { (k, v) -> + k == currentEdge.start + } + ) + ) + val map = IdentityHashMap>() + map[currentEdge.start] = identitySetOf(currentEdge.end) + val newPath = PrevEOGLattice(map).lub(prevPathLattice) as PrevEOGLattice + currentState.push(currentEdge.end, newPath) } else { // We did not start in a branching node, so for the next node, we have the same path // (last branching + first end node) as for the start node of this edge. @@ -198,7 +281,9 @@ fun handleEdge( val state = PrevEOGLattice( currentState[currentEdge.start]?.elements - ?: mapOf(Pair(currentEdge.start, setOf(currentEdge.end))) + ?: IdentityHashMap( + mutableMapOf(Pair(currentEdge.start, identitySetOf(currentEdge.end))) + ) ) currentState.push(currentEdge.end, state) } @@ -224,36 +309,70 @@ private fun PropertyEdge.isConditionalBranch(): Boolean { } else (this.start is IfStatement || this.start is ConditionalExpression || - this.start is ShortCircuitOperator) && this.getProperty(Properties.BRANCH) == false + this.start is ShortCircuitOperator) && this.getProperty(Properties.BRANCH) == false || + (this.start is IfStatement && + !(this.start as IfStatement).allBranchesFromMyThenBranchGoThrough( + (this.start as IfStatement).nextUnconditionalNode + )) +} + +private val IfStatement.nextUnconditionalNode: Node? + get() = this.nextEOGEdges.firstOrNull { it.getProperty(Properties.BRANCH) == null }?.end + +private fun IfStatement.allBranchesFromMyThenBranchGoThrough(node: Node?): Boolean { + if (this.thenStatement.allChildren().isNotEmpty()) return false + + if (node == null) return true + + val alreadySeen = mutableSetOf() + val nextNodes = + this.nextEOGEdges + .filter { it.getProperty(Properties.BRANCH) == true } + .map { it.end } + .toMutableList() + + while (nextNodes.isNotEmpty()) { + val nextNode = nextNodes.removeFirst() + if (nextNode == node) { + continue + } else if (nextNode.nextEOG.isEmpty()) { + // We're at the end of the EOG but didn't see "node" on this path. Fail + return false + } + alreadySeen.add(nextNode) + nextNodes.addAll(nextNode.nextEOG.filter { it !in alreadySeen }) + } + + return true } /** * Implements the [LatticeElement] over a set of nodes and their set of "nextEOG" nodes which reach * this node. */ -class PrevEOGLattice(override val elements: Map>) : - LatticeElement>>(elements) { +class PrevEOGLattice(override val elements: IdentityHashMap>) : + LatticeElement>>(elements) { override fun lub( - other: LatticeElement>> - ): LatticeElement>> { - val newMap = (other.elements).mapValues { (_, v) -> v.toMutableSet() }.toMutableMap() + other: LatticeElement>> + ): LatticeElement>> { + val newMap = IdentityHashMap(other.elements.mapValues { (_, v) -> v.toIdentitySet() }) for ((key, value) in this.elements) { - newMap.computeIfAbsent(key, ::mutableSetOf).addAll(value) + newMap.computeIfAbsent(key, ::identitySetOf).addAll(value) } return PrevEOGLattice(newMap) } - override fun duplicate() = PrevEOGLattice(this.elements.toMap()) + override fun duplicate() = PrevEOGLattice(IdentityHashMap(this.elements)) - override fun compareTo(other: LatticeElement>>): Int { + override fun compareTo(other: LatticeElement>>): Int { return if ( this.elements.keys.containsAll(other.elements.keys) && - this.elements.all { (k, v) -> v.containsAll(other.elements[k] ?: setOf()) } + this.elements.all { (k, v) -> v.containsAll(other.elements[k] ?: identitySetOf()) } ) { if ( this.elements.keys.size > (other.elements.keys.size) || - this.elements.any { (k, v) -> v.size > (other.elements[k] ?: setOf()).size } + this.elements.any { (k, v) -> v.size > (other.elements[k]?.size ?: 0) } ) 1 else 0 @@ -267,4 +386,4 @@ class PrevEOGLattice(override val elements: Map>) : * A state which actually holds a state for all [PropertyEdge]s. It maps the node to its * [BranchingNode]-parent and the path through which it is reached. */ -class PrevEOGState : State>>() +class PrevEOGState : State>>() diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt index f86d84b4293..226fec961b7 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt @@ -132,6 +132,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass * code [node]. */ protected fun clearFlowsOfVariableDeclarations(node: Node) { + // TODO: We seem to have a ConcurrentModificationException in this function sometimes?? for (varDecl in node.variables.filter { it !is FieldDeclaration && !it.isGlobal }) { varDecl.clearPrevDFG() varDecl.clearNextDFG() diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt index c31d82aeb1e..e83baaf4310 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt @@ -28,7 +28,10 @@ package de.fraunhofer.aisec.cpg.passes import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration import de.fraunhofer.aisec.cpg.graph.edge.DependenceType +import de.fraunhofer.aisec.cpg.graph.edge.PropertyEdge +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.passes.order.DependsOn import de.fraunhofer.aisec.cpg.processing.IVisitor import de.fraunhofer.aisec.cpg.processing.strategy.Strategy @@ -48,11 +51,76 @@ class ProgramDependenceGraphPass(ctx: TranslationContext) : TranslationUnitPass( * dependence edges */ override fun visit(t: Node) { - t.addAllPrevPDGEdges(t.prevDFGEdges, DependenceType.DATA) - t.addAllPrevPDGEdges(t.prevCDGEdges, DependenceType.CONTROL) + if (t is Reference) { + // We filter all prevDFGEdges if the condition affects the variable of t and + // if there's a flow from the prevDFGEdge's node through the condition to t. + val prevDFGToConsider = mutableListOf>() + t.prevDFGEdges.forEach { prevDfgEdge -> + val prevDfgNode = prevDfgEdge.start + // The prevDfgNode also flows into the condition. This is suspicious because + // if the condition is + // on each path between prevDfgNode and t, the condition is more relevant. + if (prevDfgNode is Reference || prevDfgNode is ValueDeclaration) { + val cdgConditionChildren = + t.prevCDG.flatMap { + (it as? BranchingNode)?.branchedBy?.allChildren { c + -> + c in prevDfgNode.nextDFG + } + ?: listOf() + } + if ( + cdgConditionChildren.isNotEmpty() && + cdgConditionChildren.all { + it != t && allEOGsFromToFlowThrough(prevDfgNode, t, it) + } + ) { + // All data flows from the prevDfgNode to t flow through all the + // relevant CDG condition's children. This means that we can safely + // avoid the prevDfgNode in the PDG because it will be added + // transitively. + } else { + prevDFGToConsider.add(prevDfgEdge) + } + } else { + prevDFGToConsider.add(prevDfgEdge) + } + } + t.addAllPrevPDGEdges(prevDFGToConsider, DependenceType.DATA) + t.addAllPrevPDGEdges(t.prevCDGEdges, DependenceType.CONTROL) + } else { + t.addAllPrevPDGEdges(t.prevDFGEdges, DependenceType.DATA) + t.addAllPrevPDGEdges(t.prevCDGEdges, DependenceType.CONTROL) + } } } + private fun allEOGsFromToFlowThrough(from: Node, to: Node, through: Node): Boolean { + val worklist = mutableListOf(from) + while (worklist.isNotEmpty()) { + val currentStatus = worklist.removeFirst() + val nextEOG = currentStatus.nextEOG.filter { it != through } + if (nextEOG.isEmpty()) { + // This path always flows through "through" or has not seen "to", so we're good + continue + } else if (nextEOG.any { it == to }) { + // We reached "to". This means that "through" has not been on the path for this EOG + // path. + return false + } else { + if (nextEOG.size == 1) { + worklist.add(nextEOG.single()) + } else if (nextEOG.isEmpty()) { + // Nothing to do. This path doesn't lead us to "to". + continue + } else { + worklist.addAll(nextEOG) + } + } + } + return true + } + override fun accept(tu: TranslationUnitDeclaration) { tu.statements.forEach(::handle) tu.namespaces.forEach(::handle) diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPassTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPassTest.kt index 36581aab5cc..b0977e7bc27 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPassTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPassTest.kt @@ -62,9 +62,16 @@ class ProgramDependenceGraphPassTest { t.prevCDGEdges.map { it.apply { addProperty(Properties.DEPENDENCE, DependenceType.CONTROL) } } + - t.prevDFG.map { - PropertyEdge(it, t).apply { - addProperty(Properties.DEPENDENCE, DependenceType.DATA) + t.prevDFG.mapNotNull { + if ( + "remove next" in it.comment ?: "" && + "remove prev" in t.comment ?: "" + ) { + null + } else { + PropertyEdge(it, t).apply { + addProperty(Properties.DEPENDENCE, DependenceType.DATA) + } } } assertTrue( @@ -79,9 +86,16 @@ class ProgramDependenceGraphPassTest { t.nextCDGEdges.map { it.apply { addProperty(Properties.DEPENDENCE, DependenceType.CONTROL) } } + - t.nextDFG.map { - PropertyEdge(t, it).apply { - addProperty(Properties.DEPENDENCE, DependenceType.DATA) + t.nextDFG.mapNotNull { + if ( + "remove next" in t.comment ?: "" && + "remove prev" in it.comment ?: "" + ) { + null + } else { + PropertyEdge(t, it).apply { + addProperty(Properties.DEPENDENCE, DependenceType.DATA) + } } } assertTrue( @@ -136,11 +150,20 @@ class ProgramDependenceGraphPassTest { // The main method function("main", t("int")) { body { - declare { variable("i", t("int")) { call("rand") } } + declare { + variable("i", t("int")) { + comment = "remove next" + call("rand") + } + } ifStmt { condition { ref("i") lt literal(0, t("int")) } thenStmt { - ref("i") assign (ref("i") * literal(-1, t("int"))) + ref("i") assign + { + ref("i") { comment = "remove prev" } * + literal(-1, t("int")) + } } } returnStmt { ref("i") } @@ -165,12 +188,17 @@ class ProgramDependenceGraphPassTest { // The main method function("main", t("int")) { body { - declare { variable("i", t("int")) { call("rand") } } + declare { + variable("i", t("int")) { + comment = "remove next" + call("rand") + } + } whileStmt { whileCondition { ref("i") gt literal(0, t("int")) } loopBody { call("printf") { literal("#", t("string")) } - ref("i").dec() + ref("i") { comment = "remove prev, remove next" }.dec() } } call("printf") { literal("\n", t("string")) }