diff --git a/cpg-core/specifications/dfg.md b/cpg-core/specifications/dfg.md index 89ac9b7b68..77fc8e727e 100644 --- a/cpg-core/specifications/dfg.md +++ b/cpg-core/specifications/dfg.md @@ -408,14 +408,37 @@ Specific statements lead to a branch in the control flow of a program. A value t ### ForEachStatement Interesting fields: -* `variable: Statement`: The statement which is used in each iteration to assign the current iteration value + +* `variable: Statement`: The statement which is used in each iteration to assign the current iteration value. * `iterable: Statement`: The statement or expression, which is iterated +The value of the iterable flow to the `VariableDeclaration` in the `variable`. Since some languages allow arbitrary logic, we differentiate between two cases: + +### Case 1. The `variable` is a `DeclarationStatement`. + +This is the case for most languages where we can have only a variable in this place (e.g., `for(e in list)`). Here, we get the declaration(s) in the statement and add the DFG from the iterable to this declaration. + + +Scheme: +```mermaid + flowchart LR + node([ForEachStatement]) -.- variable[variable: DeclarationStatement] + node -.- iterable[iterable] + variable -.- declarations["declarations[i]"] + iterable -- for all i: DFG --> declarations + ``` + +### Case 2. The `variable` is another type of `Statement`. + +In this case, we assume that the last VariableDeclaration is the one used for looping. We add a DFG edge only to this declaration. + Scheme: ```mermaid flowchart LR - node([ForEachStatement]) -.- variable(variable) - node -.- iterable(iterable) + node([ForEachStatement]) -.- statement[variable] + node -.- iterable[iterable] + statement -.- localVars[variables] + localVars -. "last" .-> variable iterable -- DFG --> variable variable -- DFG --> node ``` diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt index 346dfe8fb9..477e8deb31 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt @@ -35,6 +35,7 @@ import de.fraunhofer.aisec.cpg.graph.edge.Properties import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.DeclaredReferenceExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression import de.fraunhofer.aisec.cpg.graph.statements.expressions.UnaryOperator import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.IterativeGraphWalker import de.fraunhofer.aisec.cpg.passes.order.DependsOn @@ -68,15 +69,15 @@ open class ControlFlowSensitiveDFGPass : Pass() { protected fun handle(node: Node) { if (node is FunctionDeclaration) { clearFlowsOfVariableDeclarations(node) - handleFunction(node) + handleStatementHolder(node) } } /** - * Removes all the incoming and outgoing DFG edges for each variable declaration in the function - * [node]. + * Removes all the incoming and outgoing DFG edges for each variable declaration in the block of + * code [node]. */ - private fun clearFlowsOfVariableDeclarations(node: FunctionDeclaration) { + private fun clearFlowsOfVariableDeclarations(node: Node) { for (varDecl in node.variables) { varDecl.clearPrevDFG() varDecl.clearNextDFG() @@ -93,7 +94,7 @@ open class ControlFlowSensitiveDFGPass : Pass() { * - Assignments with an operation e.g. of the form "variable += rhs" * - Read operations on a variable */ - private fun handleFunction(node: FunctionDeclaration) { + private fun handleStatementHolder(node: Node) { // The list of nodes that we have to consider and the last write operations to the different // variables. val worklist = @@ -208,6 +209,64 @@ open class ControlFlowSensitiveDFGPass : Pass() { previousWrites[currentNode.refersTo]?.lastOrNull()?.let { currentNode.addPrevDFG(it) } + } else if (currentNode is ForEachStatement && currentNode.variable != null) { + // The VariableDeclaration in the ForEachStatement doesn't have an initializer, so + // the "normal" case won't work. We handle this case separately here... + // This is what we write to the declaration + val iterable = currentNode.iterable as? Expression + + val writtenTo = + when (currentNode.variable) { + is DeclarationStatement -> + (currentNode.variable as DeclarationStatement).singleDeclaration + else -> currentNode.variable + } + + // We wrote something to this variable declaration + writtenDecl = + when (writtenTo) { + is Declaration -> writtenTo + is DeclaredReferenceExpression -> writtenTo.refersTo + else -> { + log.error( + "The variable of type ${writtenTo?.javaClass} is not yet supported in the foreach loop" + ) + null + } + } + + currentNode.variable?.let { currentWritten = it } + + if (writtenTo is DeclaredReferenceExpression) { + if (currentNode !in loopPoints) { + // We haven't been here before, so there's no chance we added something + // already. However, as the variable is processed before, we have already + // added the DFG edge from the VariableDeclaration to the + // DeclaredReferenceExpression. This doesn't make any sense, so we have to + // remove it again. + writtenTo.removePrevDFG(writtenDecl) + } + + // This is a special case: We add the nextEOGEdge which goes out of the loop but + // with the old previousWrites map. + val nodesOutsideTheLoop = + currentNode.nextEOGEdges.filter { + it.getProperty(Properties.UNREACHABLE) != true && + it.end != currentNode.statement && + it.end !in currentNode.statement.allChildren() + } + nodesOutsideTheLoop + .map { it.end } + .forEach { worklist.add(Pair(it, copyMap(previousWrites))) } + } + + iterable?.let { writtenTo?.addPrevDFG(it) } + + if (writtenDecl != null && writtenTo != null) { + // Add the variable declaration (or the reference) to the list of previous write + // nodes in this path + previousWrites.computeIfAbsent(writtenDecl, ::mutableListOf).add(writtenTo) + } } else if (currentNode is ReturnStatement) { returnStatements.add(currentNode) } @@ -363,7 +422,7 @@ open class ControlFlowSensitiveDFGPass : Pass() { val state = loopPoints.computeIfAbsent(currentNode) { mutableMapOf() } if ( previousWrites.all { (decl, prevs) -> - decl in state && prevs.last() in state[decl]!! + (state[decl]?.contains(prevs.last())) == true } ) { // The current state of last write operations has already been seen before => @@ -376,7 +435,7 @@ open class ControlFlowSensitiveDFGPass : Pass() { } } return writtenDecl != null && - previousWrites[writtenDecl]!!.filter { it == currentWritten }.size >= 2 + ((previousWrites[writtenDecl]?.filter { it == currentWritten }?.size ?: 0) >= 2) } /** Copies the map */ diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt index e83d8c91ea..dd6d873815 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt @@ -35,6 +35,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.graph.variables import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.IterativeGraphWalker import de.fraunhofer.aisec.cpg.helpers.Util import de.fraunhofer.aisec.cpg.passes.order.DependsOn @@ -110,8 +111,8 @@ class DFGPass : Pass() { } /** - * Adds the DFG edge for a [VariableDeclaration]. The data flows from the return statement(s) to - * the function. + * Adds the DFG edge for a [VariableDeclaration]. The data flows from initializer to the + * variable. */ private fun handleVariableDeclaration(node: VariableDeclaration) { node.initializer?.let { node.addPrevDFG(it) } @@ -142,12 +143,22 @@ class DFGPass : Pass() { /** * Adds the DFG edge for a [ForEachStatement]. The data flows from the - * [ForEachStatement.iterable] to the [ForEachStatement.variable] and from - * [ForEachStatement.variable] to the [ForEachStatement] to show the dependence between data and - * branching node. + * [ForEachStatement.iterable] to the [ForEachStatement.variable]. However, since the + * [ForEachStatement.variable] is a [Statement], we have to identify the variable which is used + * in the loop. In most cases, we should have a [DeclarationStatement] which means that we can + * unwrap the [VariableDeclaration]. If this is not the case, we assume that the last + * [VariableDeclaration] in the statement is the one we care about. */ private fun handleForEachStatement(node: ForEachStatement) { - node.iterable?.let { node.variable?.addPrevDFG(it) } + if (node.iterable != null) { + if (node.variable is DeclarationStatement) { + (node.variable as DeclarationStatement).declarations.forEach { + it.addPrevDFG(node.iterable!!) + } + } else { + node.variable.variables.lastOrNull()?.addPrevDFG(node.iterable!!) + } + } node.variable?.let { node.addPrevDFG(it) } } diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontendTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontendTest.kt index bdec73ca94..48b8983133 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontendTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontendTest.kt @@ -143,6 +143,12 @@ internal class JavaLanguageFrontendTest : BaseTest() { assertLocalName("println", sce) assertFullName("java.io.PrintStream.println", sce) + + // Check the flow from the iterable to the variable s + assertEquals(1, sDecl.prevDFG.size) + assertTrue(forEachStatement.iterable as DeclaredReferenceExpression in sDecl.prevDFG) + // Check the flow from the variable s to the print + assertTrue(sDecl in sce.arguments.first().prevDFG) } @Test @@ -743,7 +749,13 @@ internal class JavaLanguageFrontendTest : BaseTest() { val forEach = forIterator.bodyOrNull() assertNotNull(forEach) - assertNotNull(forEach.variable) - assertContains(forEach.variable!!.prevDFG, forEach.iterable!!) + val loopVariable = (forEach.variable as? DeclarationStatement)?.singleDeclaration + assertNotNull(loopVariable) + assertNotNull(forEach.iterable) + assertContains(loopVariable.prevDFG, forEach.iterable!!) + + val jArg = forIterator.calls["println"]?.arguments?.firstOrNull() + assertNotNull(jArg) + assertContains(jArg.prevDFG, loopVariable) } } diff --git a/cpg-language-python/src/main/python/CPGPython/_statements.py b/cpg-language-python/src/main/python/CPGPython/_statements.py index 4ebe1fc680..be4f9be6bb 100644 --- a/cpg-language-python/src/main/python/CPGPython/_statements.py +++ b/cpg-language-python/src/main/python/CPGPython/_statements.py @@ -97,7 +97,6 @@ def handle_statement_impl(self, stmt): return r elif isinstance(stmt, ast.Assign): return self.handle_assign(stmt) - elif isinstance(stmt, ast.AugAssign): return self.handle_assign(stmt) elif isinstance(stmt, ast.AnnAssign): @@ -370,6 +369,7 @@ def handle_function_or_method(self, node, record=None): else: self.log_with_loc(NOT_IMPLEMENTED_MSG, loglevel="ERROR") + # TODO empty annotation # add first arg as value if len(decorator.args) > 0: @@ -432,12 +432,25 @@ def handle_for(self, stmt): # We can handle the AsyncFor / For statement now: for_stmt = StatementBuilderKt.newForEachStatement(self.frontend, self.get_src_code(stmt)) + + # We handle the iterable before the target so that the type can be set + # correctly + it = self.handle_expression(stmt.iter) + for_stmt.setIterable(it) + target = self.handle_expression(stmt.target) - if self.is_variable_declaration(target): + resolved_target = self.scopemanager.resolveReference(target) + if resolved_target is None: + target = DeclarationBuilderKt.newVariableDeclaration( + self.frontend, target.getName(), + it.getType(), + self.get_src_code(stmt.target), + False) + self.scopemanager.addDeclaration(target) target = self.wrap_declaration_to_stmt(target) + for_stmt.setVariable(target) - it = self.handle_expression(stmt.iter) - for_stmt.setIterable(it) + body = self.make_compound_statement(stmt.body) for_stmt.setStatement(body) @@ -522,15 +535,15 @@ def handle_assign_impl(self, stmt): self.log_with_loc( "Expected a DeclaredReferenceExpression or MemberExpression " "but got \"%s\". Skipping." % - (lhs.java_name), loglevel="ERROR") + lhs.java_name, loglevel="ERROR") r = ExpressionBuilderKt.newBinaryOperator(self.frontend, "=", self.get_src_code(stmt)) return r resolved_lhs = self.scopemanager.resolveReference(lhs) - inRecord = self.scopemanager.isInRecord() - inFunction = self.scopemanager.isInFunction() + in_record = self.scopemanager.isInRecord() + in_function = self.scopemanager.isInFunction() if resolved_lhs is not None: # found var => BinaryOperator "=" @@ -541,7 +554,7 @@ def handle_assign_impl(self, stmt): binop.setRhs(rhs) return binop else: - if inRecord and not inFunction: + if in_record and not in_function: """ class Foo: class_var = 123 @@ -570,7 +583,7 @@ class Foo: None, None, False) # TODO None -> add infos self.scopemanager.addDeclaration(v) return v - elif inRecord and inFunction: + elif in_record and in_function: """ class Foo: def bar(self): @@ -634,7 +647,7 @@ def bar(self): self.scopemanager.addDeclaration(v) self.scopemanager.getCurrentRecord().addField(v) return v - elif not inRecord: + elif not in_record: """ either in a function or at file top-level """ diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index d465cf684e..d04323082c 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -43,10 +43,7 @@ import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation import de.fraunhofer.aisec.cpg.sarif.Region import java.net.URI import java.nio.file.Path -import kotlin.test.Test -import kotlin.test.assertEquals -import kotlin.test.assertNotNull -import kotlin.test.assertNull +import kotlin.test.* class PythonFrontendTest : BaseTest() { // TODO ensure gradle doesn't remove those classes @@ -828,6 +825,8 @@ class PythonFrontendTest : BaseTest() { } @Test + @Ignore // TODO fix & re-enable this test once there is proper support for multiple variables in + // a loop fun testIssue615() { val topLevel = Path.of("src", "test", "resources", "python") val tu = @@ -1023,4 +1022,77 @@ class PythonFrontendTest : BaseTest() { val route = annotations.firstOrNull() assertFullName("app.route", route) } + + @Test + fun testForLoop() { + val topLevel = Path.of("src", "test", "resources", "python") + val tu = + TestUtils.analyzeAndGetFirstTU( + listOf(topLevel.resolve("forloop.py").toFile()), + topLevel, + true + ) { + it.registerLanguage() + } + assertNotNull(tu) + + val namespace = tu.functions["forloop"]?.body as? CompoundStatement + assertNotNull(namespace) + + val varDefinedBeforeLoop = namespace.variables["varDefinedBeforeLoop"] + assertNotNull(varDefinedBeforeLoop) + + val varDefinedInLoop = namespace.variables["varDefinedInLoop"] + assertNotNull(varDefinedInLoop) + + val firstLoop = namespace.statements[1] as? ForEachStatement + assertNotNull(firstLoop) + + val secondLoop = namespace.statements[2] as? ForEachStatement + assertNotNull(secondLoop) + + val fooCall = namespace.statements[3] as? CallExpression + assertNotNull(fooCall) + + val barCall = namespace.statements[4] as? CallExpression + assertNotNull(barCall) + + // no dataflow from var declaration to loop variable because it's a write access + assert((firstLoop.variable?.prevDFG?.contains(varDefinedBeforeLoop) == false)) + + // dataflow from range call to loop variable + val firstLoopIterable = firstLoop.iterable as? CallExpression + assertNotNull(firstLoopIterable) + assert((firstLoop.variable?.prevDFG?.contains((firstLoopIterable)) == true)) + + // dataflow from var declaration to loop iterable call + assert( + firstLoopIterable.arguments.firstOrNull()?.prevDFG?.contains(varDefinedBeforeLoop) == + true + ) + + // dataflow from first loop to foo call + val loopVar = firstLoop.variable as? DeclaredReferenceExpression + assertNotNull(loopVar) + assert(fooCall.arguments.first().prevDFG.contains(loopVar)) + + // dataflow from var declaration to foo call (in case for loop is not executed) + assert(fooCall.arguments.first().prevDFG.contains(varDefinedBeforeLoop)) + + // dataflow from range call to loop variable + val secondLoopIterable = secondLoop.iterable as? CallExpression + assertNotNull(secondLoopIterable) + assert( + ((secondLoop.variable as DeclarationStatement) + .singleDeclaration + ?.prevDFG + ?.contains((secondLoopIterable)) == true) + ) + + // dataflow from second loop var to bar call + assertEquals( + (secondLoop.variable as? DeclarationStatement)?.singleDeclaration, + barCall.arguments.first().prevDFG.firstOrNull() + ) + } } diff --git a/cpg-language-python/src/test/resources/python/forloop.py b/cpg-language-python/src/test/resources/python/forloop.py new file mode 100644 index 0000000000..29aa19a043 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/forloop.py @@ -0,0 +1,11 @@ +def forloop(): + varDefinedBeforeLoop = 1 + + for varDefinedBeforeLoop in range(varDefinedBeforeLoop): + pass + + for varDefinedInLoop in range(42): + pass + + foo(varDefinedBeforeLoop) + bar(varDefinedInLoop)