Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix DFG of the variable of aForEachStatement #1052

Merged
merged 16 commits into from
Feb 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions cpg-core/specifications/dfg.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,14 +408,37 @@ Specific statements lead to a branch in the control flow of a program. A value t
### ForEachStatement

Interesting fields:
* `variable: Statement`: The statement which is used in each iteration to assign the current iteration value

* `variable: Statement`: The statement which is used in each iteration to assign the current iteration value.
* `iterable: Statement`: The statement or expression, which is iterated

The value of the iterable flow to the `VariableDeclaration` in the `variable`. Since some languages allow arbitrary logic, we differentiate between two cases:

### Case 1. The `variable` is a `DeclarationStatement`.

This is the case for most languages where we can have only a variable in this place (e.g., `for(e in list)`). Here, we get the declaration(s) in the statement and add the DFG from the iterable to this declaration.


Scheme:
```mermaid
flowchart LR
node([ForEachStatement]) -.- variable[variable: DeclarationStatement]
node -.- iterable[iterable]
variable -.- declarations["declarations[i]"]
iterable -- for all i: DFG --> declarations
```

### Case 2. The `variable` is another type of `Statement`.

In this case, we assume that the last VariableDeclaration is the one used for looping. We add a DFG edge only to this declaration.

Scheme:
```mermaid
flowchart LR
node([ForEachStatement]) -.- variable(variable)
node -.- iterable(iterable)
node([ForEachStatement]) -.- statement[variable]
node -.- iterable[iterable]
statement -.- localVars[variables]
localVars -. "last" .-> variable
iterable -- DFG --> variable
variable -- DFG --> node
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import de.fraunhofer.aisec.cpg.graph.edge.Properties
import de.fraunhofer.aisec.cpg.graph.statements.*
import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator
import de.fraunhofer.aisec.cpg.graph.statements.expressions.DeclaredReferenceExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.UnaryOperator
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.IterativeGraphWalker
import de.fraunhofer.aisec.cpg.passes.order.DependsOn
Expand Down Expand Up @@ -68,15 +69,15 @@ open class ControlFlowSensitiveDFGPass : Pass() {
protected fun handle(node: Node) {
if (node is FunctionDeclaration) {
clearFlowsOfVariableDeclarations(node)
handleFunction(node)
handleStatementHolder(node)
}
}

/**
* Removes all the incoming and outgoing DFG edges for each variable declaration in the function
* [node].
* Removes all the incoming and outgoing DFG edges for each variable declaration in the block of
* code [node].
*/
private fun clearFlowsOfVariableDeclarations(node: FunctionDeclaration) {
private fun clearFlowsOfVariableDeclarations(node: Node) {
maximiliankaul marked this conversation as resolved.
Show resolved Hide resolved
for (varDecl in node.variables) {
varDecl.clearPrevDFG()
varDecl.clearNextDFG()
Expand All @@ -93,7 +94,7 @@ open class ControlFlowSensitiveDFGPass : Pass() {
* - Assignments with an operation e.g. of the form "variable += rhs"
* - Read operations on a variable
*/
private fun handleFunction(node: FunctionDeclaration) {
private fun handleStatementHolder(node: Node) {
// The list of nodes that we have to consider and the last write operations to the different
// variables.
val worklist =
Expand Down Expand Up @@ -208,6 +209,64 @@ open class ControlFlowSensitiveDFGPass : Pass() {
previousWrites[currentNode.refersTo]?.lastOrNull()?.let {
currentNode.addPrevDFG(it)
}
} else if (currentNode is ForEachStatement && currentNode.variable != null) {
// The VariableDeclaration in the ForEachStatement doesn't have an initializer, so
// the "normal" case won't work. We handle this case separately here...
// This is what we write to the declaration
val iterable = currentNode.iterable as? Expression

val writtenTo =
when (currentNode.variable) {
is DeclarationStatement ->
(currentNode.variable as DeclarationStatement).singleDeclaration
else -> currentNode.variable
}

// We wrote something to this variable declaration
writtenDecl =
when (writtenTo) {
is Declaration -> writtenTo
is DeclaredReferenceExpression -> writtenTo.refersTo
else -> {
log.error(
"The variable of type ${writtenTo?.javaClass} is not yet supported in the foreach loop"
)
null
}
}

currentNode.variable?.let { currentWritten = it }

if (writtenTo is DeclaredReferenceExpression) {
if (currentNode !in loopPoints) {
// We haven't been here before, so there's no chance we added something
// already. However, as the variable is processed before, we have already
// added the DFG edge from the VariableDeclaration to the
// DeclaredReferenceExpression. This doesn't make any sense, so we have to
// remove it again.
writtenTo.removePrevDFG(writtenDecl)
}

// This is a special case: We add the nextEOGEdge which goes out of the loop but
// with the old previousWrites map.
val nodesOutsideTheLoop =
currentNode.nextEOGEdges.filter {
it.getProperty(Properties.UNREACHABLE) != true &&
it.end != currentNode.statement &&
it.end !in currentNode.statement.allChildren<Node>()
}
nodesOutsideTheLoop
.map { it.end }
.forEach { worklist.add(Pair(it, copyMap(previousWrites))) }
}

iterable?.let { writtenTo?.addPrevDFG(it) }

if (writtenDecl != null && writtenTo != null) {
// Add the variable declaration (or the reference) to the list of previous write
// nodes in this path
previousWrites.computeIfAbsent(writtenDecl, ::mutableListOf).add(writtenTo)
}
} else if (currentNode is ReturnStatement) {
returnStatements.add(currentNode)
}
Expand Down Expand Up @@ -363,7 +422,7 @@ open class ControlFlowSensitiveDFGPass : Pass() {
val state = loopPoints.computeIfAbsent(currentNode) { mutableMapOf() }
if (
previousWrites.all { (decl, prevs) ->
decl in state && prevs.last() in state[decl]!!
(state[decl]?.contains(prevs.last())) == true
}
) {
// The current state of last write operations has already been seen before =>
Expand All @@ -376,7 +435,7 @@ open class ControlFlowSensitiveDFGPass : Pass() {
}
}
return writtenDecl != null &&
previousWrites[writtenDecl]!!.filter { it == currentWritten }.size >= 2
((previousWrites[writtenDecl]?.filter { it == currentWritten }?.size ?: 0) >= 2)
}

/** Copies the map */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.*
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.variables
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.IterativeGraphWalker
import de.fraunhofer.aisec.cpg.helpers.Util
import de.fraunhofer.aisec.cpg.passes.order.DependsOn
Expand Down Expand Up @@ -110,8 +111,8 @@ class DFGPass : Pass() {
}

/**
* Adds the DFG edge for a [VariableDeclaration]. The data flows from the return statement(s) to
* the function.
* Adds the DFG edge for a [VariableDeclaration]. The data flows from initializer to the
* variable.
*/
private fun handleVariableDeclaration(node: VariableDeclaration) {
node.initializer?.let { node.addPrevDFG(it) }
Expand Down Expand Up @@ -142,12 +143,22 @@ class DFGPass : Pass() {

/**
* Adds the DFG edge for a [ForEachStatement]. The data flows from the
* [ForEachStatement.iterable] to the [ForEachStatement.variable] and from
* [ForEachStatement.variable] to the [ForEachStatement] to show the dependence between data and
* branching node.
* [ForEachStatement.iterable] to the [ForEachStatement.variable]. However, since the
* [ForEachStatement.variable] is a [Statement], we have to identify the variable which is used
* in the loop. In most cases, we should have a [DeclarationStatement] which means that we can
* unwrap the [VariableDeclaration]. If this is not the case, we assume that the last
* [VariableDeclaration] in the statement is the one we care about.
*/
private fun handleForEachStatement(node: ForEachStatement) {
node.iterable?.let { node.variable?.addPrevDFG(it) }
if (node.iterable != null) {
if (node.variable is DeclarationStatement) {
(node.variable as DeclarationStatement).declarations.forEach {
it.addPrevDFG(node.iterable!!)
}
} else {
node.variable.variables.lastOrNull()?.addPrevDFG(node.iterable!!)
}
}
node.variable?.let { node.addPrevDFG(it) }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ internal class JavaLanguageFrontendTest : BaseTest() {

assertLocalName("println", sce)
assertFullName("java.io.PrintStream.println", sce)

// Check the flow from the iterable to the variable s
assertEquals(1, sDecl.prevDFG.size)
assertTrue(forEachStatement.iterable as DeclaredReferenceExpression in sDecl.prevDFG)
// Check the flow from the variable s to the print
assertTrue(sDecl in sce.arguments.first().prevDFG)
}

@Test
Expand Down Expand Up @@ -743,7 +749,13 @@ internal class JavaLanguageFrontendTest : BaseTest() {
val forEach = forIterator.bodyOrNull<ForEachStatement>()
assertNotNull(forEach)

assertNotNull(forEach.variable)
assertContains(forEach.variable!!.prevDFG, forEach.iterable!!)
val loopVariable = (forEach.variable as? DeclarationStatement)?.singleDeclaration
assertNotNull(loopVariable)
assertNotNull(forEach.iterable)
assertContains(loopVariable.prevDFG, forEach.iterable!!)

val jArg = forIterator.calls["println"]?.arguments?.firstOrNull()
assertNotNull(jArg)
assertContains(jArg.prevDFG, loopVariable)
}
}
33 changes: 23 additions & 10 deletions cpg-language-python/src/main/python/CPGPython/_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ def handle_statement_impl(self, stmt):
return r
elif isinstance(stmt, ast.Assign):
return self.handle_assign(stmt)

elif isinstance(stmt, ast.AugAssign):
return self.handle_assign(stmt)
elif isinstance(stmt, ast.AnnAssign):
Expand Down Expand Up @@ -370,6 +369,7 @@ def handle_function_or_method(self, node, record=None):

else:
self.log_with_loc(NOT_IMPLEMENTED_MSG, loglevel="ERROR")
# TODO empty annotation

# add first arg as value
if len(decorator.args) > 0:
Expand Down Expand Up @@ -432,12 +432,25 @@ def handle_for(self, stmt):
# We can handle the AsyncFor / For statement now:
for_stmt = StatementBuilderKt.newForEachStatement(self.frontend,
self.get_src_code(stmt))

# We handle the iterable before the target so that the type can be set
# correctly
it = self.handle_expression(stmt.iter)
for_stmt.setIterable(it)

target = self.handle_expression(stmt.target)
if self.is_variable_declaration(target):
resolved_target = self.scopemanager.resolveReference(target)
if resolved_target is None:
target = DeclarationBuilderKt.newVariableDeclaration(
self.frontend, target.getName(),
it.getType(),
self.get_src_code(stmt.target),
False)
self.scopemanager.addDeclaration(target)
target = self.wrap_declaration_to_stmt(target)

for_stmt.setVariable(target)
it = self.handle_expression(stmt.iter)
for_stmt.setIterable(it)

body = self.make_compound_statement(stmt.body)
for_stmt.setStatement(body)

Expand Down Expand Up @@ -522,15 +535,15 @@ def handle_assign_impl(self, stmt):
self.log_with_loc(
"Expected a DeclaredReferenceExpression or MemberExpression "
"but got \"%s\". Skipping." %
(lhs.java_name), loglevel="ERROR")
lhs.java_name, loglevel="ERROR")
r = ExpressionBuilderKt.newBinaryOperator(self.frontend,
"=",
self.get_src_code(stmt))
return r

resolved_lhs = self.scopemanager.resolveReference(lhs)
inRecord = self.scopemanager.isInRecord()
inFunction = self.scopemanager.isInFunction()
in_record = self.scopemanager.isInRecord()
in_function = self.scopemanager.isInFunction()

if resolved_lhs is not None:
# found var => BinaryOperator "="
Expand All @@ -541,7 +554,7 @@ def handle_assign_impl(self, stmt):
binop.setRhs(rhs)
return binop
else:
if inRecord and not inFunction:
if in_record and not in_function:
"""
class Foo:
class_var = 123
Expand Down Expand Up @@ -570,7 +583,7 @@ class Foo:
None, None, False) # TODO None -> add infos
self.scopemanager.addDeclaration(v)
return v
elif inRecord and inFunction:
elif in_record and in_function:
"""
class Foo:
def bar(self):
Expand Down Expand Up @@ -634,7 +647,7 @@ def bar(self):
self.scopemanager.addDeclaration(v)
self.scopemanager.getCurrentRecord().addField(v)
return v
elif not inRecord:
elif not in_record:
"""
either in a function or at file top-level
"""
Expand Down
Loading