From 7cf04d83937e58eff2dfed96d3a3d066c5234230 Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Wed, 30 Oct 2024 14:02:57 +0100 Subject: [PATCH 1/5] Do not use InitializerListExpressions as lhs for assignments --- .../cpg/frontends/python/StatementHandler.kt | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index bb75836162..0a4ec96f7d 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -634,7 +634,31 @@ class StatementHandler(frontend: PythonLanguageFrontend) : * [AssignExpression]. */ private fun handleAssign(node: Python.AST.Assign): AssignExpression { - val lhs = node.targets.map { frontend.expressionHandler.handle(it) } + val lhsCandidate = node.targets.map { frontend.expressionHandler.handle(it) } + + /* + * We have to unpack the lhs of the assign expression, because this might be a multi assign. + * In this case, the handler returns an [InitializerListExpression] which does not fit very + * well for our lhs. We thus unpack the [InitializerListExpression] and directly use the + * [Reference]s stored inside. 
+ */ + val lhs = + lhsCandidate.flatMap { + when (it) { + is Reference -> listOf(it) + is InitializerListExpression -> it.initializers + else -> + listOf( + newProblemExpression( + problem = + "Expected a `Reference` or an `InitializerListExpression`.", + type = ProblemNode.ProblemType.TRANSLATION, + rawNode = node + ) + ) + } + } + node.type_comment?.let { typeComment -> val tpe = frontend.typeOf(typeComment) lhs.forEach { it.type = tpe } From 3669aee07439918827c2ad19491b9f99fe00ecda Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Wed, 30 Oct 2024 14:05:53 +0100 Subject: [PATCH 2/5] added assign tests --- .../frontends/python/ExpressionHandlerTest.kt | 65 +++++++++++++++++++ .../src/test/resources/python/multi_assign.py | 6 ++ 2 files changed, 71 insertions(+) create mode 100644 cpg-language-python/src/test/resources/python/multi_assign.py diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt index 51da0d504d..65b6be7199 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt @@ -26,7 +26,9 @@ package de.fraunhofer.aisec.cpg.frontends.python import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.test.analyze import de.fraunhofer.aisec.cpg.test.assertLiteralValue import de.fraunhofer.aisec.cpg.test.assertLocalName @@ -112,4 +114,67 @@ class ExpressionHandlerTest { assertContains(nestedBoolOpDifferentOp2.lhs.nextEOG, nestedBoolOpDifferentOp2Rhs.lhs) 
assertContains(nestedBoolOpDifferentOp2Rhs.lhs.nextEOG, nestedBoolOpDifferentOp2Rhs.rhs) } + + @Test + fun testAssignmentsMultiReturn() { + val topLevel = Path.of("src", "test", "resources", "python") + val result = + analyze(listOf(topLevel.resolve("multi_assign.py").toFile()), topLevel, true) { + it.registerLanguage() + } + assertNotNull(result) + + // `a, b = (1, 2)` + val refA = result.refs["a"] + assertIs(refA) + val refB = result.refs["b"] + assertIs(refB) + + val assignment = result.assigns.firstOrNull() + assertIs(assignment) + assertEquals(2, assignment.assignments.size) + + // extract both assignments + val assignmentA = assignment.assignments[0] + assertIs(assignmentA) + val assignmentB = assignment.assignments[1] + assertIs(assignmentB) + + // check that the assignments assign the correct value and assign to the correct reference + assertLiteralValue(1, assignmentA.value) + assertLiteralValue(2, assignmentB.value) + assertEquals(refA, assignmentA.target) + assertEquals(refB, assignmentB.target) + + /* + ```python + def foo(): + return (3, 4) + fooA, fooB = foo() + ``` + */ + val refFooA = result.refs["fooA"] + assertIs(refFooA) + val refFooB = result.refs["fooB"] + assertIs(refFooB) + + val assignmentFoo = result.assigns.getOrNull(1) + assertIs(assignmentFoo) + assertEquals(2, assignmentFoo.assignments.size) + + // extract both assignments + val assignmentFooA = assignmentFoo.assignments[0] + assertIs(assignmentFooA) + val assignmentFooB = assignmentFoo.assignments[1] + assertIs(assignmentFooB) + + // check that the assignments assign the correct value and assign to the correct reference + assertLiteralValue(3, assignmentFooA.value) + assertLiteralValue(4, assignmentFooB.value) + assertEquals(refFooA, assignmentFooA.target) + assertEquals(refFooB, assignmentFooB.target) + + // check that there is only one call to `foo` modeled + assertEquals(1, result.calls.filter { it.invokes == result.functions["foo"] }.size) + } } diff --git 
a/cpg-language-python/src/test/resources/python/multi_assign.py b/cpg-language-python/src/test/resources/python/multi_assign.py new file mode 100644 index 0000000000..7c7e1df9a7 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/multi_assign.py @@ -0,0 +1,6 @@ +a, b = (1, 2) + +def foo(): + return (3, 4) + +fooA, fooB = foo() From 0438ad0b72d5efda5b6c7f6413e91732fa617c1c Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Wed, 30 Oct 2024 14:34:04 +0100 Subject: [PATCH 3/5] fix findValue logic --- .../aisec/cpg/graph/statements/expressions/AssignExpression.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt index 966a913545..28d12a3e3a 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt @@ -138,7 +138,7 @@ class AssignExpression : /** Finds the value (of [rhs]) that is assigned to the particular [lhs] expression. */ fun findValue(lhsExpression: HasType): Expression? 
{ - return if (lhs.size > 1) { + return if (lhs.size == 1) { rhs.singleOrNull() } else { // Basically, we need to find out which index on the lhs this variable belongs to and From a02743af055e7a1cb009197829b0462fe31eba04 Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Wed, 30 Oct 2024 14:37:17 +0100 Subject: [PATCH 4/5] InitializerListExpressions are now of TupleType --- .../fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt index 951d01cf62..acbce1159b 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt @@ -29,6 +29,7 @@ import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.graph.types.TupleType import jep.python.PyObject class ExpressionHandler(frontend: PythonLanguageFrontend) : @@ -246,7 +247,7 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) : lst += handle(e) } val ile = newInitializerListExpression(rawNode = node) - ile.type = frontend.objectType("tuple") + ile.type = TupleType(types = lst.map { it.type }) ile.initializers = lst return ile } From 2b5bc605f6052b667fe237f1194d07e094b68252 Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Wed, 30 Oct 2024 14:41:19 +0100 Subject: [PATCH 5/5] more python test code --- .../frontends/python/ExpressionHandlerTest.kt | 2 +- .../src/test/resources/python/assignments.py | 19 +++++++++++++++++++ .../src/test/resources/python/multi_assign.py | 6 ------ 3 files 
changed, 20 insertions(+), 7 deletions(-) create mode 100644 cpg-language-python/src/test/resources/python/assignments.py delete mode 100644 cpg-language-python/src/test/resources/python/multi_assign.py diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt index 65b6be7199..bfbc7b7fee 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandlerTest.kt @@ -119,7 +119,7 @@ class ExpressionHandlerTest { fun testAssignmentsMultiReturn() { val topLevel = Path.of("src", "test", "resources", "python") val result = - analyze(listOf(topLevel.resolve("multi_assign.py").toFile()), topLevel, true) { + analyze(listOf(topLevel.resolve("assignments.py").toFile()), topLevel, true) { it.registerLanguage() } assertNotNull(result) diff --git a/cpg-language-python/src/test/resources/python/assignments.py b/cpg-language-python/src/test/resources/python/assignments.py new file mode 100644 index 0000000000..1efa310453 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/assignments.py @@ -0,0 +1,19 @@ +a, b = (1, 2) + +def foo(): + return (3, 4) + +fooA, fooB = foo() + + +# more python code to be used for more tests... +a = 42 +a, b = [21, 42] +c = d = 42 +e = 42, 42 +f, g = '42' # yes, this also unpacks strings! + +class Foo: + foo = 42 + +a, newVar, Foo.foo, Foo.bar, *rest = 1, 2, 3, 4, 5, 6 diff --git a/cpg-language-python/src/test/resources/python/multi_assign.py b/cpg-language-python/src/test/resources/python/multi_assign.py deleted file mode 100644 index 7c7e1df9a7..0000000000 --- a/cpg-language-python/src/test/resources/python/multi_assign.py +++ /dev/null @@ -1,6 +0,0 @@ -a, b = (1, 2) - -def foo(): - return (3, 4) - -fooA, fooB = foo()