diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt index 3923784889..a33f9da7bc 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt @@ -62,7 +62,7 @@ interface Python { * `ast.stmt` [AST.BaseStmt] and `ast.expr` [AST.BaseExpr] nodes have extra location * properties as implemented here. */ - interface WithASTLocation { // TODO make the fields accessible `by lazy` + interface WithLocation { // TODO make the fields accessible `by lazy` val pyObject: PyObject /** Maps to the `lineno` filed from Python's ast. */ @@ -156,7 +156,30 @@ interface Python { * | | Continue * ``` */ - sealed class BaseStmt(pyObject: PyObject) : AST(pyObject), WithASTLocation + sealed class BaseStmt(pyObject: PyObject) : AST(pyObject), WithLocation + + /** + * Several classes are duplicated in the python AST for async and non-async variants. This + * interface is a common interface for those AST classes. + */ + interface AsyncOrNot : WithLocation + + /** This interface denotes that this is an "async" node. */ + interface IsAsync : AsyncOrNot + + /** + * ast.FunctionDef and ast.AsyncFunctionDef are not related according to the Python syntax. + * However, they are so similar, that we make use of this interface to avoid a lot of + * duplicate code. + */ + interface NormalOrAsyncFunctionDef : AsyncOrNot { + val name: String + val args: arguments + val body: kotlin.collections.List + val decorator_list: kotlin.collections.List + val returns: BaseExpr? + val type_comment: String? + } /** * ``` @@ -164,20 +187,20 @@ interface Python { * | FunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns, string? 
type_comment) * ``` */ - class FunctionDef(pyObject: PyObject) : BaseStmt(pyObject) { - val name: String by lazy { "name" of pyObject } + class FunctionDef(pyObject: PyObject) : BaseStmt(pyObject), NormalOrAsyncFunctionDef { + override val name: String by lazy { "name" of pyObject } - val args: arguments by lazy { "args" of pyObject } + override val args: arguments by lazy { "args" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } - val decorator_list: kotlin.collections.List by lazy { + override val decorator_list: kotlin.collections.List by lazy { "decorator_list" of pyObject } - val returns: BaseExpr? by lazy { "returns" of pyObject } + override val returns: BaseExpr? by lazy { "returns" of pyObject } - val type_comment: String? by lazy { "type_comment" of pyObject } + override val type_comment: String? by lazy { "type_comment" of pyObject } } /** @@ -186,20 +209,21 @@ interface Python { * | AsyncFunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns, string? type_comment) * ``` */ - class AsyncFunctionDef(pyObject: PyObject) : BaseStmt(pyObject) { - val name: String by lazy { "name" of pyObject } + class AsyncFunctionDef(pyObject: PyObject) : + BaseStmt(pyObject), NormalOrAsyncFunctionDef, IsAsync { + override val name: String by lazy { "name" of pyObject } - val args: arguments by lazy { "args" of pyObject } + override val args: arguments by lazy { "args" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } - val decorator_list: kotlin.collections.List by lazy { + override val decorator_list: kotlin.collections.List by lazy { "decorator_list" of pyObject } - val returns: BaseExpr? by lazy { "returns" of pyObject } + override val returns: BaseExpr? by lazy { "returns" of pyObject } - val type_comment: String? 
by lazy { "type_comment" of pyObject } + override val type_comment: String? by lazy { "type_comment" of pyObject } } /** @@ -281,18 +305,30 @@ interface Python { val simple: Long by lazy { "simple" of pyObject } } + /** + * ast.For and ast.AsyncFor are not related according to the Python syntax. However, they + * are so similar, that we make use of this interface to avoid a lot of duplicate code. + */ + interface NormalOrAsyncFor : AsyncOrNot { + val target: BaseExpr + val iter: BaseExpr + val body: kotlin.collections.List + val orelse: kotlin.collections.List + val type_comment: String? + } + /** * ``` * ast.For = class For(stmt) * | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) * ``` */ - class For(pyObject: PyObject) : BaseStmt(pyObject) { - val target: BaseExpr by lazy { "target" of pyObject } - val iter: BaseExpr by lazy { "iter" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } - val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } - val type_comment: String? by lazy { "type_comment" of pyObject } + class For(pyObject: PyObject) : BaseStmt(pyObject), NormalOrAsyncFor { + override val target: BaseExpr by lazy { "target" of pyObject } + override val iter: BaseExpr by lazy { "iter" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } + override val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } + override val type_comment: String? by lazy { "type_comment" of pyObject } } /** @@ -301,12 +337,12 @@ interface Python { * | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? 
type_comment) * ``` */ - class AsyncFor(pyObject: PyObject) : BaseStmt(pyObject) { - val target: BaseExpr by lazy { "target" of pyObject } - val iter: BaseExpr by lazy { "iter" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } - val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } - val type_comment: String? by lazy { "type_comment" of pyObject } + class AsyncFor(pyObject: PyObject) : BaseStmt(pyObject), NormalOrAsyncFor, IsAsync { + override val target: BaseExpr by lazy { "target" of pyObject } + override val iter: BaseExpr by lazy { "iter" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } + override val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } + override val type_comment: String? by lazy { "type_comment" of pyObject } } /** @@ -333,16 +369,26 @@ interface Python { val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } } + /** + * ast.With and ast.AsyncWith are not related according to the Python syntax. However, they + * are so similar, that we make use of this interface to avoid a lot of duplicate code. + */ + interface NormalOrAsyncWith : AsyncOrNot { + val items: kotlin.collections.List + val body: kotlin.collections.List + val type_comment: String? + } + /** * ``` * ast.With = class With(stmt) * | With(withitem* items, stmt* body, string? type_comment) * ``` */ - class With(pyObject: PyObject) : BaseStmt(pyObject) { - val items: withitem by lazy { "items" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } - val type_comment: String? by lazy { "type_comment" of pyObject } + class With(pyObject: PyObject) : BaseStmt(pyObject), NormalOrAsyncWith { + override val items: kotlin.collections.List by lazy { "items" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } + override val type_comment: String? 
by lazy { "type_comment" of pyObject } } /** @@ -351,12 +397,10 @@ interface Python { * | AsyncWith(withitem* items, stmt* body, string? type_comment) * ``` */ - class AsyncWith(pyObject: PyObject) : BaseStmt(pyObject) { - val target: BaseExpr by lazy { "target" of pyObject } - val iter: BaseExpr by lazy { "iter" of pyObject } - val body: kotlin.collections.List by lazy { "body" of pyObject } - val orelse: kotlin.collections.List by lazy { "orelse" of pyObject } - val type_comment: String? by lazy { "type_comment" of pyObject } + class AsyncWith(pyObject: PyObject) : BaseStmt(pyObject), NormalOrAsyncWith, IsAsync { + override val items: kotlin.collections.List by lazy { "items" of pyObject } + override val body: kotlin.collections.List by lazy { "body" of pyObject } + override val type_comment: String? by lazy { "type_comment" of pyObject } } /** @@ -505,7 +549,7 @@ interface Python { * * ast.expr = class expr(AST) */ - sealed class BaseExpr(pyObject: PyObject) : AST(pyObject), WithASTLocation + sealed class BaseExpr(pyObject: PyObject) : AST(pyObject), WithLocation /** * ``` @@ -1233,7 +1277,7 @@ interface Python { * | arg(identifier arg, expr? annotation, string? type_comment) * ``` */ - class arg(pyObject: PyObject) : AST(pyObject), WithASTLocation { + class arg(pyObject: PyObject) : AST(pyObject), WithLocation { val arg: String by lazy { "arg" of pyObject } val annotation: BaseExpr? by lazy { "annotation" of pyObject } val type_comment: String? 
by lazy { "type_comment" of pyObject } diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt index 22fee76090..674894f78f 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt @@ -234,7 +234,7 @@ class PythonLanguageFrontend(language: Language, ctx: Tr } override fun locationOf(astNode: Python.AST.AST): PhysicalLocation? { - return if (astNode is Python.AST.WithASTLocation) { + return if (astNode is Python.AST.WithLocation) { PhysicalLocation( uri, Region( diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index 5e0ec77a80..5b2b8e059a 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -25,6 +25,7 @@ */ package de.fraunhofer.aisec.cpg.frontends.python +import de.fraunhofer.aisec.cpg.frontends.python.Python.AST.IsAsync import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage.Companion.MODIFIER_POSITIONAL_ONLY_ARGUMENT import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.Annotation @@ -45,7 +46,7 @@ class StatementHandler(frontend: PythonLanguageFrontend) : return when (node) { is Python.AST.ClassDef -> handleClassDef(node) is Python.AST.FunctionDef -> handleFunctionDef(node) - is Python.AST.AsyncFunctionDef -> handleAsyncFunctionDef(node) + is Python.AST.AsyncFunctionDef -> handleFunctionDef(node) is Python.AST.Pass -> return newEmptyStatement(rawNode = node) is Python.AST.ImportFrom -> 
handleImportFrom(node) is Python.AST.Assign -> handleAssign(node) @@ -55,7 +56,7 @@ class StatementHandler(frontend: PythonLanguageFrontend) : is Python.AST.AnnAssign -> handleAnnAssign(node) is Python.AST.Expr -> handleExpressionStatement(node) is Python.AST.For -> handleFor(node) - is Python.AST.AsyncFor -> handleAsyncFor(node) + is Python.AST.AsyncFor -> handleFor(node) is Python.AST.While -> handleWhile(node) is Python.AST.Import -> handleImport(node) is Python.AST.Break -> newBreakStatement(rawNode = node) @@ -138,25 +139,25 @@ class StatementHandler(frontend: PythonLanguageFrontend) : private fun handleWhile(node: Python.AST.While): Statement { val ret = newWhileStatement(rawNode = node) ret.condition = frontend.expressionHandler.handle(node.test) - ret.statement = makeBlock(node.body).codeAndLocationFromChildren(node) + ret.statement = makeBlock(node.body, parentNode = node) node.orelse.firstOrNull()?.let { TODO("Not supported") } return ret } - private fun handleFor(node: Python.AST.For): Statement { + private fun handleFor(node: Python.AST.NormalOrAsyncFor): Statement { val ret = newForEachStatement(rawNode = node) - ret.iterable = frontend.expressionHandler.handle(node.iter) - ret.variable = frontend.expressionHandler.handle(node.target) - ret.statement = makeBlock(node.body).codeAndLocationFromChildren(node) - node.orelse.firstOrNull()?.let { TODO("Not supported") } - return ret - } + if (node is IsAsync) { + ret.addDeclaration( + newProblemDeclaration( + problem = "The \"async\" keyword is not yet supported.", + rawNode = node + ) + ) + } - private fun handleAsyncFor(node: Python.AST.AsyncFor): Statement { - val ret = newForEachStatement(rawNode = node) ret.iterable = frontend.expressionHandler.handle(node.iter) ret.variable = frontend.expressionHandler.handle(node.target) - ret.statement = makeBlock(node.body).codeAndLocationFromChildren(node) + ret.statement = makeBlock(node.body, parentNode = node) node.orelse.firstOrNull()?.let { TODO("Not 
supported") } return ret } @@ -184,13 +185,13 @@ class StatementHandler(frontend: PythonLanguageFrontend) : ret.condition = frontend.expressionHandler.handle(node.test) ret.thenStatement = if (node.body.isNotEmpty()) { - makeBlock(node.body).codeAndLocationFromChildren(node) + makeBlock(node.body, parentNode = node) } else { null } ret.elseStatement = if (node.orelse.isNotEmpty()) { - makeBlock(node.orelse).codeAndLocationFromChildren(node) + makeBlock(node.orelse, parentNode = node) } else { null } @@ -273,7 +274,7 @@ class StatementHandler(frontend: PythonLanguageFrontend) : * `receiver` (most often called `self`). */ private fun handleFunctionDef( - s: Python.AST.FunctionDef, + s: Python.AST.NormalOrAsyncFunctionDef, recordDeclaration: RecordDeclaration? = null ): DeclarationStatement { val result = @@ -297,68 +298,15 @@ class StatementHandler(frontend: PythonLanguageFrontend) : } frontend.scopeManager.enterScope(result) - // Handle decorators (which are translated into CPG "annotations") - result.annotations += handleAnnotations(s) - - // Handle return type and calculate function type - if (result is ConstructorDeclaration) { - // Return type of the constructor is always its record declaration type - result.returnTypes = listOf(recordDeclaration?.toType() ?: unknownType()) - } else { - result.returnTypes = listOf(frontend.typeOf(s.returns)) - } - result.type = FunctionType.computeType(result) - - handleArguments(s.args, result, recordDeclaration) - - if (s.body.isNotEmpty()) { - result.body = makeBlock(s.body).codeAndLocationFromChildren(s) + if (s is Python.AST.AsyncFunctionDef) { + result.addDeclaration( + newProblemDeclaration( + problem = "The \"async\" keyword is not yet supported.", + rawNode = s + ) + ) } - frontend.scopeManager.leaveScope(result) - frontend.scopeManager.addDeclaration(result) - - return wrapDeclarationToStatement(result) - } - - /** - * We have to consider multiple things when matching Python's FunctionDef to the CPG: - * - A 
[Python.AST.FunctionDef] is a [Statement] from Python's point of view. The CPG sees it as - * a declaration -> we have to wrap the result in a [DeclarationStatement]. - * - A [Python.AST.FunctionDef] could be one of - * - a [ConstructorDeclaration] if it appears in a record and its [name] is `__init__` - * - a [MethodeDeclaration] if it appears in a record, and it isn't a - * [ConstructorDeclaration] - * - a [FunctionDeclaration] if neither of the above apply - * - * In case of a [ConstructorDeclaration] or[MethodDeclaration]: the first argument is the - * `receiver` (most often called `self`). - */ - private fun handleAsyncFunctionDef( - s: Python.AST.AsyncFunctionDef, - recordDeclaration: RecordDeclaration? = null - ): DeclarationStatement { - val result = - if (recordDeclaration != null) { - if (s.name == "__init__") { - newConstructorDeclaration( - name = s.name, - recordDeclaration = recordDeclaration, - rawNode = s - ) - } else { - newMethodDeclaration( - name = s.name, - recordDeclaration = recordDeclaration, - isStatic = false, - rawNode = s - ) - } - } else { - newFunctionDeclaration(name = s.name, rawNode = s) - } - frontend.scopeManager.enterScope(result) - // Handle decorators (which are translated into CPG "annotations") result.annotations += handleAnnotations(s) @@ -374,7 +322,7 @@ class StatementHandler(frontend: PythonLanguageFrontend) : handleArguments(s.args, result, recordDeclaration) if (s.body.isNotEmpty()) { - result.body = makeBlock(s.body).codeAndLocationFromChildren(s) + result.body = makeBlock(s.body, parentNode = s) } frontend.scopeManager.leaveScope(result) @@ -477,16 +425,14 @@ class StatementHandler(frontend: PythonLanguageFrontend) : } } - private fun handleAnnotations(node: Python.AST.AsyncFunctionDef): Collection { - return handleDeclaratorList(node, node.decorator_list) - } - - private fun handleAnnotations(node: Python.AST.FunctionDef): Collection { + private fun handleAnnotations( + node: Python.AST.NormalOrAsyncFunctionDef + ): 
Collection { return handleDeclaratorList(node, node.decorator_list) } fun handleDeclaratorList( - node: Python.AST.AST, + node: Python.AST.WithLocation, decoratorList: List ): List { val annotations = mutableListOf() @@ -539,15 +485,31 @@ class StatementHandler(frontend: PythonLanguageFrontend) : return annotations } + /** + * This function "wraps" a list of [Python.AST.BaseStmt] nodes into a [Block]. Since the list + * itself does not have a code/location, we need to employ [codeAndLocationFromChildren] on the + * [parentNode]. + */ private fun makeBlock( stmts: List, - rawNode: Python.AST.AST? = null + parentNode: Python.AST.WithLocation ): Block { - val result = newBlock(rawNode = rawNode) + val result = newBlock() for (stmt in stmts) { result.statements += handle(stmt) } - return result + + // Try to retrieve the code and location from the parent node, if it is a base stmt + var baseStmt = parentNode as? Python.AST.BaseStmt + return if (baseStmt != null) { + result.codeAndLocationFromChildren(baseStmt) + } else { + // Otherwise, continue without setting the location + log.warn( + "Could not set location on wrapped block because the parent node is not a python statement" + ) + result + } } internal fun handleArgument( diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandlerTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandlerTest.kt index a015557902..6aea87792a 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandlerTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandlerTest.kt @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg.frontends.python import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.test.analyze +import de.fraunhofer.aisec.cpg.test.analyzeAndGetFirstTU import java.nio.file.Path import kotlin.test.Test import kotlin.test.assertContains 
@@ -86,4 +87,22 @@ class StatementHandlerTest { assertNotNull(variadicArg, "Failed to find variadic argc") assertEquals(true, variadicArg.isVariadic) } + + @Test + fun testAsync() { + val topLevel = Path.of("src", "test", "resources", "python") + val tu = + analyzeAndGetFirstTU(listOf(topLevel.resolve("async.py").toFile()), topLevel, true) { + it.registerLanguage() + } + assertNotNull(tu) + + val myFunc = tu.functions["my_func"] + assertNotNull(myFunc) + assertEquals(1, myFunc.parameters.size) + + val myOtherFunc = tu.functions["my_other_func"] + assertNotNull(myOtherFunc) + assertEquals(1, myOtherFunc.parameters.size) + } } diff --git a/cpg-language-python/src/test/resources/python/async.py b/cpg-language-python/src/test/resources/python/async.py new file mode 100644 index 0000000000..5db305451e --- /dev/null +++ b/cpg-language-python/src/test/resources/python/async.py @@ -0,0 +1,11 @@ +import asyncio + + +async def my_func(i: int): + async for obj in generator: + pass + + await asyncio.sleep(i) + +def my_other_func(i: int): + pass