diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt
index 62778bc208..7e9ce3081c 100644
--- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt
+++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt
@@ -29,25 +29,11 @@ import de.fraunhofer.aisec.cpg.graph.*
 import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration
 import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration
 import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
-import de.fraunhofer.aisec.cpg.graph.statements.expressions.CollectionComprehension
 import jep.python.PyObject
 
 class ExpressionHandler(frontend: PythonLanguageFrontend) :
     PythonHandler<Expression, Python.AST.BaseExpr>(::ProblemExpression, frontend) {
 
-    /*
-    Magic numbers (https://docs.python.org/3/library/ast.html#ast.FormattedValue):
-    conversion is an integer:
-        -1: no formatting
-        115: !s string formatting
-        114: !r repr formatting
-        97: !a ascii formatting
-     */
-    private val formattedValConversionNoFormatting = -1L
-    private val formattedValConversionString = 115L
-    private val formattedValConversionRepr = 114L
-    private val formattedValConversionASCII = 97L
-
     override fun handleNode(node: Python.AST.BaseExpr): Expression {
         return when (node) {
             is Python.AST.Name -> handleName(node)
@@ -182,45 +168,116 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) :
         return assignExpression
     }
 
+    /**
+     * Translates a Python
+     * [`FormattedValue`](https://docs.python.org/3/library/ast.html#ast.FormattedValue) into an
+     * [Expression].
+     *
+     * Format specifications are modeled following [PEP 3101](https://peps.python.org/pep-3101).
+     *
+     * The following example
+     *
+     * ```python
+     * f"{value:.2f}"
+     * ```
+     *
+     * is modeled as follows:
+     * 1. The value `value` is wrapped in a `format()` call.
+     * 2. The `format()` call has two arguments:
+     *     - The value (`value`), wrapped in `str()`/`repr()`/`ascii()` for `!s`/`!r`/`!a`.
+     *     - The format specification (`".2f"`).
+     *
+     * CPG Representation:
+     * - `CallExpression` node:
+     *     - `callee`: `Reference` to `format`.
+     *     - `arguments`:
+     *         1. A node representing `value`.
+     *         2. A node representing the string `".2f"`.
+     */
     private fun handleFormattedValue(node: Python.AST.FormattedValue): Expression {
-        if (node.format_spec != null) {
-            return newProblemExpression(
-                "Cannot handle formatted value with format_spec ${node.format_spec} yet",
-                rawNode = node
-            )
-        }
-        return when (node.conversion) {
-            formattedValConversionNoFormatting -> {
-                // No formatting, just return the value.
-                handle(node.value)
-            }
-            formattedValConversionString -> {
-                // String representation. wrap in str() call.
-                val strCall =
-                    newCallExpression(newReference("str", rawNode = node), "str", rawNode = node)
-                strCall.addArgument(handle(node.value))
-                strCall
-            }
-            formattedValConversionRepr -> {
-                newProblemExpression(
-                    "Cannot handle conversion '114: !r repr formatting', yet.",
-                    rawNode = node
-                )
-            }
-            formattedValConversionASCII -> {
-                newProblemExpression(
-                    "Cannot handle conversion '97: !a ascii formatting', yet.",
-                    rawNode = node
-                )
+        /*
+        Magic numbers (https://docs.python.org/3/library/ast.html#ast.FormattedValue):
+        conversion is an integer:
+            -1: no formatting
+            115: !s string formatting
+            114: !r repr formatting
+            97: !a ascii formatting
+         */
+        val formattedValConversionNoFormatting = -1L
+        val formattedValConversionString = 115L
+        val formattedValConversionRepr = 114L
+        val formattedValConversionASCII = 97L
+
+        val formatSpec = node.format_spec?.let { handle(it) }
+        val valueExpression = handle(node.value)
+        val conversion =
+            when (node.conversion) {
+                formattedValConversionNoFormatting -> {
+                    // No conversion requested: use the translated value as-is.
+                    valueExpression
+                }
+                formattedValConversionString -> {
+                    // String representation: wrap in `str()` call.
+                    val strCall =
+                        newCallExpression(
+                                callee = newReference(name = "str", rawNode = node),
+                                fqn = "str",
+                                rawNode = node
+                            )
+                            .implicit()
+                    strCall.addArgument(valueExpression)
+                    strCall
+                }
+                formattedValConversionRepr -> {
+                    // Repr-String representation: wrap in `repr()` call.
+                    val reprCall =
+                        newCallExpression(
+                                callee = newReference(name = "repr", rawNode = node),
+                                fqn = "repr",
+                                rawNode = node
+                            )
+                            .implicit()
+                    reprCall.addArgument(valueExpression)
+                    reprCall
+                }
+                formattedValConversionASCII -> {
+                    // ASCII-String representation: wrap in `ascii()` call.
+                    val asciiCall =
+                        newCallExpression(
+                                callee = newReference(name = "ascii", rawNode = node),
+                                fqn = "ascii",
+                                rawNode = node
+                            )
+                            .implicit()
+                    asciiCall.addArgument(valueExpression)
+                    asciiCall
+                }
+                else ->
+                    newProblemExpression(
+                        problem =
+                            "Cannot handle formatted value with conversion code ${node.conversion} yet",
+                        rawNode = node
+                    )
             }
-            else ->
-                newProblemExpression(
-                    "Cannot handle formatted value with conversion ${node.conversion} yet",
+        if (formatSpec != null) {
+            return newCallExpression(
+                    callee = newReference(name = "format", rawNode = node),
+                    fqn = "format",
                     rawNode = node
                 )
+                .implicit()
+                .apply {
+                    addArgument(conversion)
+                    addArgument(formatSpec)
+                }
         }
+        return conversion
     }
 
+    /**
+     * Translates a Python [`JoinedStr`](https://docs.python.org/3/library/ast.html#ast.JoinedStr)
+     * into an [Expression].
+ */ private fun handleJoinedStr(node: Python.AST.JoinedStr): Expression { val values = node.values.map(::handle) return if (values.isEmpty()) { diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index 4f8f908227..e95489f9b4 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -38,6 +38,7 @@ import de.fraunhofer.aisec.cpg.graph.types.ObjectType import de.fraunhofer.aisec.cpg.graph.types.SetType import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker import de.fraunhofer.aisec.cpg.passes.ControlDependenceGraphPass +import de.fraunhofer.aisec.cpg.query.value import de.fraunhofer.aisec.cpg.sarif.Region import de.fraunhofer.aisec.cpg.test.* import java.nio.file.Path diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/expressionHandler/FormattedValueHandlerTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/expressionHandler/FormattedValueHandlerTest.kt new file mode 100644 index 0000000000..fe26f34a77 --- /dev/null +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/expressionHandler/FormattedValueHandlerTest.kt @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *                    $$$$$$\  $$$$$$$\   $$$$$$\
+ *                   $$  __$$\ $$  __$$\ $$  __$$\
+ *                   $$ /  \__|$$ |  $$ |$$ /  \__|
+ *                   $$ |      $$$$$$$  |$$ |$$$$\
+ *                   $$ |      $$  ____/ $$ |\_$$ |
+ *                   $$ |  $$\ $$ |      $$ |  $$ |
+ *                   \$$$$$   |$$ |      \$$$$$   |
+ *                    \______/ \__|       \______/
+ *
+ */
+package de.fraunhofer.aisec.cpg.frontends.python.expressionHandler
+
+import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage
+import de.fraunhofer.aisec.cpg.graph.*
+import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
+import de.fraunhofer.aisec.cpg.graph.statements.expressions.Literal
+import de.fraunhofer.aisec.cpg.test.analyzeAndGetFirstTU
+import de.fraunhofer.aisec.cpg.test.assertLiteralValue
+import de.fraunhofer.aisec.cpg.test.assertLocalName
+import java.nio.file.Path
+import kotlin.test.assertEquals
+import kotlin.test.assertIs
+import kotlin.test.assertNotNull
+import org.junit.jupiter.api.BeforeAll
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.TestInstance
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class FormattedValueHandlerTest {
+
+    private lateinit var topLevel: Path
+    private lateinit var result: TranslationUnitDeclaration
+
+    @BeforeAll
+    fun setup() {
+        topLevel = Path.of("src", "test", "resources", "python")
+        analyzeFile()
+    }
+
+    fun analyzeFile() {
+        result =
+            analyzeAndGetFirstTU(
+                listOf(topLevel.resolve("formatted_values.py").toFile()),
+                topLevel,
+                true
+            ) {
+                it.registerLanguage<PythonLanguage>()
+            }
+        assertNotNull(result)
+    }
+
+    @Test
+    fun testFormattedValues() {
+        // Test for a = f'Number: {42:.2f}'
+        val aAssExpression = result.variables["a"]?.astParent
+        assertIs<AssignExpression>(aAssExpression)
+        val aExprRhs = aAssExpression.rhs.singleOrNull()
+        assertIs<BinaryOperator>(aExprRhs)
+        val aFormatCall = aExprRhs.rhs
+        assertIs<CallExpression>(aFormatCall)
+        assertLocalName("format", aFormatCall)
+        val aArguments = aFormatCall.arguments
+        assertEquals(2, aArguments.size)
+        assertIs<Literal<*>>(aArguments[0])
+        assertLiteralValue(42.toLong(), aArguments[0])
+        assertIs<Literal<*>>(aArguments[1])
+        assertLiteralValue(".2f", aArguments[1])
+
+        // Test for b = f'Hexadecimal: {255:#x}'
+        val bAssExpression = result.variables["b"]?.astParent
+        assertIs<AssignExpression>(bAssExpression)
+        val bExprRhs = bAssExpression.rhs.singleOrNull()
+        assertIs<BinaryOperator>(bExprRhs)
+        val bFormatCall = bExprRhs.rhs
+        assertIs<CallExpression>(bFormatCall)
+        assertLocalName("format", bFormatCall)
+        val bArguments = bFormatCall.arguments
+        assertEquals(2, bArguments.size)
+        assertIs<Literal<*>>(bArguments[0])
+        assertLiteralValue(255.toLong(), bArguments[0])
+        assertIs<Literal<*>>(bArguments[1])
+        assertLiteralValue("#x", bArguments[1])
+
+        // Test for c = f'String with conversion: {"Hello, world!"!r}'
+        val cAssExpression = result.variables["c"]?.astParent
+        assertIs<AssignExpression>(cAssExpression)
+        val cExprRhs = cAssExpression.rhs.singleOrNull()
+        assertIs<BinaryOperator>(cExprRhs)
+        val cConversionCall = cExprRhs.rhs
+        assertIs<CallExpression>(cConversionCall)
+        assertLocalName("repr", cConversionCall)
+        val cArguments = cConversionCall.arguments.singleOrNull()
+        assertNotNull(cArguments)
+        assertLiteralValue("Hello, world!", cArguments)
+
+        // Test for d = f'ASCII representation: {"50$"!a}'
+        val dAssExpression = result.variables["d"]?.astParent
+        assertIs<AssignExpression>(dAssExpression)
+        val dExprRhs = dAssExpression.rhs.singleOrNull()
+        assertIs<BinaryOperator>(dExprRhs)
+        val dConversionCall = dExprRhs.rhs
+        assertIs<CallExpression>(dConversionCall)
+        assertLocalName("ascii", dConversionCall)
+        val dArguments = dConversionCall.arguments.singleOrNull()
+        assertNotNull(dArguments)
+        assertLiteralValue("50$", dArguments)
+
+        // Test for e = f'Combined: {42!s:10}'
+        // This is translated to `'Combined: ' + format(str(42), "10")`
+        val eAssExpression = result.variables["e"]?.astParent
+        assertIs<AssignExpression>(eAssExpression)
+        val eExprRhs = eAssExpression.rhs.singleOrNull()
+        assertIs<BinaryOperator>(eExprRhs)
+        val eFormatCall = eExprRhs.rhs
+        assertIs<CallExpression>(eFormatCall)
+        assertLocalName("format", eFormatCall)
+        val eArguments = eFormatCall.arguments
+        assertEquals(2, eArguments.size)
+        val eConversionCall = eArguments[0]
+        assertIs<CallExpression>(eConversionCall)
+        assertLocalName("str", eConversionCall)
+        assertLiteralValue(42.toLong(), eConversionCall.arguments.singleOrNull())
+        assertLiteralValue("10", eArguments[1])
+    }
+}
diff --git a/cpg-language-python/src/test/resources/python/datatypes.py b/cpg-language-python/src/test/resources/python/datatypes.py
index 6e35b7b7d2..9ad09dcef6 100644
--- a/cpg-language-python/src/test/resources/python/datatypes.py
+++ b/cpg-language-python/src/test/resources/python/datatypes.py
@@ -7,4 +7,5 @@
     "e": "f"
 }
 e = f'Values of a: {a} and b: {b!s}'
-f = a[1:3:2]
\ No newline at end of file
+f = a[1:3:2]
+
diff --git a/cpg-language-python/src/test/resources/python/formatted_values.py b/cpg-language-python/src/test/resources/python/formatted_values.py
new file mode 100644
index 0000000000..1efd3bc4dc
--- /dev/null
+++ b/cpg-language-python/src/test/resources/python/formatted_values.py
@@ -0,0 +1,5 @@
+a = f'Number: {42:.2f}'
+b = f'Hexadecimal: {255:#x}'
+c = f'String with conversion: {"Hello, world!"!r}'
+d = f'ASCII representation: {"50$"!a}'
+e = f'Combined: {42!s:10}'
\ No newline at end of file