From cff462f7f6c7e7776afb58c085eb24fc423d4687 Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Fri, 1 Dec 2023 16:38:45 +0100 Subject: [PATCH] Python improve codeOf (#1387) --- .../python/PythonLanguageFrontend.kt | 82 ++++++++++++++----- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt index 042916af46..df72d6c1d2 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt @@ -42,10 +42,12 @@ import java.net.URI import jep.python.PyObject import kotlin.io.path.Path import kotlin.io.path.nameWithoutExtension +import kotlin.math.min @RegisterExtraPass(PythonAddDeclarationsPass::class) class PythonLanguageFrontend(language: Language, ctx: TranslationContext) : LanguageFrontend(language, ctx) { + private val lineSeparator = '\n' // TODO private val jep = JepSingleton // configure Jep // val declarationHandler = DeclarationHandler(this) @@ -69,7 +71,6 @@ class PythonLanguageFrontend(language: Language, ctx: Tr it.set("content", fileContent) it.set("filename", file.absolutePath) it.exec("import ast") - it.exec("import os") it.exec("parsed = ast.parse(content, filename=filename, type_comments=True)") val pyAST = it.getValue("parsed") as PyObject @@ -116,32 +117,71 @@ class PythonLanguageFrontend(language: Language, ctx: Tr } } + /** + * This function extracts the source code from the input file given a location. This is a bit + * tricky in Python, as indents are part of the syntax. We also don't want to include leading + * whitespaces/tabs in case of extracting a nested code fragment. 
Thus, we use the following + * approximation to retrieve the fragment's source code: + * 1) Get the relevant source code lines + * 2) Delete extra code at the end of the last line that is not part of the provided location + * 3) Remove leading whitespaces / tabs + */ override fun codeOf(astNode: Python.AST): String? { - val physicalLocation = locationOf(astNode) - if (physicalLocation != null) { - val lines = - fileContent - .split('\n') // TODO - .subList(physicalLocation.region.startLine - 1, physicalLocation.region.endLine) - val mutableLines = lines.toMutableList() - - // remove not needed first characters of all lines (making the assumption, that we are - // in an intended code block - for (idx in mutableLines.indices) { - mutableLines[idx] = mutableLines[idx].substring(physicalLocation.region.startColumn) - } + val location = locationOf(astNode) + if (location != null) { + var lines = getRelevantLines(location) + lines = removeExtraAtEnd(location, lines) + lines = fixStartColumn(location, lines) - // remove not needed trailing characters of last line - val lastLineIdx = mutableLines.lastIndex - val toRemove = - mutableLines[lastLineIdx].length + physicalLocation.region.startColumn - - physicalLocation.region.endColumn - mutableLines[lastLineIdx] = mutableLines[lastLineIdx].dropLast(toRemove) - return mutableLines.joinToString(separator = "\n") // TODO + return lines.joinToString(separator = lineSeparator.toString()) } return null } + private fun getRelevantLines(location: PhysicalLocation): MutableList<String> { + val lines = + fileContent + .split(lineSeparator) + .subList(location.region.startLine - 1, location.region.endLine) + return lines.toMutableList() + } + + private fun fixStartColumn( + location: PhysicalLocation, + lines: MutableList<String> + ): MutableList<String> { + for (idx in lines.indices) { + val prefixLength = min(location.region.startColumn, lines[idx].length) + if (idx == 0) { + lines[idx] = lines[idx].substring(prefixLength) + } else { + + for (j in 
0..prefixLength - 1) { + if (lines[idx][0] == ' ' || lines[idx][0] == '\t') { + lines[idx] = lines[idx].substring(1) + } else { + break + } + } + } + } + return lines + } + + private fun removeExtraAtEnd( + location: PhysicalLocation, + lines: MutableList<String> + ): MutableList<String> { + val lastLineIdx = lines.lastIndex + val lastLineLength = lines[lastLineIdx].length + val locationEndColumn = location.region.endColumn + val toRemove = lastLineLength - locationEndColumn + if (toRemove > 0) { + lines[lastLineIdx] = lines[lastLineIdx].dropLast(toRemove) + } + return lines + } + override fun locationOf(astNode: Python.AST): PhysicalLocation? { return if (astNode is Python.WithPythonLocation) { PhysicalLocation(