Skip to content

Commit

Permalink
Fixed crash in getCodeOfSubregion (#1776)
Browse files Browse the repository at this point in the history
  • Loading branch information
oxisto authored and maximiliankaul committed Oct 8, 2024
1 parent 0187754 commit 9755f71
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg.helpers

import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.sarif.Region
import kotlin.math.min
import org.apache.commons.lang3.StringUtils

/**
Expand Down Expand Up @@ -62,12 +63,17 @@ fun getCodeOfSubregion(code: String, nodeRegion: Region, subRegion: Region): Str
(StringUtils.ordinalIndexOf(code, nlType, subRegion.startLine - nodeRegion.startLine) +
subRegion.startColumn)
}
val end =
var end =
if (subRegion.endLine == nodeRegion.startLine) {
subRegion.endColumn - nodeRegion.startColumn
} else {
(StringUtils.ordinalIndexOf(code, nlType, subRegion.endLine - nodeRegion.startLine) +
subRegion.endColumn)
}

// Unfortunately, when the code contains non-ASCII characters, the Python AST sometimes counts a
// single character as several columns and reports an end position that lies beyond the end of
// our code. Clamp "end" to the code length so that substring() does not throw.
end = min(end, code.length)
return code.substring(start, end)
}
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,33 @@ class PythonFrontendTest : BaseTest() {
assertEquals(4.toLong(), rhs.evaluate())
}

/**
 * Parses `unicode.py` and verifies that a non-ASCII character in a function body shifts the
 * reported end column but does not break extraction of the body's code.
 */
@Test
fun testParseWithUnicode() {
    val pythonResources = Path.of("src", "test", "resources", "python")
    val unicodeFile = pythonResources.resolve("unicode.py").toFile()
    val unit =
        analyzeAndGetFirstTU(listOf(unicodeFile), pythonResources, true) { config ->
            config.registerLanguage<PythonLanguage>()
        }
    assertNotNull(unit)

    // Baseline: the body line `    e = "e"` is 11 characters long (indentation included), so
    // the SARIF end column is 12.
    val asciiFunc = unit.functions["normal_func"]
    assertNotNull(asciiFunc)
    val asciiEndColumn = asciiFunc.body?.location?.region?.endColumn
    assertEquals(12, asciiEndColumn)

    // Same layout, but with "é" instead of "e". The line is still 11 characters long, yet the
    // Python parser appears to count "é" as two bytes (presumably a UTF-8 byte offset -- TODO
    // confirm), so the reported end column is 13 instead of 12.
    val accentedFunc = unit.functions["unicode_func"]
    assertNotNull(accentedFunc)
    val accentedEndColumn = accentedFunc.body?.location?.region?.endColumn
    assertEquals(13, accentedEndColumn)

    // The reported region therefore runs past the end of the line; the code extraction clamps
    // it instead of crashing, and we still get the complete statement back.
    val accentedCode = accentedFunc.body?.code
    assertEquals("e = \"é\"", accentedCode)
}

class PythonValueEvaluator : ValueEvaluator() {
override fun computeBinaryOpEffect(
lhsValue: Any?,
Expand Down
5 changes: 5 additions & 0 deletions cpg-language-python/src/test/resources/python/unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Fixture for PythonFrontendTest.testParseWithUnicode: ASCII-only baseline.
# Keep the body a single statement indented by four spaces (11 characters in total); the test
# asserts the end column of the body, so do not add a docstring or change the layout.
def normal_func():
    e = "e"

# Fixture for PythonFrontendTest.testParseWithUnicode: body containing a non-ASCII character.
# The line is 11 characters long (four spaces of indentation included), but the test expects the
# parser to report an end column of 13 rather than 12. Keep the body a single statement and do
# not add a docstring or change the layout.
def unicode_func():
    e = "é"

0 comments on commit 9755f71

Please sign in to comment.