diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index 199f03fe91..98cee4d6e2 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend +import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse import de.fraunhofer.aisec.cpg.frontends.SupportsParallelParsing import de.fraunhofer.aisec.cpg.frontends.TranslationException import de.fraunhofer.aisec.cpg.graph.Component @@ -43,7 +44,10 @@ import java.util.concurrent.CompletableFuture import java.util.concurrent.CompletionException import java.util.concurrent.ExecutionException import java.util.concurrent.atomic.AtomicBoolean +import kotlin.io.path.absolute +import kotlin.io.path.readText import kotlin.reflect.full.findAnnotation +import kotlin.time.DurationUnit import org.slf4j.LoggerFactory /** Main entry point for all source code translation for all language front-ends. */ @@ -116,6 +120,15 @@ private constructor( } } + log.info( + "Translated {} LoC in total ({} / LoC)", + result.stats.totalLinesOfCode, + (outerBench.duration / result.stats.totalLinesOfCode).toString( + DurationUnit.MILLISECONDS, + decimals = 3 + ) + ) + return result } @@ -276,7 +289,7 @@ private constructor( val future = CompletableFuture.supplyAsync { try { - return@supplyAsync parse(component, ctx, sourceLocation) + return@supplyAsync parse(component, result, ctx, sourceLocation) } catch (e: TranslationException) { throw RuntimeException("Error parsing $sourceLocation", e) } @@ -337,7 +350,7 @@ private constructor( for (sourceLocation in sourceLocations) { ctx.currentComponent = component - val f = parse(component, ctx, sourceLocation) + val f = parse(component, result, ctx, sourceLocation) if (f != null) { handleCompletion(result, usedFrontends, sourceLocation, f) } @@ -365,6 +378,7 @@ private constructor( @Throws(TranslationException::class) private fun parse( component: Component, + result: TranslationResult, ctx: TranslationContext, sourceLocation: File, ): LanguageFrontend<*, *>? { @@ -384,7 +398,30 @@ private constructor( } return null } - component.addTranslationUnit(frontend.parse(sourceLocation)) + + // Check, if the frontend supports the new API + var tu = + if (frontend is SupportsNewParse) { + // Read the file contents and supply it to the frontend. This gives us a chance + // to do some statistics here, for example on the lines of code. For now, we + // just print it, in a future PR we will gather this information and consolidate + // it. + var path = sourceLocation.toPath().absolute() + var content = path.readText() + var linesOfCode = content.linesOfCode + + log.info("{} has {} LoC", path, linesOfCode) + + var tu = frontend.parse(content, path) + + // Add the LoC. This needs to be synchronized on the stats object, because of + // parallel parsing + synchronized(result.stats) { result.stats.totalLinesOfCode += linesOfCode } + tu + } else { + frontend.parse(sourceLocation) + } + component.addTranslationUnit(tu) } catch (ex: TranslationException) { log.error("An error occurred during parsing of ${sourceLocation.name}: ${ex.message}") if (config.failOnError) { @@ -462,3 +499,12 @@ private constructor( } } } + +/** + * This returns a VERY trivial count of the lines of code (mainly just the line count). This can be + * extended to a real LoC algorithm at some point. + */ +val String.linesOfCode: Int + get() { + return this.count { it == '\n' } + } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationResult.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationResult.kt index 05ace1a4dc..675122ca57 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationResult.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationResult.kt @@ -85,6 +85,8 @@ class TranslationResult( return finalCtx } + var stats = TranslationStats() + /** * Checks if only a single software component has been analyzed and returns its translation * units. For multiple software components, it aggregates the results. diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationStats.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationStats.kt new file mode 100644 index 0000000000..a97cf11be7 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationStats.kt @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg + +import de.fraunhofer.aisec.cpg.helpers.MeasurementHolder +import de.fraunhofer.aisec.cpg.helpers.StatisticsHolder + +/** + * This class provides some statistics about our translation process. At some point this will fully + * replace [StatisticsHolder] and [MeasurementHolder] + */ +class TranslationStats { + + /** The total lines of code that were translated into the CPG. */ + var totalLinesOfCode: Int = 0 +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/SupportsNewParse.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/SupportsNewParse.kt new file mode 100644 index 0000000000..7bb3538062 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/SupportsNewParse.kt @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.frontends + +import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import java.nio.file.Path + +interface SupportsNewParse { + /** + * Parses the given [content] with the language frontend into a [TranslationUnitDeclaration]. If + * known, a [path] should be specified, so that the language frontend can potentially use more + * advanced features like module resolution. + */ + fun parse(content: String, path: Path? = null): TranslationUnitDeclaration +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/MeasurementHolder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/MeasurementHolder.kt index f16b284cdc..5e5d76091c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/MeasurementHolder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/MeasurementHolder.kt @@ -33,6 +33,8 @@ import java.nio.file.Path import java.time.Duration import java.time.Instant import java.util.* +import kotlin.time.DurationUnit +import kotlin.time.toDuration import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -146,6 +148,7 @@ constructor( ) : MeasurementHolder(c, message, debug, holder) { private val start: Instant + var duration: kotlin.time.Duration = kotlin.time.Duration.ZERO /** Stops this benchmark and adds its measurement to the its [StatisticsHolder]. */ fun stop() { @@ -154,7 +157,7 @@ constructor( /** Stops the time and computes the difference between */ override fun addMeasurement(measurementKey: String?, measurementValue: String?): Any? { - val duration = Duration.between(start, Instant.now()).toMillis() + var duration = Duration.between(start, Instant.now()).toMillis() measurements["${caller}: $message"] = "$duration ms" logDebugMsg("$caller: $message done in $duration ms") @@ -162,6 +165,9 @@ constructor( // update our holder, if we have any holder?.addBenchmark(this) + // update our internal duration so that others can access it + this.duration = duration.toDuration(DurationUnit.MILLISECONDS) + return duration } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/sarif/PhysicalLocation.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/sarif/PhysicalLocation.kt index 3fca276c8f..3f0dd83f07 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/sarif/PhysicalLocation.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/sarif/PhysicalLocation.kt @@ -29,11 +29,15 @@ import java.net.URI import java.util.* /** A SARIF compatible location referring to a location, i.e. file and region within the file. */ -class PhysicalLocation(uri: URI, region: Region) { - class ArtifactLocation(val uri: URI) { +class PhysicalLocation(uri: URI?, region: Region) { + class ArtifactLocation(val uri: URI?) { override fun toString(): String { - return uri.path.substring(uri.path.lastIndexOf('/') + 1) + return if (uri != null) { + uri.path + } else { + "unknown" + } } override fun equals(other: Any?): Boolean { @@ -45,7 +49,7 @@ class PhysicalLocation(uri: URI, region: Region) { override fun hashCode() = Objects.hashCode(uri) } - val artifactLocation: ArtifactLocation + var artifactLocation: ArtifactLocation var region: Region init { @@ -68,11 +72,7 @@ class PhysicalLocation(uri: URI, region: Region) { companion object { fun locationLink(location: PhysicalLocation?): String { return if (location != null) { - (location.artifactLocation.uri.path + - ":" + - location.region.startLine + - ":" + - location.region.startColumn) + "${location.artifactLocation}:${location.region.startLine}:${location.region.startColumn}" } else "unknown" } } diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt index 029981cbe1..35473d229d 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonLanguageFrontend.kt @@ -28,6 +28,7 @@ package de.fraunhofer.aisec.cpg.frontends.python import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend +import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse import de.fraunhofer.aisec.cpg.frontends.TranslationException import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration @@ -39,15 +40,16 @@ import de.fraunhofer.aisec.cpg.passes.configuration.RegisterExtraPass import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation import de.fraunhofer.aisec.cpg.sarif.Region import java.io.File -import java.net.URI +import java.nio.file.Path import jep.python.PyObject -import kotlin.io.path.Path +import kotlin.io.path.absolute +import kotlin.io.path.name import kotlin.io.path.nameWithoutExtension import kotlin.math.min @RegisterExtraPass(PythonAddDeclarationsPass::class) class PythonLanguageFrontend(language: Language, ctx: TranslationContext) : - LanguageFrontend(language, ctx) { + LanguageFrontend(language, ctx), SupportsNewParse { private val lineSeparator = '\n' // TODO private val tokenTypeIndex = 0 private val jep = JepSingleton // configure Jep @@ -62,21 +64,32 @@ class PythonLanguageFrontend(language: Language, ctx: Tr * new [PythonLanguageFrontend] instance per file. */ private lateinit var fileContent: String - private lateinit var uri: URI + private var filePath: Path? = null @Throws(TranslationException::class) override fun parse(file: File): TranslationUnitDeclaration { - fileContent = file.readText(Charsets.UTF_8) - uri = file.toURI() + return parse(file.readText(Charsets.UTF_8), file.toPath()) + } + + override fun parse(content: String, path: Path?): TranslationUnitDeclaration { + this.fileContent = content + this.filePath = path jep.getInterp().use { - it.set("content", fileContent) - it.set("filename", file.absolutePath) + it.set("content", content) + it.set( + "filename", + if (path != null) { + path.absolute().toString() + } else { + "" + } + ) it.exec("import ast") it.exec("parsed = ast.parse(content, filename=filename, type_comments=True)") val pyAST = it.getValue("parsed") as PyObject - val tud = pythonASTtoCPG(pyAST, file.name) + val tud = pythonASTtoCPG(pyAST, path) if (config.matchCommentsToNodes) { it.exec("import tokenize") @@ -236,7 +249,7 @@ class PythonLanguageFrontend(language: Language, ctx: Tr override fun locationOf(astNode: Python.AST.AST): PhysicalLocation? { return if (astNode is Python.AST.WithLocation) { PhysicalLocation( - uri, + filePath?.toUri(), Region( startLine = astNode.lineno, endLine = astNode.end_lineno, @@ -253,17 +266,22 @@ class PythonLanguageFrontend(language: Language, ctx: Tr // will be invoked by native function } - private fun pythonASTtoCPG(pyAST: PyObject, path: String): TranslationUnitDeclaration { + private fun pythonASTtoCPG(pyAST: PyObject, path: Path?): TranslationUnitDeclaration { val pythonASTModule = fromPython(pyAST) as? Python.AST.Module ?: TODO( "Python ast of type ${fromPython(pyAST).javaClass} is not supported yet" ) // could be one of "ast.{Module,Interactive,Expression,FunctionType} - val tud = newTranslationUnitDeclaration(path, rawNode = pythonASTModule) + val tud = newTranslationUnitDeclaration(path?.name, rawNode = pythonASTModule) scopeManager.resetToGlobal(tud) - val nsdName = Path(path).nameWithoutExtension + val nsdName = + if (path != null) { + path.nameWithoutExtension + } else { + "unknown" + } val nsd = newNamespaceDeclaration(nsdName, rawNode = pythonASTModule) tud.addDeclaration(nsd) diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index 5d63e06d6d..c1a427c7cf 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -25,6 +25,10 @@ */ package de.fraunhofer.aisec.cpg.frontends.python +import de.fraunhofer.aisec.cpg.ScopeManager +import de.fraunhofer.aisec.cpg.TranslationConfiguration +import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.TypeManager import de.fraunhofer.aisec.cpg.analysis.ValueEvaluator import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.Annotation @@ -1345,6 +1349,32 @@ class PythonFrontendTest : BaseTest() { assertEquals(4.toLong(), rhs.evaluate()) } + @Test + fun testParseContent() { + var frontend = + PythonLanguageFrontend( + language = PythonLanguage(), + ctx = + TranslationContext( + TranslationConfiguration.builder().build(), + ScopeManager(), + TypeManager() + ) + ) + + val tu = frontend.parse("a = 4\nprint(a)") + assertNotNull(tu) + + val unknown = tu.namespaces["unknown"] + assertNotNull(unknown) + + val refNames = tu.refs.map { it.name.localName } + assertEquals(listOf("a", "a", "print"), refNames) + + val call = tu.calls["print"] + assertNotNull(call) + } + class PythonValueEvaluator : ValueEvaluator() { override fun computeBinaryOpEffect( lhsValue: Any?, diff --git a/cpg-language-ruby/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/ruby/RubyLanguageFrontend.kt b/cpg-language-ruby/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/ruby/RubyLanguageFrontend.kt index 42d1ccb6c3..129d495d46 100644 --- a/cpg-language-ruby/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/ruby/RubyLanguageFrontend.kt +++ b/cpg-language-ruby/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/ruby/RubyLanguageFrontend.kt @@ -27,11 +27,13 @@ package de.fraunhofer.aisec.cpg.frontends.ruby import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend +import de.fraunhofer.aisec.cpg.frontends.SupportsNewParse import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation import java.io.File +import java.nio.file.Path import org.jruby.Ruby import org.jruby.ast.BlockNode import org.jruby.ast.MethodDefNode @@ -40,19 +42,27 @@ import org.jruby.parser.Parser import org.jruby.parser.ParserConfiguration class RubyLanguageFrontend(language: RubyLanguage, ctx: TranslationContext) : - LanguageFrontend(language, ctx) { + LanguageFrontend(language, ctx), SupportsNewParse { val declarationHandler: DeclarationHandler = DeclarationHandler(this) val expressionHandler: ExpressionHandler = ExpressionHandler(this) val statementHandler: StatementHandler = StatementHandler(this) override fun parse(file: File): TranslationUnitDeclaration { + return parse(file.readText(Charsets.UTF_8), file.toPath()) + } + + override fun parse(content: String, path: Path?): TranslationUnitDeclaration { val ruby = Ruby.getGlobalRuntime() val parser = Parser(ruby) val node = parser.parse( - file.path, - file.inputStream(), + if (path != null) { + path.toString() + } else { + "unknown" + }, + content.byteInputStream(), null, ParserConfiguration(ruby, 0, false, true, false) ) as RootNode