From 40866cee74ca791ba8fd3bd54d74b12faa66805d Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Fri, 13 Sep 2024 09:44:05 +0200 Subject: [PATCH 01/19] Trying to speed up type handling A map of lists instead of a simple list --- .../de/fraunhofer/aisec/cpg/TranslationManager.kt | 4 +++- .../kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt | 11 ++++++----- .../de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt | 2 +- .../de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt | 3 ++- .../de/fraunhofer/aisec/cpg/passes/TypeResolver.kt | 4 ++-- .../cpg/passes/JavaExternalTypeHierarchyResolver.kt | 2 +- .../de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt | 2 ++ 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index 199f03fe91..db53365233 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -314,7 +314,9 @@ private constructor( // and individual file scopes beneath it var newGlobalScope = globalCtx.scopeManager.globalScope var types = - globalCtx.typeManager.firstOrderTypes.union(globalCtx.typeManager.secondOrderTypes) + globalCtx.typeManager.firstOrderTypes.values + .flatten() + .union(globalCtx.typeManager.secondOrderTypes) types.forEach { if (it.scope is GlobalScope) { it.scope = newGlobalScope diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index 8745e7944d..2f4bbb6a01 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -54,7 +54,7 @@ class TypeManager { MutableMap> = ConcurrentHashMap() - val firstOrderTypes: MutableSet = ConcurrentHashMap.newKeySet() + val firstOrderTypes = ConcurrentHashMap>() val secondOrderTypes: MutableSet = ConcurrentHashMap.newKeySet() /** @@ -197,9 +197,10 @@ class TypeManager { } if (t.isFirstOrderType) { + var types = firstOrderTypes.computeIfAbsent(t.name.toString()) { mutableListOf() } // Make sure we only ever return one unique object per type - if (!firstOrderTypes.add(t)) { - return firstOrderTypes.first { it == t && it is T } as T + if (!types.add(t)) { + return types.first { it == t && it is T } as T } else { log.trace( "Registering unique first order type {}{}", @@ -224,7 +225,7 @@ class TypeManager { /** Checks, whether a [Type] with the given [name] exists. */ fun typeExists(name: CharSequence): Boolean { - return firstOrderTypes.any { type: Type -> type.root.name == name } + return firstOrderTypes.values.flatten().any { type: Type -> type.root.name == name } } fun resolvePossibleTypedef(alias: Type, scopeManager: ScopeManager): Type { @@ -247,7 +248,7 @@ class TypeManager { return primitiveType } - return firstOrderTypes.firstOrNull { + return firstOrderTypes[fqn.toString()]?.firstOrNull { (it.typeOrigin == Type.Origin.RESOLVED || it.typeOrigin == Type.Origin.GUESSED) && it.root.name == fqn && if (generics != null) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt index a0a0fde85a..87a435c660 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt @@ -118,7 +118,7 @@ fun LanguageProvider.objectType( synchronized(c.typeManager.firstOrderTypes) { // We can try to look up the type by its name and return it, if it already exists. var type = - c.typeManager.firstOrderTypes.firstOrNull { + c.typeManager.firstOrderTypes[name.toString()]?.firstOrNull { it is ObjectType && it.name == name && it.scope == scope && diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 6a3d8707a3..f9c5b7cbe8 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -1011,7 +1011,8 @@ fun TranslationContext.tryRecordInference( // update the type's record. Because types are only unique per scope, we potentially need to // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record if (record != null) { - typeManager.firstOrderTypes + typeManager.firstOrderTypes.values + .flatten() .filter { it.name == record.name } .forEach { it.recordDeclaration = record } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index df57786fd1..65d6d3f4f7 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -122,7 +122,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { private fun handleNode(node: Node?) { if (node is RecordDeclaration) { - for (t in typeManager.firstOrderTypes) { + for (t in typeManager.firstOrderTypes.values.flatten()) { if (t.name == node.name && t is ObjectType) { // The node is the class of the type t t.recordDeclaration = node @@ -136,7 +136,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } fun resolveFirstOrderTypes() { - for (type in typeManager.firstOrderTypes.sortedBy { it.name }) { + for (type in typeManager.firstOrderTypes.values.flatten().sortedBy { it.name }) { if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { resolveType(type) } diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt index 84a75a95b9..f3be085f0d 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt @@ -71,7 +71,7 @@ class JavaExternalTypeHierarchyResolver(ctx: TranslationContext) : ComponentPass } // Iterate over all known types and add their (direct) supertypes. - for (t in typeManager.firstOrderTypes) { + for (t in typeManager.firstOrderTypes.values.flatten()) { val symbol = resolver.tryToSolveType(t.typeName) if (symbol.isSolved) { try { diff --git a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt index 79c60675a8..95a5d691b7 100644 --- a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt +++ b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt @@ -616,6 +616,8 @@ class Application : Callable { "Benchmark: analyzing code in " + (analyzingTime - startTime) / S_TO_MS_FACTOR + " s." ) + translationResult.benchmarkResults.print() + exportJsonFile?.let { exportToJson(translationResult, it) } if (!noNeo4j) { pushToNeo4j(translationResult) From 5fb3f7a24c504a7ce9d0fe33ae82694ba6ea0c9c Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Wed, 25 Sep 2024 11:35:43 +0200 Subject: [PATCH 02/19] More optimization. Java test still fails --- .../aisec/cpg/TranslationManager.kt | 4 +-- .../de/fraunhofer/aisec/cpg/TypeManager.kt | 20 ++++++++++--- .../fraunhofer/aisec/cpg/graph/TypeBuilder.kt | 4 +-- .../aisec/cpg/passes/SymbolResolver.kt | 29 ++++++++++++------- .../aisec/cpg/passes/TypeResolver.kt | 13 +++++---- .../JavaExternalTypeHierarchyResolver.kt | 2 +- .../aisec/cpg/graph/types/TypeTests.kt | 3 +- 7 files changed, 49 insertions(+), 26 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index db53365233..199f03fe91 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -314,9 +314,7 @@ private constructor( // and individual file scopes beneath it var newGlobalScope = globalCtx.scopeManager.globalScope var types = - globalCtx.typeManager.firstOrderTypes.values - .flatten() - .union(globalCtx.typeManager.secondOrderTypes) + globalCtx.typeManager.firstOrderTypes.union(globalCtx.typeManager.secondOrderTypes) types.forEach { if (it.scope is GlobalScope) { it.scope = newGlobalScope diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index 2f4bbb6a01..c713a544a9 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -54,7 +54,19 @@ class TypeManager { MutableMap> = ConcurrentHashMap() - val firstOrderTypes = ConcurrentHashMap>() + /** + * A map that contains all first order types organized by their type name as key. This is + * extremely helpful for retrieving all possibly types for a given type name, which is done + * often during frontend parsing and symbol resolving. + */ + val firstOrderTypesMap = ConcurrentHashMap>() + + /** Retrieves the list of all *unique* first order types. */ + val firstOrderTypes: Set + get() { + return synchronized(firstOrderTypesMap) { firstOrderTypesMap.values.flatten().toSet() } + } + val secondOrderTypes: MutableSet = ConcurrentHashMap.newKeySet() /** @@ -197,7 +209,7 @@ class TypeManager { } if (t.isFirstOrderType) { - var types = firstOrderTypes.computeIfAbsent(t.name.toString()) { mutableListOf() } + var types = firstOrderTypesMap.computeIfAbsent(t.name.toString()) { mutableListOf() } // Make sure we only ever return one unique object per type if (!types.add(t)) { return types.first { it == t && it is T } as T @@ -225,7 +237,7 @@ class TypeManager { /** Checks, whether a [Type] with the given [name] exists. */ fun typeExists(name: CharSequence): Boolean { - return firstOrderTypes.values.flatten().any { type: Type -> type.root.name == name } + return firstOrderTypes.any { type: Type -> type.root.name == name } } fun resolvePossibleTypedef(alias: Type, scopeManager: ScopeManager): Type { @@ -248,7 +260,7 @@ class TypeManager { return primitiveType } - return firstOrderTypes[fqn.toString()]?.firstOrNull { + return firstOrderTypesMap[fqn.toString()]?.firstOrNull { (it.typeOrigin == Type.Origin.RESOLVED || it.typeOrigin == Type.Origin.GUESSED) && it.root.name == fqn && if (generics != null) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt index 87a435c660..47fab07cef 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt @@ -115,10 +115,10 @@ fun LanguageProvider.objectType( val scope = c.scopeManager.currentScope - synchronized(c.typeManager.firstOrderTypes) { + synchronized(c.typeManager.firstOrderTypesMap) { // We can try to look up the type by its name and return it, if it already exists. var type = - c.typeManager.firstOrderTypes[name.toString()]?.firstOrNull { + c.typeManager.firstOrderTypesMap[name.toString()]?.firstOrNull { it is ObjectType && it.name == name && it.scope == scope && diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index f9c5b7cbe8..0032c9eabd 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -285,7 +285,9 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return if (language != null && language.namespaceDelimiter.isNotEmpty()) { val parentName = (current.name.parent ?: current.name).toString() var type = current.objectType(parentName) - TypeResolver.resolveType(type) + if (type is ObjectType) { + TypeResolver.resolveType(type) + } type } else { current.unknownType() @@ -426,10 +428,10 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } var record = base.recordDeclaration - if (record == null) { + if (base !is UnknownType && base !is AutoType && record == null) { // We access an unknown field of an unknown record. so we need to handle that along the // way as well - record = ctx.tryRecordInference(base, locationHint = ref) + record = ctx.tryRecordInference(base, locationHint = ref, updateType = true) } if (record == null) { @@ -699,7 +701,11 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { if (records.isEmpty()) { records = listOfNotNull( - ctx.tryRecordInference(bestGuess?.root ?: unknownType(), locationHint = call) + ctx.tryRecordInference( + bestGuess?.root ?: unknownType(), + locationHint = call, + updateType = true + ) ) } records = records.distinct() @@ -967,10 +973,14 @@ fun TranslationContext.tryNamespaceInference( /** * Tries to infer a [RecordDeclaration] from an unresolved [Type]. This will return `null`, if * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + * + * If [updateType] is set to true, also the [ObjectType.recordDeclaration] is adjusted. This is only + * needed if we call this function in the [SymbolResolver] (and not in the [TypeResolver]). */ fun TranslationContext.tryRecordInference( type: Type, - locationHint: Node? = null + locationHint: Node? = null, + updateType: Boolean = false, ): RecordDeclaration? { val kind = if (type.language is HasStructs) { @@ -1010,11 +1020,10 @@ fun TranslationContext.tryRecordInference( // update the type's record. Because types are only unique per scope, we potentially need to // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record - if (record != null) { - typeManager.firstOrderTypes.values - .flatten() - .filter { it.name == record.name } - .forEach { it.recordDeclaration = record } + if (updateType && record != null) { + typeManager.firstOrderTypesMap[record.name.toString()]?.forEach { + it.recordDeclaration = record + } } return record diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 65d6d3f4f7..2dcba47724 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -79,7 +79,11 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { // Check for a possible typedef var target = ctx?.scopeManager?.typedefFor(type.name, type.scope) if (target != null) { - if (target.typeOrigin == Type.Origin.UNRESOLVED && type != target) { + if ( + target.typeOrigin == Type.Origin.UNRESOLVED && + type != target && + target is ObjectType + ) { // Make sure our typedef target is resolved resolveType(target) } @@ -98,8 +102,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } // If we found the "real" declared type, we can normalize the name of our scoped type - // and - // set the name to the declared type. + // and set the name to the declared type. if (declares != null) { var declaredType = declares.declaredType log.debug( @@ -122,7 +125,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { private fun handleNode(node: Node?) { if (node is RecordDeclaration) { - for (t in typeManager.firstOrderTypes.values.flatten()) { + for (t in typeManager.firstOrderTypesMap.values.flatten()) { if (t.name == node.name && t is ObjectType) { // The node is the class of the type t t.recordDeclaration = node @@ -136,7 +139,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } fun resolveFirstOrderTypes() { - for (type in typeManager.firstOrderTypes.values.flatten().sortedBy { it.name }) { + for (type in typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name }) { if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { resolveType(type) } diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt index f3be085f0d..d95b108275 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt @@ -71,7 +71,7 @@ class JavaExternalTypeHierarchyResolver(ctx: TranslationContext) : ComponentPass } // Iterate over all known types and add their (direct) supertypes. - for (t in typeManager.firstOrderTypes.values.flatten()) { + for (t in typeManager.firstOrderTypesMap.values.flatten()) { val symbol = resolver.tryToSolveType(t.typeName) if (symbol.isSolved) { try { diff --git a/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt b/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt index 5b608b175b..2a3174c6e5 100644 --- a/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt +++ b/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt @@ -118,7 +118,8 @@ internal class TypeTests : BaseTest() { val level2 = assertNotNull(result.records["multistep.Level2"]).toType() val unrelated = assertNotNull(result.records["multistep.Unrelated"]).toType() println( - result.finalCtx.typeManager.firstOrderTypes + result.finalCtx.typeManager.firstOrderTypesMap.values + .flatten() .filter { it.typeName == "multistep.Root" } .map { it.superTypes } ) From dc4b5f6b86bd4d9ba76d2bcda6ab071a91ede990 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Wed, 25 Sep 2024 13:08:57 +0200 Subject: [PATCH 03/19] Fixed common type tests --- .../aisec/cpg/enhancements/types/TypeTests.kt | 16 ++++++----- .../aisec/cpg/graph/types/TypeTests.kt | 28 ++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/types/TypeTests.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/types/TypeTests.kt index 2fd4c7d2ab..94a3c79d00 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/types/TypeTests.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/types/TypeTests.kt @@ -178,13 +178,15 @@ internal class TypeTests : BaseTest() { analyze("simple_inheritance.cpp", topLevel, true) { it.registerLanguage() } - val root = assertNotNull(result.records["Root"]).toType() - val level0 = assertNotNull(result.records["Level0"]).toType() - val level1 = assertNotNull(result.records["Level1"]).toType() - val level1b = assertNotNull(result.records["Level1B"]).toType() - val level2 = assertNotNull(result.records["Level2"]).toType() - val unrelated = assertNotNull(result.records["Unrelated"]).toType() - getCommonTypeTestGeneral(root, level0, level1, level1b, level2, unrelated) + with(result) { + val root = assertResolvedType("Root") + val level0 = assertResolvedType("Level0") + val level1 = assertResolvedType("Level1") + val level1b = assertResolvedType("Level1B") + val level2 = assertResolvedType("Level2") + val unrelated = assertResolvedType("Unrelated") + getCommonTypeTestGeneral(root, level0, level1, level1b, level2, unrelated) + } } } diff --git a/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt b/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt index 2a3174c6e5..f12e243c9d 100644 --- a/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt +++ b/cpg-language-java/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/types/TypeTests.kt @@ -111,19 +111,21 @@ internal class TypeTests : BaseTest() { fun testCommonTypeTestJava() { val topLevel = Path.of("src", "test", "resources", "compiling", "hierarchy") val result = analyze("java", topLevel, true) { it.registerLanguage(JavaLanguage()) } - val root = assertNotNull(result.records["multistep.Root"]).toType() - val level0 = assertNotNull(result.records["multistep.Level0"]).toType() - val level1 = assertNotNull(result.records["multistep.Level1"]).toType() - val level1b = assertNotNull(result.records["multistep.Level1B"]).toType() - val level2 = assertNotNull(result.records["multistep.Level2"]).toType() - val unrelated = assertNotNull(result.records["multistep.Unrelated"]).toType() - println( - result.finalCtx.typeManager.firstOrderTypesMap.values - .flatten() - .filter { it.typeName == "multistep.Root" } - .map { it.superTypes } - ) - getCommonTypeTestGeneral(root, level0, level1, level1b, level2, unrelated) + with(result) { + val root = assertResolvedType("multistep.Root") + val level0 = assertResolvedType("multistep.Level0") + val level1 = assertResolvedType("multistep.Level1") + val level1b = assertResolvedType("multistep.Level1B") + val level2 = assertResolvedType("multistep.Level2") + val unrelated = assertResolvedType("multistep.Unrelated") + println( + result.finalCtx.typeManager.firstOrderTypesMap.values + .flatten() + .filter { it.typeName == "multistep.Root" } + .map { it.superTypes } + ) + getCommonTypeTestGeneral(root, level0, level1, level1b, level2, unrelated) + } } private fun getCommonTypeTestGeneral( From 615230999c71690fc44ea5e558a80bf21231d547 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Wed, 25 Sep 2024 14:39:10 +0200 Subject: [PATCH 04/19] Made the list of types a synchronized list --- .../de/fraunhofer/aisec/cpg/TypeManager.kt | 5 +++- .../fraunhofer/aisec/cpg/graph/TypeBuilder.kt | 30 +++++++++---------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index c713a544a9..6e50796b94 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -209,7 +209,10 @@ class TypeManager { } if (t.isFirstOrderType) { - var types = firstOrderTypesMap.computeIfAbsent(t.name.toString()) { mutableListOf() } + var types = + firstOrderTypesMap.computeIfAbsent(t.name.toString()) { + Collections.synchronizedList(mutableListOf()) + } // Make sure we only ever return one unique object per type if (!types.add(t)) { return types.first { it == t && it is T } as T diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt index 47fab07cef..a8375d9abf 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt @@ -115,10 +115,10 @@ fun LanguageProvider.objectType( val scope = c.scopeManager.currentScope - synchronized(c.typeManager.firstOrderTypesMap) { - // We can try to look up the type by its name and return it, if it already exists. + var list = c.typeManager.firstOrderTypesMap[name.toString()] + if (list != null) { var type = - c.typeManager.firstOrderTypesMap[name.toString()]?.firstOrNull { + list.firstOrNull { it is ObjectType && it.name == name && it.scope == scope && @@ -128,19 +128,19 @@ fun LanguageProvider.objectType( if (type != null) { return type } - - // Otherwise, we either need to create the type because of the generics or because we do not - // know the type yet. - type = ObjectType(name, generics, false, language) - // Apply our usual metadata, such as scope, code, location, if we have any. Make sure only - // to refer by the local name because we will treat types as sort of references when - // creating them and resolve them later. - type.applyMetadata(this, name, rawNode = rawNode, localNameOnly = true) - - // Piping it through register type will ensure that in any case we return the one unique - // type object (per scope) for it. - return c.typeManager.registerType(type) } + + // Otherwise, we either need to create the type because of the generics or because we do not + // know the type yet. + var type = ObjectType(name, generics, false, language) + // Apply our usual metadata, such as scope, code, location, if we have any. Make sure only + // to refer by the local name because we will treat types as sort of references when + // creating them and resolve them later. + type.applyMetadata(this, name, rawNode = rawNode, localNameOnly = true) + + // Piping it through register type will ensure that in any case we return the one unique + // type object (per scope) for it. + return c.typeManager.registerType(type) } /** From ce04f7a0c787e1bc7038506c21985a1f84c365fb Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 11:03:44 +0200 Subject: [PATCH 05/19] Better handling --- .../aisec/cpg/TranslationManager.kt | 4 ++- .../de/fraunhofer/aisec/cpg/TypeManager.kt | 30 ++++++++++++------- .../aisec/cpg/passes/TypeResolver.kt | 8 +++-- .../aisec/cpg/passes/CXXExtraPass.kt | 4 +-- .../frontends/cxx/CXXLanguageFrontendTest.kt | 10 +++---- 5 files changed, 36 insertions(+), 20 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index 199f03fe91..408c8d3357 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -314,7 +314,9 @@ private constructor( // and individual file scopes beneath it var newGlobalScope = globalCtx.scopeManager.globalScope var types = - globalCtx.typeManager.firstOrderTypes.union(globalCtx.typeManager.secondOrderTypes) + globalCtx.typeManager.resolvedFirstOrderTypes.union( + globalCtx.typeManager.secondOrderTypes + ) types.forEach { if (it.scope is GlobalScope) { it.scope = newGlobalScope diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index 6e50796b94..de512f33d8 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -61,11 +61,8 @@ class TypeManager { */ val firstOrderTypesMap = ConcurrentHashMap>() - /** Retrieves the list of all *unique* first order types. */ - val firstOrderTypes: Set - get() { - return synchronized(firstOrderTypesMap) { firstOrderTypesMap.values.flatten().toSet() } - } + /** Retrieves the list of all *resolved* first order types. */ + val resolvedFirstOrderTypes: MutableSet = mutableSetOf() val secondOrderTypes: MutableSet = ConcurrentHashMap.newKeySet() @@ -239,8 +236,8 @@ class TypeManager { } /** Checks, whether a [Type] with the given [name] exists. */ - fun typeExists(name: CharSequence): Boolean { - return firstOrderTypes.any { type: Type -> type.root.name == name } + fun resolvedTypeExists(name: CharSequence): Boolean { + return resolvedFirstOrderTypes.any { type: Type -> type.name == name } } fun resolvePossibleTypedef(alias: Type, scopeManager: ScopeManager): Type { @@ -263,9 +260,8 @@ class TypeManager { return primitiveType } - return firstOrderTypesMap[fqn.toString()]?.firstOrNull { - (it.typeOrigin == Type.Origin.RESOLVED || it.typeOrigin == Type.Origin.GUESSED) && - it.root.name == fqn && + return resolvedFirstOrderTypes.firstOrNull { + it.root.name == fqn && if (generics != null) { (it as? ObjectType)?.generics == generics } else { @@ -273,6 +269,20 @@ class TypeManager { } } } + + /** + * This function marks a type as [Type.Origin.RESOLVED] and adds it to the + * [resolvedFirstOrderTypes]. + */ + fun markAsResolved(type: Type) { + // Mark it as RESOLVED + type.typeOrigin = Type.Origin.RESOLVED + + if (type.isFirstOrderType) { + // Add it to our resolved first order type list + resolvedFirstOrderTypes += type + } + } } val Type.ancestors: Set diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 2dcba47724..9a99fb0b18 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -93,7 +93,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { log.debug("Aliasing type {} in {} scope to {}", type.name, type.scope, name) type.declaredFrom = originDeclares type.recordDeclaration = originDeclares - type.typeOrigin = Type.Origin.RESOLVED + ctx?.typeManager?.markAsResolved(type) return true } @@ -114,8 +114,8 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { type.name = declaredType.name type.declaredFrom = declares type.recordDeclaration = declares as? RecordDeclaration - type.typeOrigin = Type.Origin.RESOLVED type.superTypes.addAll(declaredType.superTypes) + ctx?.typeManager?.markAsResolved(type) return true } @@ -142,6 +142,10 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { for (type in typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name }) { if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { resolveType(type) + } else if (type.typeOrigin == Type.Origin.RESOLVED) { + // This will most likely only affect built-in types. They are resolved by default, + // and we want to make sure that they end up in the resolve first order list + ctx.typeManager.markAsResolved(type) } } } diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt index b6eeae1365..09c78698f8 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt @@ -77,7 +77,7 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { * the graph. */ private fun removeBracketOperators(node: UnaryOperator, parent: Node?) { - if (node.operatorCode == "()" && !typeManager.typeExists(node.input.name)) { + if (node.operatorCode == "()" && !typeManager.resolvedTypeExists(node.input.name)) { // It was really just parenthesis around an identifier, but we can only make this // distinction now. // @@ -109,7 +109,7 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { language != null && fakeUnaryOp is UnaryOperator && fakeUnaryOp.operatorCode == "()" && - typeManager.typeExists(fakeUnaryOp.input.name) + typeManager.resolvedTypeExists(fakeUnaryOp.input.name) ) { // If the name (`long` in the example) is a type, then the unary operator (`(long)`) // is really a cast and our binary operator is really a unary operator `&addr`. diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt index 3e1b661e95..ca9fd766f0 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt @@ -1690,13 +1690,13 @@ internal class CXXLanguageFrontendTest : BaseTest() { assertNotNull(result) // There should be no type "string" anymore, only "std::string" - assertFalse(result.finalCtx.typeManager.typeExists("string")) - assertTrue(result.finalCtx.typeManager.typeExists("std::string")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("string")) + assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::string")) // the same applies to "inner::secret" - assertFalse(result.finalCtx.typeManager.typeExists("secret")) - assertFalse(result.finalCtx.typeManager.typeExists("inner::secret")) - assertTrue(result.finalCtx.typeManager.typeExists("std::inner::secret")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("secret")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("inner::secret")) + assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::inner::secret")) } @Test From 2b0b5bc6f912deceb2f959129121e1a066f70a59 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 16:59:21 +0200 Subject: [PATCH 06/19] Fixed issue with CXXExtraPass --- .../kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt | 6 +++--- .../kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index 408c8d3357..dae0917699 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -314,9 +314,9 @@ private constructor( // and individual file scopes beneath it var newGlobalScope = globalCtx.scopeManager.globalScope var types = - globalCtx.typeManager.resolvedFirstOrderTypes.union( - globalCtx.typeManager.secondOrderTypes - ) + globalCtx.typeManager.firstOrderTypesMap.values + .flatten() + .union(globalCtx.typeManager.secondOrderTypes) types.forEach { if (it.scope is GlobalScope) { it.scope = newGlobalScope diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 9a99fb0b18..9dea909087 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -142,8 +142,11 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { for (type in typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name }) { if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { resolveType(type) - } else if (type.typeOrigin == Type.Origin.RESOLVED) { - // This will most likely only affect built-in types. They are resolved by default, + } else if ( + type.typeOrigin == Type.Origin.RESOLVED || type.typeOrigin == Type.Origin.GUESSED + ) { + // This will most likely only affect built-in types (and some left over from the + // Java legacy type stuff). They are resolved by default, // and we want to make sure that they end up in the resolve first order list ctx.typeManager.markAsResolved(type) } From 54f6fa1f4ea87fb7ca9236ff2c7a0338e5489792 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 17:30:35 +0200 Subject: [PATCH 07/19] Cleaning up CXXExtraPass some more --- .../de/fraunhofer/aisec/cpg/ScopeManager.kt | 39 +++++++++++++++++++ .../de/fraunhofer/aisec/cpg/TypeManager.kt | 2 +- .../aisec/cpg/passes/TypeResolver.kt | 11 +----- .../aisec/cpg/passes/CXXExtraPass.kt | 15 ++++++- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index 72d09e4d6a..04da463ebe 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -31,6 +31,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.* import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.graph.types.DeclaresType import de.fraunhofer.aisec.cpg.graph.types.FunctionPointerType import de.fraunhofer.aisec.cpg.graph.types.IncompleteType import de.fraunhofer.aisec.cpg.graph.types.Type @@ -1004,6 +1005,44 @@ class ScopeManager : ScopeProvider { return list } + + /** + * This function tries to look up the symbol contained in [name] (using [findSymbols]) and + * returns a [DeclaresType] node, if this name resolved to something which declares a type. + */ + fun findTypeDeclaration(name: Name, startScope: Scope?): DeclaresType? { + var symbols = findSymbols(name = name, startScope = startScope) { it is DeclaresType } + + // We need to have a single match, otherwise we have an ambiguous type and we cannot + // normalize it. + // TODO: Maybe we should have a warning in this case? + var declares = symbols.filterIsInstance().singleOrNull() + + return declares + } + + /** + * This function checks, whether there exists a [Type] for the given combination of a [name] and + * [scope]. + * + * This is needed in passes that need to replace potential identifiers with [Type] nodes, + * because of ambiguities. + */ + fun nameIsTypeForScope( + name: Name, + language: Language<*>?, + scope: Scope? = currentScope + ): Boolean { + // First, check if it is a simple type + var type = language?.getSimpleTypeOf(name) + if (type != null) { + return true + } + + // Otherwise, try to find a type declaration with the given combination of symbol and name + var declares = findTypeDeclaration(name, scope) + return declares?.declaredType != null + } } /** diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index de512f33d8..c6bc8a0b33 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -235,7 +235,7 @@ class TypeManager { return t } - /** Checks, whether a [Type] with the given [name] exists. */ + /** Checks, whether a resolved [Type] with the given [name] exists. */ fun resolvedTypeExists(name: CharSequence): Boolean { return resolvedFirstOrderTypes.any { type: Type -> type.name == name } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 9dea909087..18fcaa4e2e 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -28,7 +28,6 @@ package de.fraunhofer.aisec.cpg.passes import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration -import de.fraunhofer.aisec.cpg.graph.types.DeclaresType import de.fraunhofer.aisec.cpg.graph.types.ObjectType import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration @@ -66,15 +65,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { // filter for nodes that implement DeclaresType, because otherwise we will get a lot of // constructor declarations and such with the same name. It seems this is ok since most // languages will prefer structs/classes over functions when resolving types. - var symbols = - ctx?.scopeManager?.findSymbols(type.name, startScope = type.scope) { - it is DeclaresType - } ?: listOf() - - // We need to have a single match, otherwise we have an ambiguous type and we cannot - // normalize it. - // TODO: Maybe we should have a warning in this case? - var declares = symbols.filterIsInstance().singleOrNull() + var declares = ctx?.scopeManager?.findTypeDeclaration(type.name, type.scope) // Check for a possible typedef var target = ctx?.scopeManager?.typedefFor(type.name, type.scope) diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt index 09c78698f8..e39c7e7e03 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt @@ -77,7 +77,14 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { * the graph. */ private fun removeBracketOperators(node: UnaryOperator, parent: Node?) { - if (node.operatorCode == "()" && !typeManager.resolvedTypeExists(node.input.name)) { + if ( + node.operatorCode == "()" && + !scopeManager.nameIsTypeForScope( + node.input.name, + node.input.language, + node.input.scope + ) + ) { // It was really just parenthesis around an identifier, but we can only make this // distinction now. // @@ -109,7 +116,11 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { language != null && fakeUnaryOp is UnaryOperator && fakeUnaryOp.operatorCode == "()" && - typeManager.resolvedTypeExists(fakeUnaryOp.input.name) + scopeManager.nameIsTypeForScope( + fakeUnaryOp.input.name, + fakeUnaryOp.input.language, + fakeUnaryOp.input.scope, + ) ) { // If the name (`long` in the example) is a type, then the unary operator (`(long)`) // is really a cast and our binary operator is really a unary operator `&addr`. From c5e1616cc08fec8f0df865f2369cabdc0e7ce011 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 17:38:17 +0200 Subject: [PATCH 08/19] Simplified ResolveCallExpressionAmbiguityPass --- .../de/fraunhofer/aisec/cpg/ScopeManager.kt | 35 ++++++++++----- .../ResolveCallExpressionAmbiguityPass.kt | 44 +++---------------- .../aisec/cpg/passes/CXXExtraPass.kt | 8 ++-- .../cpg/frontends/cxx/CXXExpressionTest.kt | 4 +- 4 files changed, 37 insertions(+), 54 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index 04da463ebe..be96ec7ced 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -36,6 +36,7 @@ import de.fraunhofer.aisec.cpg.graph.types.FunctionPointerType import de.fraunhofer.aisec.cpg.graph.types.IncompleteType import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.helpers.Util +import de.fraunhofer.aisec.cpg.passes.ResolveCallExpressionAmbiguityPass import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation import java.util.* import java.util.function.Predicate @@ -1011,37 +1012,49 @@ class ScopeManager : ScopeProvider { * returns a [DeclaresType] node, if this name resolved to something which declares a type. */ fun findTypeDeclaration(name: Name, startScope: Scope?): DeclaresType? { - var symbols = findSymbols(name = name, startScope = startScope) { it is DeclaresType } + var symbols = + findSymbols(name = name, startScope = startScope) { it is DeclaresType } + .filterIsInstance() - // We need to have a single match, otherwise we have an ambiguous type and we cannot + // We need to have a single match, otherwise we have an ambiguous type, and we cannot // normalize it. - // TODO: Maybe we should have a warning in this case? - var declares = symbols.filterIsInstance().singleOrNull() + if (symbols.size > 1) { + LOGGER.warn( + "Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.", + name + ) + } - return declares + return symbols.singleOrNull() } /** * This function checks, whether there exists a [Type] for the given combination of a [name] and - * [scope]. + * [scope] and returns it. It returns null, if no such type exists. * * This is needed in passes that need to replace potential identifiers with [Type] nodes, - * because of ambiguities. + * because of ambiguities (e.g. [ResolveCallExpressionAmbiguityPass]). */ - fun nameIsTypeForScope( + fun findTypeWithNameAndScope( name: Name, language: Language<*>?, scope: Scope? = currentScope - ): Boolean { + ): Type? { // First, check if it is a simple type var type = language?.getSimpleTypeOf(name) if (type != null) { - return true + return type + } + + // This could also be a typedef + type = typedefFor(name, scope) + if (type != null) { + return type } // Otherwise, try to find a type declaration with the given combination of symbol and name var declares = findTypeDeclaration(name, scope) - return declares?.declaredType != null + return declares?.declaredType } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt index 42f5ec7c8b..7da8a32c0d 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt @@ -88,23 +88,9 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU // Check, if this is cast is really a construct expression (if the language supports // functional-constructs) if (language is HasFunctionStyleConstruction) { - // Make sure, we do not accidentally "construct" primitive types - if (language.builtInTypes.contains(callee.name.toString()) == true) { - return - } - - val fqn = - if (callee.name.parent == null) { - scopeManager.currentNamespace.fqn( - callee.name.localName, - delimiter = callee.name.delimiter - ) - } else { - callee.name - } - // Check for our type. We are only interested in object types - val type = typeManager.lookupResolvedType(fqn) + var type = + scopeManager.findTypeWithNameAndScope(callee.name, callee.language, callee.scope) if (type is ObjectType) { walker.replaceCallWithConstruct(type, parent, call) } @@ -121,27 +107,11 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU callee = callee.input } - // First, check if this is a built-in type - var builtInType = language.getSimpleTypeOf(callee.name) - if (builtInType != null) { - walker.replaceCallWithCast(builtInType, parent, call, false) - } else { - // If not, then this could still refer to an existing type. We need to make sure - // that we take the current namespace into account - val fqn = - if (callee.name.parent == null) { - scopeManager.currentNamespace.fqn( - callee.name.localName, - delimiter = callee.name.delimiter - ) - } else { - callee.name - } - - val type = typeManager.lookupResolvedType(fqn) - if (type != null) { - walker.replaceCallWithCast(type, parent, call, pointer) - } + // Check if it is type and replace the call + var type = + scopeManager.findTypeWithNameAndScope(callee.name, callee.language, callee.scope) + if (type != null) { + walker.replaceCallWithCast(type, parent, call, false) } } } diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt index e39c7e7e03..c9ce581a60 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt @@ -79,11 +79,11 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { private fun removeBracketOperators(node: UnaryOperator, parent: Node?) { if ( node.operatorCode == "()" && - !scopeManager.nameIsTypeForScope( + scopeManager.findTypeWithNameAndScope( node.input.name, node.input.language, node.input.scope - ) + ) == null ) { // It was really just parenthesis around an identifier, but we can only make this // distinction now. @@ -116,11 +116,11 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { language != null && fakeUnaryOp is UnaryOperator && fakeUnaryOp.operatorCode == "()" && - scopeManager.nameIsTypeForScope( + scopeManager.findTypeWithNameAndScope( fakeUnaryOp.input.name, fakeUnaryOp.input.language, fakeUnaryOp.input.scope, - ) + ) != null ) { // If the name (`long` in the example) is a type, then the unary operator (`(long)`) // is really a cast and our binary operator is really a unary operator `&addr`. diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt index aa6d6ee292..cb561a4a45 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt @@ -42,9 +42,9 @@ class CXXExpressionTest { } assertNotNull(tu) - // We should have two calls (int and myint64) + // We should have two casts (int and myint64, which is a typedef for long long int) val casts = tu.casts assertEquals(2, casts.size) - assertEquals(listOf("int", "myint64"), casts.map { it.name.localName }) + assertEquals(listOf("int", "long long int"), casts.map { it.name.localName }) } } From 0d1921db6a3b5a7a8a89de4fc092ec23b0adfa94 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 18:20:57 +0200 Subject: [PATCH 09/19] Rename typeOrigin to resolutionState --- .../de/fraunhofer/aisec/cpg/TypeManager.kt | 6 +-- .../cpg/graph/types/FunctionPointerType.kt | 2 +- .../aisec/cpg/graph/types/NumericType.kt | 2 +- .../aisec/cpg/graph/types/ReferenceType.kt | 2 +- .../aisec/cpg/graph/types/StringType.kt | 2 +- .../fraunhofer/aisec/cpg/graph/types/Type.kt | 37 +++++++++++++------ .../aisec/cpg/graph/types/UnknownType.kt | 2 +- .../aisec/cpg/passes/TypeResolver.kt | 26 ++++++++----- .../frontends/cxx/CXXLanguageFrontendTest.kt | 10 ++--- .../cpg/frontends/golang/ExpressionHandler.kt | 3 +- .../aisec/cpg/frontends/golang/GoLanguage.kt | 5 +-- .../cpg/frontends/java/DeclarationHandler.kt | 6 +-- .../cpg/frontends/java/ExpressionHandler.kt | 2 +- .../frontends/java/JavaLanguageFrontend.kt | 9 +++-- .../cpg/frontends/java/StatementHandler.kt | 2 +- .../JavaExternalTypeHierarchyResolver.kt | 2 +- 16 files changed, 70 insertions(+), 48 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index c6bc8a0b33..fd42c7f1be 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -248,7 +248,7 @@ class TypeManager { /** * This function returns the first (there should be only one) [Type] with the given [fqn] that - * is [Type.Origin.RESOLVED]. + * is [Type.ResolutionState.RESOLVED]. */ fun lookupResolvedType( fqn: CharSequence, @@ -271,12 +271,12 @@ class TypeManager { } /** - * This function marks a type as [Type.Origin.RESOLVED] and adds it to the + * This function marks a type as [Type.ResolutionState.RESOLVED] and adds it to the * [resolvedFirstOrderTypes]. */ fun markAsResolved(type: Type) { // Mark it as RESOLVED - type.typeOrigin = Type.Origin.RESOLVED + type.resolutionState = Type.ResolutionState.RESOLVED if (type.isFirstOrderType) { // Add it to our resolved first order type list diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/FunctionPointerType.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/FunctionPointerType.kt index ad2660733f..ae8655cfb1 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/FunctionPointerType.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/FunctionPointerType.kt @@ -89,7 +89,7 @@ class FunctionPointerType : Type { .appendSuper(super.toString()) .append("parameters", parameters) .append("returnType", returnType) - .append("typeOrigin", typeOrigin) + .append("resolutionState", resolutionState) .toString() } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/NumericType.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/NumericType.kt index 20a1db8032..d2824a7f4e 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/NumericType.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/NumericType.kt @@ -38,7 +38,7 @@ open class NumericType( init { // Built-in types are always resolved - this.typeOrigin = Origin.RESOLVED + this.resolutionState = ResolutionState.RESOLVED } /** diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/ReferenceType.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/ReferenceType.kt index 3ed69cfe7f..32b9561bff 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/ReferenceType.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/ReferenceType.kt @@ -81,7 +81,7 @@ class ReferenceType : Type, SecondOrderType { .appendSuper(super.toString()) .append("elementType", elementType) .append("name", name) - .append("typeOrigin", typeOrigin) + .append("resolutionState", resolutionState) .toString() } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/StringType.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/StringType.kt index 3ecbc2fd9e..73b4b12bae 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/StringType.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/StringType.kt @@ -35,6 +35,6 @@ class StringType( init { // Built-in types are always resolved - this.typeOrigin = Origin.RESOLVED + this.resolutionState = ResolutionState.RESOLVED } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt index ad167ae643..57f5817f77 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt @@ -73,7 +73,7 @@ abstract class Type : Node { var isPrimitive = false protected set - open var typeOrigin: Origin? = null + open var resolutionState: ResolutionState? = null /** * This points to the [DeclaresType] node (most likely a [Declaration]), that declares this @@ -87,12 +87,12 @@ abstract class Type : Node { constructor(typeName: String?) { name = language.parseName(typeName ?: UNKNOWN_TYPE_STRING) - typeOrigin = Origin.UNRESOLVED + resolutionState = ResolutionState.UNRESOLVED } constructor(type: Type?) { type?.name?.let { name = it.clone() } - typeOrigin = type?.typeOrigin + resolutionState = type?.resolutionState } constructor(typeName: CharSequence, language: Language<*>?) { @@ -103,20 +103,33 @@ abstract class Type : Node { language.parseName(typeName) } this.language = language - typeOrigin = Origin.UNRESOLVED + resolutionState = ResolutionState.UNRESOLVED } constructor(fullTypeName: Name, language: Language<*>?) { name = fullTypeName.clone() - typeOrigin = Origin.UNRESOLVED + resolutionState = ResolutionState.UNRESOLVED this.language = language } - /** Type Origin describes where the Type information came from */ - enum class Origin { + /** Information about the resolution state of this type. */ + enum class ResolutionState { + /** + * The type is fully resolved and if it is an [ObjectType], the property + * [ObjectType.recordDeclaration] will have the information about where the type was + * declared. + */ RESOLVED, - DATAFLOW, - GUESSED, + + /** + * GUESSED is not really used anymore, except in the java frontend, which needs a more or + * less complete type-rewrite. Once that is done, we can remove the GUESSED state. + */ + @Deprecated(message = "Use UNRESOLVED instead") GUESSED, + + /** + * The type is not yet resolved. Therefore the type is only valid within the current scope. + */ UNRESOLVED } @@ -130,10 +143,10 @@ abstract class Type : Node { * @return Returns a reference to the current Type. E.g. when creating a pointer to an existing * ObjectType * - * TODO(oxisto) Ideally, we would make this function "internal", but there is a bug in the Go - * frontend, so that we still need this function :( + * */ - abstract fun reference(pointer: PointerOrigin?): Type + // TODO(oxisto): Make this internal, but some tests still use it + /*internal*/ abstract fun reference(pointer: PointerOrigin?): Type /** * @return Dereferences the current Type by resolving the reference. E.g. when dereferencing a diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/UnknownType.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/UnknownType.kt index 7b849dc96d..79db885e58 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/UnknownType.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/UnknownType.kt @@ -64,7 +64,7 @@ class UnknownType private constructor() : Type() { return "UNKNOWN" } - override var typeOrigin: Origin? = null + override var resolutionState: ResolutionState? = null companion object { /** A map of [UnknownType] and their respective [Language]. */ diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 18fcaa4e2e..a3962ac9cd 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -48,7 +48,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { refreshNames() walker = SubgraphWalker.ScopedWalker(scopeManager) - walker.registerHandler(::handleNode) + walker.registerHandler(::legacyHandleNode) walker.iterate(component) } @@ -71,7 +71,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { var target = ctx?.scopeManager?.typedefFor(type.name, type.scope) if (target != null) { if ( - target.typeOrigin == Type.Origin.UNRESOLVED && + target.resolutionState == Type.ResolutionState.UNRESOLVED && type != target && target is ObjectType ) { @@ -114,7 +114,8 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - private fun handleNode(node: Node?) { + /** This piece of code is completely unnecessary, since we already set the */ + private fun legacyHandleNode(node: Node?) { if (node is RecordDeclaration) { for (t in typeManager.firstOrderTypesMap.values.flatten()) { if (t.name == node.name && t is ObjectType) { @@ -130,14 +131,21 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } fun resolveFirstOrderTypes() { + var allTypes = typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name } + + log.info("Resolving {} first order type objects", allTypes.size) + for (type in typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name }) { - if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { - resolveType(type) - } else if ( - type.typeOrigin == Type.Origin.RESOLVED || type.typeOrigin == Type.Origin.GUESSED + if ( + type is ObjectType && type.resolutionState == Type.ResolutionState.UNRESOLVED || + type.resolutionState == Type.ResolutionState.GUESSED ) { - // This will most likely only affect built-in types (and some left over from the - // Java legacy type stuff). They are resolved by default, + // Try to resolve all UNRESOLVED types. Also try to resolve GUESSED types. GUESSED + // is not really used anymore, except in the java frontend, which needs a more or + // less complete type-rewrite. Once that is done, we can remove the GUESSED state. + resolveType(type) + } else if (type.resolutionState == Type.ResolutionState.RESOLVED) { + // This will most likely only affect built-in types. They are resolved by default, // and we want to make sure that they end up in the resolve first order list ctx.typeManager.markAsResolved(type) } diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt index ca9fd766f0..3e1b661e95 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt @@ -1690,13 +1690,13 @@ internal class CXXLanguageFrontendTest : BaseTest() { assertNotNull(result) // There should be no type "string" anymore, only "std::string" - assertFalse(result.finalCtx.typeManager.resolvedTypeExists("string")) - assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::string")) + assertFalse(result.finalCtx.typeManager.typeExists("string")) + assertTrue(result.finalCtx.typeManager.typeExists("std::string")) // the same applies to "inner::secret" - assertFalse(result.finalCtx.typeManager.resolvedTypeExists("secret")) - assertFalse(result.finalCtx.typeManager.resolvedTypeExists("inner::secret")) - assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::inner::secret")) + assertFalse(result.finalCtx.typeManager.typeExists("secret")) + assertFalse(result.finalCtx.typeManager.typeExists("inner::secret")) + assertTrue(result.finalCtx.typeManager.typeExists("std::inner::secret")) } @Test diff --git a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/ExpressionHandler.kt b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/ExpressionHandler.kt index 8a389e1a58..4d528f8fd4 100644 --- a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/ExpressionHandler.kt +++ b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/ExpressionHandler.kt @@ -31,7 +31,6 @@ import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.ObjectType -import de.fraunhofer.aisec.cpg.graph.types.PointerType import de.fraunhofer.aisec.cpg.graph.types.Type import java.math.BigInteger @@ -280,7 +279,7 @@ class ExpressionHandler(frontend: GoLanguageFrontend) : val type = frontend.typeOf(callExpr.args[0]) // new is a pointer, so need to reference the type with a pointer - n.type = type.reference(PointerType.PointerOrigin.POINTER) + n.type = type.pointer() // a new expression also needs an initializer, which is usually a ConstructExpression val construct = newConstructExpression(rawNode = callExpr) diff --git a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt index b1f17ca3bd..513b28480e 100644 --- a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt +++ b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg.frontends.golang import de.fraunhofer.aisec.cpg.frontends.* import de.fraunhofer.aisec.cpg.graph.declarations.ParameterDeclaration +import de.fraunhofer.aisec.cpg.graph.pointer import de.fraunhofer.aisec.cpg.graph.primitiveType import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.Literal @@ -152,9 +153,7 @@ class GoLanguage : // This makes lambda expression works, as long as we have the dedicated a // FunctionPointerType if (type is FunctionPointerType && targetType.underlyingType is FunctionType) { - return if ( - type == targetType.underlyingType?.reference(PointerType.PointerOrigin.POINTER) - ) { + return if (type == targetType.underlyingType?.pointer()) { DirectMatch } else { CastNotPossible diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/DeclarationHandler.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/DeclarationHandler.kt index 27c44a126d..39f8d11e81 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/DeclarationHandler.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/DeclarationHandler.kt @@ -260,7 +260,7 @@ open class DeclarationHandler(lang: JavaLanguageFrontend) : type = frontend.typeOf(variable.type) } else { type = this.objectType(t) - type.typeOrigin = Type.Origin.GUESSED + type.resolutionState = Type.ResolutionState.GUESSED } } catch (e: UnsupportedOperationException) { val t = frontend.recoverTypeFromUnsolvedException(e) @@ -269,7 +269,7 @@ open class DeclarationHandler(lang: JavaLanguageFrontend) : type = frontend.typeOf(variable.type) } else { type = this.objectType(t) - type.typeOrigin = Type.Origin.GUESSED + type.resolutionState = Type.ResolutionState.GUESSED } } catch (e: IllegalArgumentException) { val t = frontend.recoverTypeFromUnsolvedException(e) @@ -278,7 +278,7 @@ open class DeclarationHandler(lang: JavaLanguageFrontend) : type = frontend.typeOf(variable.type) } else { type = this.objectType(t) - type.typeOrigin = Type.Origin.GUESSED + type.resolutionState = Type.ResolutionState.GUESSED } } val fieldDeclaration = diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt index 063d163c2a..c4461ee7b8 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/ExpressionHandler.kt @@ -590,7 +590,7 @@ class ExpressionHandler(lang: JavaLanguageFrontend) : t = unknownType() } else { t = this.objectType(typeString) - t.typeOrigin = Type.Origin.GUESSED + t.resolutionState = Type.ResolutionState.GUESSED } val declaredReferenceExpression = newReference(name, t, rawNode = nameExpr) val recordDeclaration = frontend.scopeManager.currentRecord diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontend.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontend.kt index 78609f9b7e..01c9284853 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontend.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguageFrontend.kt @@ -387,7 +387,8 @@ open class JavaLanguageFrontend(language: Language, ctx: T if (!searchType.isClassOrInterfaceType || context == null) { log.warn("Unable to resolve type for {}", type.asString()) val returnType = this.typeOf(type) - returnType.typeOrigin = de.fraunhofer.aisec.cpg.graph.types.Type.Origin.GUESSED + returnType.resolutionState = + de.fraunhofer.aisec.cpg.graph.types.Type.ResolutionState.GUESSED return returnType } val clazz = searchType.asClassOrInterfaceType() @@ -409,12 +410,14 @@ open class JavaLanguageFrontend(language: Language, ctx: T parseName(o.get().nameAsString + language.namespaceDelimiter + returnType.name) } - returnType.typeOrigin = de.fraunhofer.aisec.cpg.graph.types.Type.Origin.GUESSED + returnType.resolutionState = + de.fraunhofer.aisec.cpg.graph.types.Type.ResolutionState.GUESSED return returnType } log.warn("Unable to resolve type for {}", type.asString()) val returnType = this.typeOf(type) - returnType.typeOrigin = de.fraunhofer.aisec.cpg.graph.types.Type.Origin.GUESSED + returnType.resolutionState = + de.fraunhofer.aisec.cpg.graph.types.Type.ResolutionState.GUESSED return returnType } diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/StatementHandler.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/StatementHandler.kt index df320075fe..e9e0c6425e 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/StatementHandler.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/StatementHandler.kt @@ -526,7 +526,7 @@ class StatementHandler(lang: JavaLanguageFrontend?) : // we do not know which of the exceptions was actually thrown, so we assume this might // be any concreteType = this.objectType("java.lang.Throwable") - concreteType.typeOrigin = Type.Origin.GUESSED + concreteType.resolutionState = Type.ResolutionState.GUESSED } else { concreteType = frontend.getTypeAsGoodAsPossible(catchCls.parameter.type) possibleTypes.add(concreteType) diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt index d95b108275..44514939b9 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExternalTypeHierarchyResolver.kt @@ -84,7 +84,7 @@ class JavaExternalTypeHierarchyResolver(ctx: TranslationContext) : ComponentPass // Otherwise, we can create this in the global scope if (superType == null) { superType = provider.objectType(anc.qualifiedName) - superType.typeOrigin = Type.Origin.RESOLVED + superType.resolutionState = Type.ResolutionState.RESOLVED } // Add all resolved supertypes to the type. From 0a248e89bbf2f9396ce1ede69be6d7a9cd89ce78 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 18:26:13 +0200 Subject: [PATCH 10/19] Removing extra AST traversal in type resolver --- .../fraunhofer/aisec/cpg/graph/types/Type.kt | 2 -- .../aisec/cpg/passes/TypeResolver.kt | 18 +----------------- .../frontends/cxx/CXXLanguageFrontendTest.kt | 10 +++++----- .../aisec/cpg/frontends/golang/GoLanguage.kt | 5 +++-- 4 files changed, 9 insertions(+), 26 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt index 57f5817f77..80f8dee921 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt @@ -142,8 +142,6 @@ abstract class Type : Node { * @param pointer Reason for the reference (array of pointer) * @return Returns a reference to the current Type. E.g. when creating a pointer to an existing * ObjectType - * - * */ // TODO(oxisto): Make this internal, but some tests still use it /*internal*/ abstract fun reference(pointer: PointerOrigin?): Type diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index a3962ac9cd..c491afb69c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -46,10 +46,6 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { override fun accept(component: Component) { resolveFirstOrderTypes() refreshNames() - - walker = SubgraphWalker.ScopedWalker(scopeManager) - walker.registerHandler(::legacyHandleNode) - walker.iterate(component) } private fun refreshNames() { @@ -114,18 +110,6 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - /** This piece of code is completely unnecessary, since we already set the */ - private fun legacyHandleNode(node: Node?) { - if (node is RecordDeclaration) { - for (t in typeManager.firstOrderTypesMap.values.flatten()) { - if (t.name == node.name && t is ObjectType) { - // The node is the class of the type t - t.recordDeclaration = node - } - } - } - } - override fun cleanup() { // Nothing to do } @@ -135,7 +119,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { log.info("Resolving {} first order type objects", allTypes.size) - for (type in typeManager.firstOrderTypesMap.values.flatten().sortedBy { it.name }) { + for (type in allTypes) { if ( type is ObjectType && type.resolutionState == Type.ResolutionState.UNRESOLVED || type.resolutionState == Type.ResolutionState.GUESSED diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt index 3e1b661e95..ca9fd766f0 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt @@ -1690,13 +1690,13 @@ internal class CXXLanguageFrontendTest : BaseTest() { assertNotNull(result) // There should be no type "string" anymore, only "std::string" - assertFalse(result.finalCtx.typeManager.typeExists("string")) - assertTrue(result.finalCtx.typeManager.typeExists("std::string")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("string")) + assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::string")) // the same applies to "inner::secret" - assertFalse(result.finalCtx.typeManager.typeExists("secret")) - assertFalse(result.finalCtx.typeManager.typeExists("inner::secret")) - assertTrue(result.finalCtx.typeManager.typeExists("std::inner::secret")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("secret")) + assertFalse(result.finalCtx.typeManager.resolvedTypeExists("inner::secret")) + assertTrue(result.finalCtx.typeManager.resolvedTypeExists("std::inner::secret")) } @Test diff --git a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt index 513b28480e..b1f17ca3bd 100644 --- a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt +++ b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/golang/GoLanguage.kt @@ -27,7 +27,6 @@ package de.fraunhofer.aisec.cpg.frontends.golang import de.fraunhofer.aisec.cpg.frontends.* import de.fraunhofer.aisec.cpg.graph.declarations.ParameterDeclaration -import de.fraunhofer.aisec.cpg.graph.pointer import de.fraunhofer.aisec.cpg.graph.primitiveType import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.Literal @@ -153,7 +152,9 @@ class GoLanguage : // This makes lambda expression works, as long as we have the dedicated a // FunctionPointerType if (type is FunctionPointerType && targetType.underlyingType is FunctionType) { - return if (type == targetType.underlyingType?.pointer()) { + return if ( + type == targetType.underlyingType?.reference(PointerType.PointerOrigin.POINTER) + ) { DirectMatch } else { CastNotPossible From dc7a9974ce6b87fc8a154fbb74fe76077be5d235 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sat, 28 Sep 2024 18:33:18 +0200 Subject: [PATCH 11/19] Added benchmark --- .../main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index dae0917699..1bc8983c5b 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -309,6 +309,8 @@ private constructor( // We want to merge everything into the final scope manager of the result globalCtx.scopeManager.mergeFrom(parallelContexts.map { it.scopeManager }) + var b = + Benchmark(TranslationManager::class.java, "Updating global scopes in all types", true) // We also need to update all types that point to one of the "old" global scopes // TODO(oxisto): This is really messy and instead we should have ONE global scope // and individual file scopes beneath it @@ -322,6 +324,7 @@ private constructor( it.scope = newGlobalScope } } + b.stop() log.info("Parallel parsing completed") From b0fab7887b496e859352cd5e090142562a1ea5a3 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Mon, 30 Sep 2024 18:36:06 +0200 Subject: [PATCH 12/19] Added scopes and symbols docs --- docs/docs/CPG/impl/scopes.md | 70 +++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/docs/docs/CPG/impl/scopes.md b/docs/docs/CPG/impl/scopes.md index 9fafc1ea83..79543aa42c 100755 --- a/docs/docs/CPG/impl/scopes.md +++ b/docs/docs/CPG/impl/scopes.md @@ -11,5 +11,73 @@ description: > --- -# Implementation and Concepts: Scopes and Scope Manger +# Scopes and Symbols +The concept of scopes and symbols are at the heart of every programming language and thus are also the core of static analysis. Both concepts consist in the CPG library through the types `Scope` and `Symbol` respectively. A `Symbol` is just a typealias for a string. + +A "symbol" can be seen as an identifier in most programming languages, referring to variables or functions. Symbols are often grouped in scopes, which defines the visibility of a symbol, e.g. a slice of a program that can "see" the symbol. Often this is also synonymous with the life-time of a variable, e.g., that its memory will be freed (or collected by a garbage collector) once it goes "out of scope". + +```c +// This defines a symbol "a" in the global/file scope. +// Its visibility is global within the file. +int a = 1; + +int main() { + // this defines another symbol "a" in a function/block scope. + // Its visibility is limited to the block it is defined in. + int a = 1; +} +``` + +Usually symbols declared in a local scope override the declaration of a symbol in a higher (e.g., global scope), which is also referred to as "shadowing". This needs to be taken into account when resolving symbols to their declarations. + +The `Scope` class holds all its symbols in the `Scope::symbols` property. More specifically, this property is a `SymbolMap`, which is a type alias to a map, whose key type is a `Symbol` and whose value type is a list of `Declaration` nodes. This is basically a symbol lookup table for all symbols in its scope. It is a map of a list because some programming languages have concepts like function overloading, which leads to the declaration of multiple `FunctionDeclaration` nodes under the same symbol in one scope. + +For a frontend or pass developer, the main interaction point with scopes and symbols is through the `ScopeManager`. The scope manager is available to all nodes via the `TranslationContext` and also injected in frontend, handlers and passes. + +## Hierarchy of Scopes + +Each scope (except the `GlobalScope`) can have a parent and possible child scopes. This can be used to model a hierarchy of scopes within a program. For example using the snippet above, the following scopes are defined in the CPG: + +* A `GlobalScope` that comprises the whole file +* A `FunctionScope` that comprises the function `main` +* A `BlockScope` that comprises the function body + +Note, that each programming language is different when it comes to scoping and this needs to be thought of by a frontend developer. For example in C/C++ each block introduced by `{}` introduces a new scope and variables can be declared only for such a block, meaning that each `for`, `if` and other statements also introduce a new scope. In contrast, Python only differentiates between a global scope, function and class scope. + +## Defining Scopes and Declaring Symbols + +In order to define new scopes, the `ScopeManager` offers two main APIs: + +* `enterScope(node)`, which specifies that `node` will declare a new scope and that an appropriate `Scope` (or derived type) will be created +* `leaveScope(node)`, which closes the scope again + +It is important that every opened scope must also be closed again. When scopes are nested, they also need to be closed in reverse order. + +```Kotlin +// We are inside the global scope here and want to create a new function +var func = newFunctionDeclaration("main") + +// Create a function scope +scopeManager.enterScope(func) + +// Create a block scope for the body because our language works this way +var body = newBlock() +func.body = body +scopeManager.enterScope(body) + +// Add statements here +body.statements += /* ... */ + +// Leave block scope +scopeManager.leaveScope(body) + +// Back to global scope, add the function to global scope +scopeManager.leaveScope(func) +scopeManager.addDeclaration(func) +``` + +Inside the scope, declarations can be added with `ScopeManager::addDeclaration`. This takes care of adding the declaration to an appropriate place in the AST (which beyond the scope of this document) and also adds the `Declaration` to the `Scope` under the appropriate `Symbol`. + + +## Looking up Symbols From 9637f233b214504d5eb3ae1a5a4e65d4f9b7b36b Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Mon, 30 Sep 2024 20:47:03 +0200 Subject: [PATCH 13/19] scope docs first draft finished --- docs/docs/CPG/impl/scopes.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/docs/CPG/impl/scopes.md b/docs/docs/CPG/impl/scopes.md index 79543aa42c..7d0f838a1d 100755 --- a/docs/docs/CPG/impl/scopes.md +++ b/docs/docs/CPG/impl/scopes.md @@ -13,7 +13,7 @@ description: > # Scopes and Symbols -The concept of scopes and symbols are at the heart of every programming language and thus are also the core of static analysis. Both concepts consist in the CPG library through the types `Scope` and `Symbol` respectively. A `Symbol` is just a typealias for a string. +The concept of scopes and symbols are at the heart of every programming language and thus are also the core of static analysis. Both concepts consist in the CPG library through the types `Scope` and `Symbol` respectively. A "symbol" can be seen as an identifier in most programming languages, referring to variables or functions. Symbols are often grouped in scopes, which defines the visibility of a symbol, e.g. a slice of a program that can "see" the symbol. Often this is also synonymous with the life-time of a variable, e.g., that its memory will be freed (or collected by a garbage collector) once it goes "out of scope". @@ -31,7 +31,7 @@ int main() { Usually symbols declared in a local scope override the declaration of a symbol in a higher (e.g., global scope), which is also referred to as "shadowing". This needs to be taken into account when resolving symbols to their declarations. -The `Scope` class holds all its symbols in the `Scope::symbols` property. More specifically, this property is a `SymbolMap`, which is a type alias to a map, whose key type is a `Symbol` and whose value type is a list of `Declaration` nodes. This is basically a symbol lookup table for all symbols in its scope. It is a map of a list because some programming languages have concepts like function overloading, which leads to the declaration of multiple `FunctionDeclaration` nodes under the same symbol in one scope. +The `Scope` class holds all its symbols in the `Scope::symbols` property. More specifically, this property is a `SymbolMap`, which is a type alias to a map, whose key type is a `Symbol` and whose value type is a list of `Declaration` nodes. This is basically a symbol lookup table for all symbols in its scope. It is a map of a list because some programming languages have concepts like function overloading, which leads to the declaration of multiple `FunctionDeclaration` nodes under the same symbol in one scope. In the current implementation, a `Symbol` is just a typealias for a string, and it is always "local" to the scope, meaning that it MUST NOT contain any qualifier. If you want to refer to a fully qualified identifier, a `Name` must be used. In the future, we might consider merging the concepts of `Symbol` and `Name`. For a frontend or pass developer, the main interaction point with scopes and symbols is through the `ScopeManager`. The scope manager is available to all nodes via the `TranslationContext` and also injected in frontend, handlers and passes. @@ -81,3 +81,24 @@ Inside the scope, declarations can be added with `ScopeManager::addDeclaration`. ## Looking up Symbols + +During different analysis steps, e.g., in different passes, we want to find certain symbols or lookup the declaration(s) belonging to a particular symbol. There are two functions in order to do so - a "higher" level concept in the `ScopeManager` and a "lower" level function on the `Scope` itself. + +The lower level one is called `Scope::lookupSymbol` and can be used to retrieve a list of `Declaration` nodes that belong to a particular `Symbol` that is "visible" the scope. It does so by first looking through its own `Scope::symbols`. If no match was found, the scope is traversed upwards to its `Scope::parent`, until a match is found. Furthermore, additional logic is needed to resolve symbol that are pointing to another scope, e.g., because they represent an `ImportDeclaration`. + +```Kotlin +var scope = /* ... */ +var declarations = scope.lookupSymbol("a") { + // Some additional predicate if we want +} +``` + +Additionally, the lookup can be fine-tuned by an additional predicate. However, this should be used carefully as it restricts the possible list of symbols very early. In most cases the list of symbols should be quite exhaustive at first to find all possible candidates and then selecting the best candidate in a second step (e.g., based on argument types for a function call). + +While the aforementioned API works great if we already have a specific start scope and local `Symbol`, we often start our resolution process with a `Name` -- which could potentially be qualified, such as `std::string`. Therefore, the "higher level" function `ScopeManager::findSymbols` can be used to retrieve a list of candidate declarations by a given `Name`. In a first step, the name is checked for a potential scope qualifier (`std` in this example). If present, it is extracted and the search scope is set to it. This is what is usually referred to as a "qualified lookup". Otherwise, the local part of the name is used to start the lookup, in what is called an "unqualified lookup". In both cases, the actual lookup is delegated to `ScopeManager::lookupSymbols`, but with different parameters. + +```Kotlin +var name = parseName("std::string") +// This will return all the 'string' symbols within the 'std' name scope +var stringSymbols = scopeManager.findSymbols(name) +``` From 24cde3d3e05b2c11b2155cb182929651d6ae992c Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 1 Oct 2024 00:29:59 +0200 Subject: [PATCH 14/19] Added docs for symbol resolver --- .../aisec/cpg/passes/SymbolResolver.kt | 36 ++++++++++++++---- docs/docs/CPG/impl/index.md | 1 + docs/docs/CPG/impl/scopes.md | 6 ++- docs/docs/CPG/impl/symbol-resolver.md | 38 +++++++++++++++++++ docs/mkdocs.yaml | 3 +- 5 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 docs/docs/CPG/impl/symbol-resolver.md diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 0032c9eabd..3a4edeea87 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -171,7 +171,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // identifier in Go) if ( language is HasAnonymousIdentifier && - current.name.localName == language.anonymousIdentifier + current.name.localName == language.anonymousIdentifier ) { return } @@ -180,6 +180,11 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // resolution, but in future this will also be used in resolving regular references. current.candidates = scopeManager.findSymbols(current.name, current.location).toSet() + // Preparation for a future without legacy call resolving. Taking the first candidate is not + // ideal since we are running into an issue with function pointers here (see workaround + // below). + var wouldResolveTo = current.candidates.singleOrNull() + // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this // property to actually resolve the function call. However, there is a special case that @@ -189,21 +194,38 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // of this call expression back to its original variable declaration. In the future, we want // to extend this particular code to resolve all callee references to their declarations, // i.e., their function definitions and get rid of the separate CallResolver. - var wouldResolveTo: Declaration? = null if (current.resolutionHelper is CallExpression) { // Peek into the declaration, and if it is only one declaration and a variable, we can // proceed normally, as we are running into the special case explained above. Otherwise, // we abort here (for now). - wouldResolveTo = current.candidates.singleOrNull() if (wouldResolveTo !is VariableDeclaration && wouldResolveTo !is ParameterDeclaration) { return } } + // Some stupid C++ workaround to use the legacy call resolver when we try to resolve targets + // for function pointers. At least we are only invoking the legacy resolver for a very small + // percentage of references now. + if (wouldResolveTo is FunctionDeclaration) { + // We need to invoke the legacy resolver, just to be sure + var legacy = scopeManager.resolveReference(current) + + // This is just for us to catch these differences in symbol resolving in the future. The + // difference is pretty much only that the legacy system takes parameters of the + // function-pointer-type into account and the new system does not (yet), because it just + // takes the first match. This will be needed to solve in the future. + if (legacy != wouldResolveTo) { + log.warn( + "The legacy symbol resolution and the new system produced different results here. This needs to be investigated in the future. For now, we take the legacy result." + ) + wouldResolveTo = legacy + } + } + // Only consider resolving, if the language frontend did not specify a resolution. If we // already have populated the wouldResolveTo variable, we can re-use this instead of // resolving again - var refersTo = current.refersTo ?: wouldResolveTo ?: scopeManager.resolveReference(current) + var refersTo = current.refersTo ?: wouldResolveTo var recordDeclType: Type? = null if (currentClass != null) { @@ -218,9 +240,9 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // only add new nodes for non-static unknown if ( refersTo == null && - !current.isStaticAccess && - recordDeclType != null && - recordDeclType.recordDeclaration != null + !current.isStaticAccess && + recordDeclType != null && + recordDeclType.recordDeclaration != null ) { // Maybe we are referring to a field instead of a local var val field = resolveMember(recordDeclType, current) diff --git a/docs/docs/CPG/impl/index.md b/docs/docs/CPG/impl/index.md index 9359f148d5..dc0b49f7db 100755 --- a/docs/docs/CPG/impl/index.md +++ b/docs/docs/CPG/impl/index.md @@ -24,3 +24,4 @@ the graph. These two stages are strictly separated one from each other. * [Languages and Language Frontends](./language) * [Scopes](./scopes) * [Passes](./passes) +* [Symbol Resolution](./symbol-resolver.md) diff --git a/docs/docs/CPG/impl/scopes.md b/docs/docs/CPG/impl/scopes.md index 7d0f838a1d..83fa49f153 100755 --- a/docs/docs/CPG/impl/scopes.md +++ b/docs/docs/CPG/impl/scopes.md @@ -1,6 +1,6 @@ --- -title: "Implementation and Concepts - Scopes" -linkTitle: "Implementation and Concepts - Scopes" +title: "Implementation and Concepts - Scopes and Symbols" +linkTitle: "Implementation and Concepts - Scopes and Symbols" weight: 20 no_list: false menu: @@ -102,3 +102,5 @@ var name = parseName("std::string") // This will return all the 'string' symbols within the 'std' name scope var stringSymbols = scopeManager.findSymbols(name) ``` + +Developers should avoid symbol lookup during frontend parsing, since often during parsing, only a limited view of all symbols is available. Instead, a dedicated pass that is run on the complete translation result is the preferred option. Apart from that, the main usage of this API is in the [SymbolResolver](symbol-resolver.md). \ No newline at end of file diff --git a/docs/docs/CPG/impl/symbol-resolver.md b/docs/docs/CPG/impl/symbol-resolver.md new file mode 100644 index 0000000000..c052e6b69f --- /dev/null +++ b/docs/docs/CPG/impl/symbol-resolver.md @@ -0,0 +1,38 @@ +--- +title: "Implementation and Concepts - Symbol Resolution" +linkTitle: "Implementation and Concepts - Symbol Resolution" +weight: 20 +no_list: false +menu: + main: + weight: 20 +description: > + The CPG library is a language-agnostic graph representation of source code. +--- + + +# Symbol Resolution + +This pages describes the main functionality behind symbol resolution in the CPG library. This is mostly done by the `SymbolResolver` pass, in combination with the symbol lookup API (see [Scopes and Symbols](scopes.md#looking-up-symbols)). In addition to the *lookup* of a symbol, the *resolution* takes the input of the lookup and provides a "definite" decision which symbol is used. This mostly referred to symbols / names used in a `Reference` or a `CallExpression` (which also has a reference as its `CallExpression::callee`). + +## The `SymbolResolver` Pass + +The `SymbolResolver` pass takes care of the heavy lifting of symbol (or rather reference) resolving: + +* It sets the `Reference::refersTo` property, +* and sets the `CallExpression::invokes` property, +* and finally takes cares of operator overloading (if the language supports it). + +In a way, it can be compared to a linker step in a compiler. The pass operates on a single `Component` and starts by identifying EOG starter nodes within the component. These node "start" an EOG sub-graph, i.e., they do not have any previous EOG edges. The symbol resolver uses the `ScopedWalker` with a special set-up that traverses the EOG starting with each EOG starter node until it reaches the end. This ensures that symbols are resolved in the correct order of "evaluation", e.g., that a base of a member expression is resolved before the expression itself. This ensures that necessary type information on the base are available in order to resolve appropriate fields of the member expression. + +The symbol resolver itself has gone through many re-writes over the years and there is still some code left that we consider *legacy*. These functions are marked as such, and we aim to remove them slowly. + +## Resolving References + +The main functionality lies in `ScopeManager::handleReference`. For all `Reference` nodes (that are not `MemberExpression` nodes) we use the symbol lookup API to find declaration candidates for the name the reference is referring to. This candidate list is then stored in `Reference::candidates`. If the reference is the `CallExpression::callee` property of a call, we abort here and jump to [Resolve Calls](#resolve-calls). + +Otherwise, we currently take the first entry of the candidate list and set the `Reference::refersTo` property to it. + +## Resolve Calls + +Prequisite: The `CallExpression::callee` reference must have been resolved (see [Resolving References](#resolving-references)). \ No newline at end of file diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml index b6a535732d..e73c12e752 100755 --- a/docs/mkdocs.yaml +++ b/docs/mkdocs.yaml @@ -167,8 +167,9 @@ nav: - "Implementation": - CPG/impl/index.md - "Language Frontends": CPG/impl/language.md - - "Scopes": CPG/impl/scopes.md + - "Scopes and Symbols": CPG/impl/scopes.md - "Passes": CPG/impl/passes.md + - "Symbol Resolution": CPG/impl/symbol-resolver.md - "Contributing": - "Contributing to the CPG library": Contributing/index.md # This assumes that the most recent dokka build was generated with the "main" tag! From 3d30cdbedcb257cef1daa1fcb57b1ce3f0ab8be6 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 1 Oct 2024 23:15:13 +0200 Subject: [PATCH 15/19] More cleanup of handleReference --- .../aisec/cpg/frontends/LanguageTraits.kt | 9 + .../aisec/cpg/passes/InferenceHelpers.kt | 226 ++++++++++++++++ .../aisec/cpg/passes/SymbolResolver.kt | 248 +++--------------- .../aisec/cpg/passes/TypeResolver.kt | 1 + .../aisec/cpg/frontends/cxx/CPPLanguage.kt | 6 +- .../aisec/cpg/frontends/java/JavaLanguage.kt | 5 +- .../cpg/passes/PythonAddDeclarationsPass.kt | 29 +- .../frontends/python/PythonFrontendTest.kt | 6 +- .../aisec/cpg_vis_neo4j/Application.kt | 4 +- 9 files changed, 294 insertions(+), 240 deletions(-) create mode 100644 cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt index beb36b3661..dee23cc697 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt @@ -123,6 +123,15 @@ interface HasSuperClasses : LanguageTrait { ): Boolean } +/** + * A language trait, that specifies that this language has support for implicit receiver, e.g., that + * one can omit references to a base such as `this`. + */ +interface HasImplicitReceiver : LanguageTrait { + + val receiverName: String +} + /** * A language trait, that specifies that this language has certain qualifiers. If so, we should * consider them when parsing the types. diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt new file mode 100644 index 0000000000..2a5a7d82b8 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +import de.fraunhofer.aisec.cpg.InferenceConfiguration +import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables +import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver +import de.fraunhofer.aisec.cpg.frontends.HasStructs +import de.fraunhofer.aisec.cpg.graph.Name +import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration +import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration +import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope +import de.fraunhofer.aisec.cpg.graph.scopes.NameScope +import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import de.fraunhofer.aisec.cpg.graph.types.ObjectType +import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration +import de.fraunhofer.aisec.cpg.graph.unknownType +import de.fraunhofer.aisec.cpg.passes.Pass.Companion.log +import de.fraunhofer.aisec.cpg.passes.SymbolResolver +import de.fraunhofer.aisec.cpg.passes.TypeResolver +import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver +import de.fraunhofer.aisec.cpg.passes.inference.startInference +import kotlin.collections.forEach + +internal fun TranslationContext.tryNamespaceInference( + name: Name, + locationHint: Node? +): NamespaceDeclaration? { + return scopeManager.globalScope + ?.astNode + ?.startInference(this) + ?.inferNamespaceDeclaration(name, null, locationHint) +} + +/** + * Tries to infer a [RecordDeclaration] from an unresolved [Type]. This will return `null`, if + * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + * + * If [updateType] is set to true, also the [ObjectType.recordDeclaration] is adjusted. This is only + * needed if we call this function in the [SymbolResolver] (and not in the [TypeResolver]). + */ +internal fun TranslationContext.tryRecordInference( + type: Type, + locationHint: Node? = null, + updateType: Boolean = false, +): RecordDeclaration? { + val kind = + if (type.language is HasStructs) { + "struct" + } else { + "class" + } + // Determine the scope where we want to start our inference + var (scope, _) = scopeManager.extractScope(type) + + if (scope !is NameScope) { + scope = null + } + + var holder = scope?.astNode + + // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a + // parent record) + var parentName = type.name.parent + if (scope == null && parentName != null) { + // At this point, we need to check whether we have any type reference to our parent + // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we + // have a high chance that this is actually a parent record and not a namespace + var parentType = typeManager.lookupResolvedType(parentName) + holder = + if (parentType != null) { + tryRecordInference(parentType, locationHint = locationHint) + } else { + tryNamespaceInference(parentName, locationHint = locationHint) + } + } + + val record = + (holder ?: this.scopeManager.globalScope?.astNode) + ?.startInference(this) + ?.inferRecordDeclaration(type, kind, locationHint) + + // update the type's record. Because types are only unique per scope, we potentially need to + // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record + if (updateType && record != null) { + typeManager.firstOrderTypesMap[record.name.toString()]?.forEach { + it.recordDeclaration = record + } + } + + return record +} + +/** + * Tries to infer a global variable from an unresolved [Reference]. This will return `null`, if + * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + * + * Currently, this can only infer variables in the [GlobalScope], but not in a namespace. + */ +internal fun TranslationContext.tryGlobalVariableInference(ref: Reference): Declaration? { + // For now, we only infer globals at the top-most global level, i.e., no globals in + // namespaces + if (ref.name.isQualified()) { + val (scope, _) = scopeManager.extractScope(ref, null) + when (scope) { + is NameScope -> { + log.warn( + "We should infer a namespace variable ${ref.name} at this point, but this is not yet implemented." + ) + return null + } + else -> { + log.warn( + "We should infer a variable ${ref.name} in ${scope}, but this is not yet implemented." + ) + return null + } + } + } + + if (ref.language !is HasGlobalVariables) { + return null + } + + // Forward this to our inference system. This will also check whether and how inference is + // configured. + return scopeManager.globalScope?.astNode?.startInference(this)?.inferVariableDeclaration(ref) +} + +/** + * Tries to infer a [FieldDeclaration] from an unresolved [MemberExpression] or [Reference] (if the + * language has [HasImplicitReceiver]). This will return `null`, if inference was not possible, or + * if it was turned off in the [InferenceConfiguration]. + */ +internal fun TranslationContext.tryFieldInference( + ref: Reference, + containingClass: ObjectType +): ValueDeclaration? { + // This is a little bit of a workaround, but at least this makes sure we are not inferring a + // record, where a namespace already exist + val (scope, _) = scopeManager.extractScope(ref, null) + val shouldInfer = + if (scope == null) { + true + } else { + // Workaround needed for Java? + when (scope) { + is RecordScope -> true + else -> false + } + } + if (!shouldInfer) { + return null + } + + val name = ref.name + + var record = containingClass.recordDeclaration + if (record == null) { + // We access an unknown field of an unknown record. so we need to handle that along the + // way as well + record = tryRecordInference(containingClass, locationHint = ref, updateType = true) + } + + if (record == null) { + log.error( + "There is no matching record in the record map. Can't identify which field is used." + ) + return null + } + + val target = record.fields.firstOrNull { it.name.lastPartsMatch(name) } + + return if (target != null) { + target + } else { + val declaration = + ref.newFieldDeclaration( + name.localName, + // we will set the type later through the type inference observer + record.unknownType(), + listOf(), + null, + false, + ) + record.addField(declaration) + declaration.language = record.language + declaration.isInferred = true + + // We might be able to resolve the type later (or better), if a type is + // assigned to our reference later + ref.registerTypeObserver(TypeInferenceObserver(declaration)) + + declaration + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 3a4edeea87..6cdd1b42e7 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -31,7 +31,6 @@ import de.fraunhofer.aisec.cpg.frontends.* import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.NameScope -import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.scopes.Symbol import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.* @@ -39,13 +38,15 @@ import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.ScopedWalker import de.fraunhofer.aisec.cpg.helpers.Util import de.fraunhofer.aisec.cpg.helpers.replace import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn -import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver import de.fraunhofer.aisec.cpg.passes.inference.inferFunction import de.fraunhofer.aisec.cpg.passes.inference.inferMethod import de.fraunhofer.aisec.cpg.passes.inference.startInference import de.fraunhofer.aisec.cpg.processing.strategy.Strategy import org.slf4j.Logger import org.slf4j.LoggerFactory +import tryFieldInference +import tryGlobalVariableInference +import tryRecordInference /** * Creates new connections between the place where a variable is declared and where it is used. @@ -171,11 +172,17 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // identifier in Go) if ( language is HasAnonymousIdentifier && - current.name.localName == language.anonymousIdentifier + current.name.localName == language.anonymousIdentifier ) { return } + // Ignore references to "super" if the language has super expressions, because they will be + // handled separately in handleMemberExpression + if (language is HasSuperClasses && current.name.localName == language.superClassKeyword) { + return + } + // Find a list of candidate symbols. Currently, this is only used the in the "next-gen" call // resolution, but in future this will also be used in resolving regular references. current.candidates = scopeManager.findSymbols(current.name, current.location).toSet() @@ -237,33 +244,23 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { refersTo = resolveMethodFunctionPointer(current, helperType) } - // only add new nodes for non-static unknown - if ( - refersTo == null && - !current.isStaticAccess && - recordDeclType != null && - recordDeclType.recordDeclaration != null - ) { - // Maybe we are referring to a field instead of a local var - val field = resolveMember(recordDeclType, current) - if (field != null) { - refersTo = field - } - } - - // TODO: we need to do proper scoping (and merge it with the code above), but for now - // this just enables CXX static fields - if (refersTo == null && language != null && current.name.isQualified()) { - recordDeclType = getEnclosingTypeOf(current) - val field = resolveMember(recordDeclType, current) - if (field != null) { - refersTo = field - } - } - + // If we did not resolve the reference up to this point, we can try to infer the declaration if (refersTo == null) { - // We can try to infer a possible global variable, if the language supports this - refersTo = tryGlobalVariableInference(current) + refersTo = + // Try to infer fields or namespace variables, if it makes sense + if ( + language is HasImplicitReceiver && + !current.name.isQualified() && + !current.isStaticAccess && + recordDeclType is ObjectType && + recordDeclType.recordDeclaration != null + ) { + ctx.tryFieldInference(current, recordDeclType) + } else { + // We can try to infer a possible global variable (either at top-level or + // namespace level), if the language supports this + ctx.tryGlobalVariableInference(current) + } } if (refersTo != null) { @@ -277,45 +274,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - /** - * Tries to infer a global variable from an unresolved [Reference]. This will return `null`, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. - */ - private fun tryGlobalVariableInference(ref: Reference): Declaration? { - if (ref.language !is HasGlobalVariables) { - return null - } - - // For now, we only infer globals at the top-most global level, i.e., no globals in - // namespaces - if (ref.name.isQualified()) { - return null - } - - // Forward this to our inference system. This will also check whether and how inference is - // configured. - return scopeManager.globalScope?.astNode?.startInference(ctx)?.inferVariableDeclaration(ref) - } - - /** - * We get the type of the "scope" this node is in. (e.g. for a field, we drop the field's name - * and have the class) - */ - protected fun getEnclosingTypeOf(current: Node): Type { - val language = current.language - - return if (language != null && language.namespaceDelimiter.isNotEmpty()) { - val parentName = (current.name.parent ?: current.name).toString() - var type = current.objectType(parentName) - if (type is ObjectType) { - TypeResolver.resolveType(type) - } - type - } else { - current.unknownType() - } - } - protected fun handleMemberExpression(curClass: RecordDeclaration?, current: MemberExpression) { // Some locals for easier smart casting var base = current.base @@ -358,17 +316,22 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } val baseType = base.type.root - current.refersTo = resolveMember(baseType, current) + if (baseType is ObjectType) { + current.refersTo = resolveMember(baseType, current) + } } - protected fun resolveMember(containingClass: Type, reference: Reference): ValueDeclaration? { + protected fun resolveMember( + containingClass: ObjectType, + reference: Reference + ): ValueDeclaration? { if (isSuperclassReference(reference)) { // if we have a "super" on the member side, this is a member call. We need to resolve // this in the call resolver instead return null } var member: ValueDeclaration? = null - var type = containingClass + var type: Type = containingClass // Check for a possible overloaded operator-> if ( @@ -414,79 +377,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return member } - // This is a little bit of a workaround, but at least this makes sure we are not inferring a - // record, where a namespace already exist - val (scope, _) = scopeManager.extractScope(reference, null) - return if (scope == null) { - handleUnknownField(containingClass, reference) - } else { - // Workaround needed for Java. If we already have a record scope, use the "old" - // inference function - when (scope) { - is RecordScope -> handleUnknownField(containingClass, reference) - is NameScope -> { - log.warn( - "We should infer a namespace variable ${reference.name} at this point, but this is not yet implemented." - ) - null - } - else -> { - log.warn( - "We should infer a variable ${reference.name} in ${scope}, but this is not yet implemented." - ) - null - } - } - } - } - - // TODO(oxisto): Move to inference class - protected fun handleUnknownField(base: Type, ref: Reference): FieldDeclaration? { - val name = ref.name - - // unwrap a potential pointer-type - if (base is PointerType) { - return handleUnknownField(base.elementType, ref) - } - - var record = base.recordDeclaration - if (base !is UnknownType && base !is AutoType && record == null) { - // We access an unknown field of an unknown record. so we need to handle that along the - // way as well - record = ctx.tryRecordInference(base, locationHint = ref, updateType = true) - } - - if (record == null) { - log.error( - "There is no matching record in the record map. Can't identify which field is used." - ) - return null - } - - val target = record.fields.firstOrNull { it.name.lastPartsMatch(name) } - - return if (target != null) { - target - } else { - val declaration = - newFieldDeclaration( - name.localName, - // we will set the type later through the type inference observer - record.unknownType(), - listOf(), - null, - false, - ) - record.addField(declaration) - declaration.language = record.language - declaration.isInferred = true - - // We might be able to resolve the type later (or better), if a type is - // assigned to our reference later - ref.registerTypeObserver(TypeInferenceObserver(declaration)) - - declaration - } + return ctx.tryFieldInference(reference, containingClass) } protected fun handle(node: Node?, currClass: RecordDeclaration?) { @@ -981,72 +872,3 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } } - -fun TranslationContext.tryNamespaceInference( - name: Name, - locationHint: Node? -): NamespaceDeclaration? { - return scopeManager.globalScope - ?.astNode - ?.startInference(this) - ?.inferNamespaceDeclaration(name, null, locationHint) -} - -/** - * Tries to infer a [RecordDeclaration] from an unresolved [Type]. This will return `null`, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. - * - * If [updateType] is set to true, also the [ObjectType.recordDeclaration] is adjusted. This is only - * needed if we call this function in the [SymbolResolver] (and not in the [TypeResolver]). - */ -fun TranslationContext.tryRecordInference( - type: Type, - locationHint: Node? = null, - updateType: Boolean = false, -): RecordDeclaration? { - val kind = - if (type.language is HasStructs) { - "struct" - } else { - "class" - } - // Determine the scope where we want to start our inference - var (scope, _) = scopeManager.extractScope(type) - - if (scope !is NameScope) { - scope = null - } - - var holder = scope?.astNode - - // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a - // parent record) - var parentName = type.name.parent - if (scope == null && parentName != null) { - // At this point, we need to check whether we have any type reference to our parent - // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we - // have a high chance that this is actually a parent record and not a namespace - var parentType = typeManager.lookupResolvedType(parentName) - holder = - if (parentType != null) { - tryRecordInference(parentType, locationHint = locationHint) - } else { - tryNamespaceInference(parentName, locationHint = locationHint) - } - } - - val record = - (holder ?: this.scopeManager.globalScope?.astNode) - ?.startInference(this) - ?.inferRecordDeclaration(type, kind, locationHint) - - // update the type's record. Because types are only unique per scope, we potentially need to - // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record - if (updateType && record != null) { - typeManager.firstOrderTypesMap[record.name.toString()]?.forEach { - it.recordDeclaration = record - } - } - - return record -} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index c491afb69c..17c2abdbb3 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -33,6 +33,7 @@ import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn +import tryRecordInference /** * The purpose of this [Pass] is to establish a relationship between [Type] nodes (more specifically diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CPPLanguage.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CPPLanguage.kt index 23d7b942fd..c5fc9815fd 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CPPLanguage.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CPPLanguage.kt @@ -56,7 +56,8 @@ open class CPPLanguage : HasUnknownType, HasFunctionStyleCasts, HasFunctionOverloading, - HasOperatorOverloading { + HasOperatorOverloading, + HasImplicitReceiver { override val fileExtensions = listOf("cpp", "cc", "cxx", "c++", "hpp", "hh") override val elaboratedTypeSpecifier = listOf("class", "struct", "union", "enum") override val unknownTypeString = listOf("auto") @@ -317,4 +318,7 @@ open class CPPLanguage : return Pair(false, listOf()) } + + override val receiverName: String + get() = "this" } diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt index 4a3ed16ee2..f0f6497559 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt @@ -45,7 +45,8 @@ open class JavaLanguage : HasQualifier, HasUnknownType, HasShortCircuitOperators, - HasFunctionOverloading { + HasFunctionOverloading, + HasImplicitReceiver { override val fileExtensions = listOf("java") override val namespaceDelimiter = "." @Transient override val frontend: KClass = JavaLanguageFrontend::class @@ -116,4 +117,6 @@ open class JavaLanguage : override val startCharacter = '<' override val endCharacter = '>' + override val receiverName: String + get() = "this" } diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt index 100791c74a..e4eaea76e9 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt @@ -32,6 +32,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression @@ -102,22 +103,18 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { (scopeManager.currentFunction as? MethodDeclaration)?.receiver?.name ) { // We need to temporarily jump into the scope of the current record to - // add the field - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field + // add the field. These are instance attributes + scopeManager.withScope( + scopeManager.firstScopeIsInstanceOrNull() + ) { + newFieldDeclaration(node.name) + } } else { val v = newVariableDeclaration(node.name) v } } else { - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field + newFieldDeclaration(node.name) } } else { newVariableDeclaration(node.name) @@ -127,14 +124,8 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { decl.location = node.location decl.isImplicit = true - if (decl is FieldDeclaration) { - scopeManager.currentRecord?.addField(decl) - scopeManager.withScope(scopeManager.currentRecord?.scope) { - scopeManager.addDeclaration(decl) - } - } else { - scopeManager.addDeclaration(decl) - } + scopeManager.withScope(decl.scope) { scopeManager.addDeclaration(decl) } + return decl } diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index 17f8b0855d..f68284a5de 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -1336,10 +1336,8 @@ class PythonFrontendTest : BaseTest() { it.registerLanguage() } assertNotNull(result) - assertEquals(2, result.variables.size) - // Note, that "pi" is incorrectly inferred as a field declaration. This is a known bug in - // the inference system (and not in the python module) and will be handled separately. - assertEquals(listOf("mypi", "pi"), result.variables.map { it.name.localName }) + assertEquals(1, result.variables.size) + assertEquals(listOf("mypi"), result.variables.map { it.name.localName }) } @Test diff --git a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt index 95a5d691b7..385dad3c88 100644 --- a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt +++ b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt @@ -519,8 +519,8 @@ class Application : Callable { if (!noDefaultPasses) { translationConfiguration.defaultPasses() - translationConfiguration.registerPass() - translationConfiguration.registerPass() + // translationConfiguration.registerPass() + // translationConfiguration.registerPass() } if (customPasses != "DEFAULT") { val pieces = customPasses.split(",") From f2dcb188714f9a6c9f5c72078baa544d9b4135e2 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 1 Oct 2024 23:26:32 +0200 Subject: [PATCH 16/19] More inference cleanup --- .../aisec/cpg/passes/InferenceHelpers.kt | 108 ++++++++++++------ 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt index 2a5a7d82b8..3d6773a68d 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt @@ -23,6 +23,7 @@ * \______/ \__| \______/ * */ +import de.fraunhofer.aisec.cpg.CallResolutionResult import de.fraunhofer.aisec.cpg.InferenceConfiguration import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables @@ -32,13 +33,16 @@ import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.ObjectType @@ -49,6 +53,7 @@ import de.fraunhofer.aisec.cpg.passes.Pass.Companion.log import de.fraunhofer.aisec.cpg.passes.SymbolResolver import de.fraunhofer.aisec.cpg.passes.TypeResolver import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver +import de.fraunhofer.aisec.cpg.passes.inference.inferFunction import de.fraunhofer.aisec.cpg.passes.inference.startInference import kotlin.collections.forEach @@ -164,32 +169,22 @@ internal fun TranslationContext.tryGlobalVariableInference(ref: Reference): Decl */ internal fun TranslationContext.tryFieldInference( ref: Reference, - containingClass: ObjectType + objectType: ObjectType ): ValueDeclaration? { - // This is a little bit of a workaround, but at least this makes sure we are not inferring a - // record, where a namespace already exist + // We only want to infer fields here, this can either happen if we have a reference with an + // implicit receiver or if we have a scoped reference and the scope points to a record val (scope, _) = scopeManager.extractScope(ref, null) - val shouldInfer = - if (scope == null) { - true - } else { - // Workaround needed for Java? - when (scope) { - is RecordScope -> true - else -> false - } - } - if (!shouldInfer) { + if (scope != null && scope !is RecordScope) { return null } val name = ref.name - var record = containingClass.recordDeclaration + var record = objectType.recordDeclaration if (record == null) { // We access an unknown field of an unknown record. so we need to handle that along the // way as well - record = tryRecordInference(containingClass, locationHint = ref, updateType = true) + record = tryRecordInference(objectType, locationHint = ref, updateType = true) } if (record == null) { @@ -199,28 +194,65 @@ internal fun TranslationContext.tryFieldInference( return null } - val target = record.fields.firstOrNull { it.name.lastPartsMatch(name) } + val declaration = + ref.newFieldDeclaration( + name.localName, + // we will set the type later through the type inference observer + record.unknownType(), + listOf(), + null, + false, + ) + record.addField(declaration) + declaration.language = record.language + declaration.isInferred = true + + // We might be able to resolve the type later (or better), if a type is + // assigned to our reference later + ref.registerTypeObserver(TypeInferenceObserver(declaration)) + + return declaration +} + + +fun TranslationContext.tryFunctionInference( + call: CallExpression, + result: CallResolutionResult, +): List { + // We need to see, whether we have any suitable base (e.g. a class) or not; There are two + // main cases + // a) we have a member expression -> easy + // b) we have a call expression -> not so easy. This could be a member call with an implicit + // this (in which case we want to explore the base type). But that is only possible if + // the callee is not qualified, because otherwise we are in a static call like + // MyClass::doSomething() or in a namespace call (in case we do not want to explore the + // base type here yet). This will change in a future PR. + val (suitableBases, bestGuess) = + if (call.callee is MemberExpression || !call.callee.name.isQualified()) { + getPossibleContainingTypes(call) + } else { + Pair(setOf(), null) + } - return if (target != null) { - target + return if (suitableBases.isEmpty()) { + // Resolution has provided no result, we can forward this to the inference system, + // if we want. While this is definitely a function, it could still be a function + // inside a namespace. We therefore have two possible start points, a namespace + // declaration or a translation unit. Nothing else is allowed (fow now). We can + // re-use the information in the ResolutionResult, since this already contains the + // actual start scope (e.g. in case the callee has an FQN). + var scope = result.actualStartScope + if (scope !is NameScope) { + scope = scopeManager.globalScope + } + val func = + when (val start = scope?.astNode) { + is TranslationUnitDeclaration -> start.inferFunction(call, ctx = ctx) + is NamespaceDeclaration -> start.inferFunction(call, ctx = ctx) + else -> null + } + listOfNotNull(func) } else { - val declaration = - ref.newFieldDeclaration( - name.localName, - // we will set the type later through the type inference observer - record.unknownType(), - listOf(), - null, - false, - ) - record.addField(declaration) - declaration.language = record.language - declaration.isInferred = true - - // We might be able to resolve the type later (or better), if a type is - // assigned to our reference later - ref.registerTypeObserver(TypeInferenceObserver(declaration)) - - declaration + createMethodDummies(suitableBases, bestGuess, call) } -} +} \ No newline at end of file From 42fd4f65d633902ee667a4dd4efc586b9b33dadc Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 1 Oct 2024 23:40:04 +0200 Subject: [PATCH 17/19] Even more clean up and migration of inference stuff --- .../aisec/cpg/passes/InferenceHelpers.kt | 47 +++++- .../aisec/cpg/passes/SymbolResolver.kt | 137 ++++-------------- 2 files changed, 71 insertions(+), 113 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt index 3d6773a68d..da961cb624 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt @@ -52,8 +52,10 @@ import de.fraunhofer.aisec.cpg.graph.unknownType import de.fraunhofer.aisec.cpg.passes.Pass.Companion.log import de.fraunhofer.aisec.cpg.passes.SymbolResolver import de.fraunhofer.aisec.cpg.passes.TypeResolver +import de.fraunhofer.aisec.cpg.passes.getPossibleContainingTypes import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver import de.fraunhofer.aisec.cpg.passes.inference.inferFunction +import de.fraunhofer.aisec.cpg.passes.inference.inferMethod import de.fraunhofer.aisec.cpg.passes.inference.startInference import kotlin.collections.forEach @@ -214,8 +216,7 @@ internal fun TranslationContext.tryFieldInference( return declaration } - -fun TranslationContext.tryFunctionInference( +internal fun TranslationContext.tryFunctionInference( call: CallExpression, result: CallResolutionResult, ): List { @@ -247,12 +248,46 @@ fun TranslationContext.tryFunctionInference( } val func = when (val start = scope?.astNode) { - is TranslationUnitDeclaration -> start.inferFunction(call, ctx = ctx) - is NamespaceDeclaration -> start.inferFunction(call, ctx = ctx) + is TranslationUnitDeclaration -> start.inferFunction(call, ctx = this) + is NamespaceDeclaration -> start.inferFunction(call, ctx = this) else -> null } listOfNotNull(func) } else { - createMethodDummies(suitableBases, bestGuess, call) + tryMethodInference(call, suitableBases, bestGuess) + } +} + +/** + * Creates an inferred element for each RecordDeclaration + * + * @param call + * @param possibleContainingTypes + */ +internal fun TranslationContext.tryMethodInference( + call: CallExpression, + possibleContainingTypes: Set, + bestGuess: Type?, +): List { + var records = + possibleContainingTypes.mapNotNull { + val root = it.root as? ObjectType + root?.recordDeclaration + } + + // We access an unknown method of an unknown record. so we need to handle that + // along the way as well. We prefer the base type + if (records.isEmpty()) { + records = + listOfNotNull( + tryRecordInference( + bestGuess?.root ?: call.unknownType(), + locationHint = call, + updateType = true + ) + ) } -} \ No newline at end of file + records = records.distinct() + + return records.mapNotNull { record -> record.inferMethod(call, ctx = this) } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 6cdd1b42e7..a576b9f742 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -38,15 +38,13 @@ import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.ScopedWalker import de.fraunhofer.aisec.cpg.helpers.Util import de.fraunhofer.aisec.cpg.helpers.replace import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn -import de.fraunhofer.aisec.cpg.passes.inference.inferFunction -import de.fraunhofer.aisec.cpg.passes.inference.inferMethod import de.fraunhofer.aisec.cpg.passes.inference.startInference import de.fraunhofer.aisec.cpg.processing.strategy.Strategy import org.slf4j.Logger import org.slf4j.LoggerFactory import tryFieldInference +import tryFunctionInference import tryGlobalVariableInference -import tryRecordInference /** * Creates new connections between the place where a variable is declared and where it is used. @@ -445,7 +443,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // callee var candidates = if (useLegacyResolution) { - val (possibleTypes, _) = getPossibleContainingTypes(call) + val (possibleTypes, _) = ctx.getPossibleContainingTypes(call) resolveMemberByName(callee.name.localName, possibleTypes).toSet() } else { callee.candidates @@ -478,7 +476,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { call.invokes = result.bestViable.toMutableList() } UNRESOLVED -> { - call.invokes = tryFunctionInference(call, result).toMutableList() + call.invokes = ctx.tryFunctionInference(call, result).toMutableList() } } @@ -592,40 +590,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return candidates } - /** - * Creates an inferred element for each RecordDeclaration - * - * @param possibleContainingTypes - * @param call - */ - protected fun createMethodDummies( - possibleContainingTypes: Set, - bestGuess: Type?, - call: CallExpression - ): List { - var records = - possibleContainingTypes.mapNotNull { - val root = it.root as? ObjectType - root?.recordDeclaration - } - - // We access an unknown method of an unknown record. so we need to handle that - // along the way as well. We prefer the base type - if (records.isEmpty()) { - records = - listOfNotNull( - ctx.tryRecordInference( - bestGuess?.root ?: unknownType(), - locationHint = call, - updateType = true - ) - ) - } - records = records.distinct() - - return records.mapNotNull { record -> record.inferMethod(call, ctx = ctx) } - } - protected fun handleConstructExpression(constructExpression: ConstructExpression) { if (constructExpression.instantiates != null && constructExpression.constructor != null) return @@ -707,32 +671,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return resolveWithArguments(candidates, op.operatorArguments, op as Expression) } - /** - * Returns a set of types in which the callee of our [call] could reside in. More concretely, it - * returns a [Pair], where the first element is the set of types and the second is our best - * guess. - */ - protected fun getPossibleContainingTypes(call: CallExpression): Pair, Type?> { - val possibleTypes = mutableSetOf() - var bestGuess: Type? = null - if (call is MemberCallExpression) { - call.base?.let { base -> - bestGuess = base.type - possibleTypes.add(base.type) - possibleTypes.addAll(base.assignedTypes) - } - } else { - // This could be a C++ member call with an implicit this (which we do not create), so - // let's add the current class to the possible list - scopeManager.currentRecord?.toType()?.let { - bestGuess = it - possibleTypes.add(it) - } - } - - return Pair(possibleTypes, bestGuess) - } - protected fun getInvocationCandidatesFromParents( name: Symbol, possibleTypes: Set, @@ -806,48 +744,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { ?.createInferredConstructor(constructExpression.signature) } - fun tryFunctionInference( - call: CallExpression, - result: CallResolutionResult, - ): List { - // We need to see, whether we have any suitable base (e.g. a class) or not; There are two - // main cases - // a) we have a member expression -> easy - // b) we have a call expression -> not so easy. This could be a member call with an implicit - // this (in which case we want to explore the base type). But that is only possible if - // the callee is not qualified, because otherwise we are in a static call like - // MyClass::doSomething() or in a namespace call (in case we do not want to explore the - // base type here yet). This will change in a future PR. - val (suitableBases, bestGuess) = - if (call.callee is MemberExpression || !call.callee.name.isQualified()) { - getPossibleContainingTypes(call) - } else { - Pair(setOf(), null) - } - - return if (suitableBases.isEmpty()) { - // Resolution has provided no result, we can forward this to the inference system, - // if we want. While this is definitely a function, it could still be a function - // inside a namespace. We therefore have two possible start points, a namespace - // declaration or a translation unit. Nothing else is allowed (fow now). We can - // re-use the information in the ResolutionResult, since this already contains the - // actual start scope (e.g. in case the callee has an FQN). - var scope = result.actualStartScope - if (scope !is NameScope) { - scope = scopeManager.globalScope - } - val func = - when (val start = scope?.astNode) { - is TranslationUnitDeclaration -> start.inferFunction(call, ctx = ctx) - is NamespaceDeclaration -> start.inferFunction(call, ctx = ctx) - else -> null - } - listOfNotNull(func) - } else { - createMethodDummies(suitableBases, bestGuess, call) - } - } - companion object { val LOGGER: Logger = LoggerFactory.getLogger(SymbolResolver::class.java) @@ -872,3 +768,30 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } } + +/** + * Returns a set of types in which the callee of our [call] could reside in. More concretely, it + * returns a [Pair], where the first element is the set of types and the second is our best guess. + */ +internal fun TranslationContext.getPossibleContainingTypes( + call: CallExpression +): Pair, Type?> { + val possibleTypes = mutableSetOf() + var bestGuess: Type? = null + if (call is MemberCallExpression) { + call.base?.let { base -> + bestGuess = base.type + possibleTypes.add(base.type) + possibleTypes.addAll(base.assignedTypes) + } + } else if (call.language is HasImplicitReceiver) { + // This could be a member call with an implicit receiver, so let's add the current class + // to the possible list + scopeManager.currentRecord?.toType()?.let { + bestGuess = it + possibleTypes.add(it) + } + } + + return Pair(possibleTypes, bestGuess) +} From 66c1216da7a8d4a19ede427843bfc530ae98c8e9 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Tue, 1 Oct 2024 23:58:27 +0200 Subject: [PATCH 18/19] Even more inference cleanup --- .../aisec/cpg/passes/InferenceHelpers.kt | 67 ++++++++++--------- .../aisec/cpg/passes/SymbolResolver.kt | 59 ++++++---------- 2 files changed, 56 insertions(+), 70 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt index da961cb624..e8aa4b8f22 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/InferenceHelpers.kt @@ -29,17 +29,11 @@ import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver import de.fraunhofer.aisec.cpg.frontends.HasStructs +import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.Node -import de.fraunhofer.aisec.cpg.graph.declarations.Declaration -import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration -import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration -import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression @@ -129,39 +123,52 @@ internal fun TranslationContext.tryRecordInference( } /** - * Tries to infer a global variable from an unresolved [Reference]. This will return `null`, if + * Tries to infer a [VariableDeclaration] out of a [Reference]. This will return `null`, if * inference was not possible, or if it was turned off in the [InferenceConfiguration]. * - * Currently, this can only infer variables in the [GlobalScope], but not in a namespace. + * We mainly try to infer global variables and fields here, since these are possibly parts of the + * code we do not "see". We do not try to infer local variables, because we are under the assumption + * that even with incomplete code, we at least have the complete current function code. */ -internal fun TranslationContext.tryGlobalVariableInference(ref: Reference): Declaration? { - // For now, we only infer globals at the top-most global level, i.e., no globals in - // namespaces - if (ref.name.isQualified()) { +internal fun TranslationContext.tryVariableInference( + language: Language<*>?, + ref: Reference, +): Declaration? { + var currentRecordType = scopeManager.currentRecord?.toType() as? ObjectType + return if ( + language is HasImplicitReceiver && + !ref.name.isQualified() && + !ref.isStaticAccess && + currentRecordType != null + ) { + // This could potentially be a reference to a field with an implicit receiver call + tryFieldInference(ref, currentRecordType) + } else if (ref.name.isQualified()) { + // For now, we only infer globals at the top-most global level, i.e., no globals in + // namespaces val (scope, _) = scopeManager.extractScope(ref, null) when (scope) { is NameScope -> { log.warn( "We should infer a namespace variable ${ref.name} at this point, but this is not yet implemented." ) - return null + null } else -> { log.warn( "We should infer a variable ${ref.name} in ${scope}, but this is not yet implemented." ) - return null + null } } + } else if (ref.language is HasGlobalVariables) { + // We can try to infer a possible global variable (at top-level), if the language supports + // this + scopeManager.globalScope?.astNode?.startInference(this)?.inferVariableDeclaration(ref) + } else { + // Nothing to infer + null } - - if (ref.language !is HasGlobalVariables) { - return null - } - - // Forward this to our inference system. This will also check whether and how inference is - // configured. - return scopeManager.globalScope?.astNode?.startInference(this)?.inferVariableDeclaration(ref) } /** @@ -171,22 +178,20 @@ internal fun TranslationContext.tryGlobalVariableInference(ref: Reference): Decl */ internal fun TranslationContext.tryFieldInference( ref: Reference, - objectType: ObjectType + targetType: ObjectType ): ValueDeclaration? { // We only want to infer fields here, this can either happen if we have a reference with an // implicit receiver or if we have a scoped reference and the scope points to a record - val (scope, _) = scopeManager.extractScope(ref, null) + val (scope, _) = scopeManager.extractScope(ref) if (scope != null && scope !is RecordScope) { return null } - val name = ref.name - - var record = objectType.recordDeclaration + var record = targetType.recordDeclaration if (record == null) { // We access an unknown field of an unknown record. so we need to handle that along the // way as well - record = tryRecordInference(objectType, locationHint = ref, updateType = true) + record = tryRecordInference(targetType, locationHint = ref, updateType = true) } if (record == null) { @@ -198,7 +203,7 @@ internal fun TranslationContext.tryFieldInference( val declaration = ref.newFieldDeclaration( - name.localName, + ref.name.localName, // we will set the type later through the type inference observer record.unknownType(), listOf(), diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index a576b9f742..30b4718970 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -44,7 +44,7 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import tryFieldInference import tryFunctionInference -import tryGlobalVariableInference +import tryVariableInference /** * Creates new connections between the place where a variable is declared and where it is used. @@ -161,34 +161,31 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return target } - protected fun handleReference(currentClass: RecordDeclaration?, current: Node?) { - val language = current?.language - - if (current !is Reference || current is MemberExpression) return + protected fun handleReference(currentClass: RecordDeclaration?, ref: Reference) { + val language = ref.language // Ignore references to anonymous identifiers, if the language supports it (e.g., the _ // identifier in Go) if ( - language is HasAnonymousIdentifier && - current.name.localName == language.anonymousIdentifier + language is HasAnonymousIdentifier && ref.name.localName == language.anonymousIdentifier ) { return } // Ignore references to "super" if the language has super expressions, because they will be // handled separately in handleMemberExpression - if (language is HasSuperClasses && current.name.localName == language.superClassKeyword) { + if (language is HasSuperClasses && ref.name.localName == language.superClassKeyword) { return } // Find a list of candidate symbols. Currently, this is only used the in the "next-gen" call // resolution, but in future this will also be used in resolving regular references. - current.candidates = scopeManager.findSymbols(current.name, current.location).toSet() + ref.candidates = scopeManager.findSymbols(ref.name, ref.location).toSet() // Preparation for a future without legacy call resolving. Taking the first candidate is not // ideal since we are running into an issue with function pointers here (see workaround // below). - var wouldResolveTo = current.candidates.singleOrNull() + var wouldResolveTo = ref.candidates.singleOrNull() // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this @@ -199,7 +196,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // of this call expression back to its original variable declaration. In the future, we want // to extend this particular code to resolve all callee references to their declarations, // i.e., their function definitions and get rid of the separate CallResolver. - if (current.resolutionHelper is CallExpression) { + if (ref.resolutionHelper is CallExpression) { // Peek into the declaration, and if it is only one declaration and a variable, we can // proceed normally, as we are running into the special case explained above. Otherwise, // we abort here (for now). @@ -213,7 +210,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // percentage of references now. if (wouldResolveTo is FunctionDeclaration) { // We need to invoke the legacy resolver, just to be sure - var legacy = scopeManager.resolveReference(current) + var legacy = scopeManager.resolveReference(ref) // This is just for us to catch these differences in symbol resolving in the future. The // difference is pretty much only that the legacy system takes parameters of the @@ -230,45 +227,27 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // Only consider resolving, if the language frontend did not specify a resolution. If we // already have populated the wouldResolveTo variable, we can re-use this instead of // resolving again - var refersTo = current.refersTo ?: wouldResolveTo + var refersTo = ref.refersTo ?: wouldResolveTo var recordDeclType: Type? = null if (currentClass != null) { recordDeclType = currentClass.toType() } - val helperType = current.resolutionHelper?.type + val helperType = ref.resolutionHelper?.type if (helperType is FunctionPointerType && refersTo == null) { - refersTo = resolveMethodFunctionPointer(current, helperType) + refersTo = resolveMethodFunctionPointer(ref, helperType) } // If we did not resolve the reference up to this point, we can try to infer the declaration if (refersTo == null) { - refersTo = - // Try to infer fields or namespace variables, if it makes sense - if ( - language is HasImplicitReceiver && - !current.name.isQualified() && - !current.isStaticAccess && - recordDeclType is ObjectType && - recordDeclType.recordDeclaration != null - ) { - ctx.tryFieldInference(current, recordDeclType) - } else { - // We can try to infer a possible global variable (either at top-level or - // namespace level), if the language supports this - ctx.tryGlobalVariableInference(current) - } + refersTo = ctx.tryVariableInference(language, ref) } if (refersTo != null) { - current.refersTo = refersTo + ref.refersTo = refersTo } else { - Util.warnWithFileLocation( - current, - log, - "Did not find a declaration for ${current.name}" - ) + Util.warnWithFileLocation(ref, log, "Did not find a declaration for ${ref.name}") } } @@ -353,6 +332,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { val record = type.recordDeclaration if (record != null) { + // TODO(oxisto): This should use symbols rather than the AST fields member = record.fields .filter { it.name.lastPartsMatch(reference.name) } @@ -367,15 +347,16 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { .map { it.definition } .firstOrNull() } + if (member == null && record is EnumDeclaration) { member = record.entries[reference.name.localName] } - if (member != null) { - return member + if (member == null) { + member = ctx.tryFieldInference(reference, containingClass) } - return ctx.tryFieldInference(reference, containingClass) + return member } protected fun handle(node: Node?, currClass: RecordDeclaration?) { From d4d29d64a6d7851dab4c0abee9a585f6c0915d96 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Wed, 2 Oct 2024 00:03:23 +0200 Subject: [PATCH 19/19] Fixed test --- .../cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt b/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt index 0ac64f07ca..b553cd7f9b 100644 --- a/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt +++ b/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt @@ -262,7 +262,7 @@ class TypeScriptLanguageFrontendTest { assertLocalName("Users", usersComponent) assertEquals(1, usersComponent.constructors.size) assertEquals(2, usersComponent.methods.size) - assertEquals(/*0*/ 2 /* because of dummy nodes */, usersComponent.fields.size) + assertEquals(0, usersComponent.fields.size) val render = usersComponent.methods["render"] assertNotNull(render)