From 344d3663128c4f7f3f2a6c96ab95f551eb3e905f Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Sun, 6 Oct 2024 19:19:07 +0200 Subject: [PATCH] Add new function `lookupUniqueTypeSymbolByName` (#1781) * Add new function `lookupUniqueTypeSymbolByName` This adds two new functions `ScopeManager.lookupUniqueTypeSymbolByName` and `Reference.doesReferToType`. This harmonizes a lot of boilerplate code in type resolver, cxx extra pass and resolve ambuigity pass, which were all trying to achieve the same thing. Fixes #1766 * Fixed issue with Go test, more robust handling of wrapped references * Addressed code review --- .../de/fraunhofer/aisec/cpg/ScopeManager.kt | 25 ++++++++ .../de/fraunhofer/aisec/cpg/TypeManager.kt | 29 +++++++++ .../ResolveCallExpressionAmbiguityPass.kt | 60 +++++-------------- .../aisec/cpg/passes/TypeResolver.kt | 15 +---- .../aisec/cpg/passes/CXXExtraPass.kt | 23 +++---- .../cpg/frontends/cxx/CXXExpressionTest.kt | 2 +- .../aisec/cpg/passes/GoExtraPass.kt | 14 ----- 7 files changed, 84 insertions(+), 84 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index f45809b3a5..104fdfed57 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -31,6 +31,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.* import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.graph.types.DeclaresType import de.fraunhofer.aisec.cpg.graph.types.FunctionPointerType import de.fraunhofer.aisec.cpg.graph.types.IncompleteType import de.fraunhofer.aisec.cpg.graph.types.Type @@ -1009,6 +1010,30 @@ class ScopeManager : ScopeProvider { return list } + + /** + * This function tries to look up the symbol contained in [name] (using [lookupSymbolByName]) + * and returns a [DeclaresType] node, if this name resolved to something which declares a type. + * + * It is important to know that the lookup needs to be unique, so if multiple declarations match + * this symbol, a warning is triggered and null is returned. + */ + fun lookupUniqueTypeSymbolByName(name: Name, startScope: Scope?): DeclaresType? { + var symbols = + lookupSymbolByName(name = name, startScope = startScope) { it is DeclaresType } + .filterIsInstance() + + // We need to have a single match, otherwise we have an ambiguous type, and we cannot + // normalize it. + if (symbols.size > 1) { + LOGGER.warn( + "Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.", + name + ) + } + + return symbols.singleOrNull() + } } /** diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt index 8745e7944d..3069f74195 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt @@ -32,7 +32,10 @@ import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.TemplateDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.Scope import de.fraunhofer.aisec.cpg.graph.scopes.TemplateScope +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.* +import de.fraunhofer.aisec.cpg.passes.Pass +import de.fraunhofer.aisec.cpg.passes.ResolveCallExpressionAmbiguityPass import java.util.* import java.util.concurrent.ConcurrentHashMap import org.slf4j.Logger @@ -353,3 +356,29 @@ val Collection.commonType: Type? // root node is 0) and re-wrap the final common type back into the original wrap state return commonAncestors.minByOrNull(Type.Ancestor::depth)?.type?.let { typeOp.apply(it) } } + +/** + * A utility function that checks whether our [Reference] refers to a [Type]. This is used by many + * passes that replace certain [Reference] nodes with other nodes, e.g., the + * [ResolveCallExpressionAmbiguityPass]. + * + * Note: This involves some symbol lookup (using [ScopeManager.lookupUniqueTypeSymbolByName]), so + * this can only be used in passes. + */ +context(Pass<*>) +fun Reference.nameIsType(): Type? { + // First, check if it is a simple type + var type = language?.getSimpleTypeOf(name) + if (type != null) { + return type + } + + // This could also be a typedef + type = scopeManager.typedefFor(name, scope) + if (type != null) { + return type + } + + // Lastly, check if the reference contains a symbol that points to type (declaration) + return scopeManager.lookupUniqueTypeSymbolByName(name, scope)?.declaredType +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt index 42f5ec7c8b..ecd614701e 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ResolveCallExpressionAmbiguityPass.kt @@ -40,6 +40,7 @@ import de.fraunhofer.aisec.cpg.graph.types.ObjectType import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker import de.fraunhofer.aisec.cpg.helpers.replace +import de.fraunhofer.aisec.cpg.nameIsType import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore import de.fraunhofer.aisec.cpg.passes.configuration.RequiresLanguageTrait @@ -85,27 +86,20 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU var callee = call.callee val language = callee.language + // We need to "unwrap" some references because they might be nested in unary operations such + // as pointers. We are interested in the references in the "core". We can skip all + // references that are member expressions + val ref = callee.unwrapReference() + if (ref == null || ref is MemberExpression) { + return + } + // Check, if this is cast is really a construct expression (if the language supports // functional-constructs) if (language is HasFunctionStyleConstruction) { - // Make sure, we do not accidentally "construct" primitive types - if (language.builtInTypes.contains(callee.name.toString()) == true) { - return - } - - val fqn = - if (callee.name.parent == null) { - scopeManager.currentNamespace.fqn( - callee.name.localName, - delimiter = callee.name.delimiter - ) - } else { - callee.name - } - // Check for our type. We are only interested in object types - val type = typeManager.lookupResolvedType(fqn) - if (type is ObjectType) { + var type = ref.nameIsType() + if (type is ObjectType && !type.isPrimitive) { walker.replaceCallWithConstruct(type, parent, call) } } @@ -114,34 +108,10 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU // cast expression. And this is only really necessary, if the function call has a single // argument. if (language is HasFunctionStyleCasts && call.arguments.size == 1) { - var pointer = false - // If the argument is a UnaryOperator, unwrap them - if (callee is UnaryOperator && callee.operatorCode == "*") { - pointer = true - callee = callee.input - } - - // First, check if this is a built-in type - var builtInType = language.getSimpleTypeOf(callee.name) - if (builtInType != null) { - walker.replaceCallWithCast(builtInType, parent, call, false) - } else { - // If not, then this could still refer to an existing type. We need to make sure - // that we take the current namespace into account - val fqn = - if (callee.name.parent == null) { - scopeManager.currentNamespace.fqn( - callee.name.localName, - delimiter = callee.name.delimiter - ) - } else { - callee.name - } - - val type = typeManager.lookupResolvedType(fqn) - if (type != null) { - walker.replaceCallWithCast(type, parent, call, pointer) - } + // Check if it is type and replace the call + var type = ref.nameIsType() + if (type != null) { + walker.replaceCallWithCast(type, parent, call, false) } } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index 7ceab7561a..e4345e98d2 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -104,20 +104,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { // filter for nodes that implement DeclaresType, because otherwise we will get a lot of // constructor declarations and such with the same name. It seems this is ok since most // languages will prefer structs/classes over functions when resolving types. - var symbols = - scopeManager - .lookupSymbolByName(type.name, startScope = type.scope) { it is DeclaresType } - .filterIsInstance() - - // We need to have a single match, otherwise we have an ambiguous type, and we cannot - // normalize it. - if (symbols.size > 1) { - log.warn( - "Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.", - name - ) - } - var declares = symbols.singleOrNull() + var declares = scopeManager.lookupUniqueTypeSymbolByName(type.name, type.scope) // If we did not find any declaration, we can try to infer a record declaration for it if (declares == null) { diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt index b6eeae1365..0bd84d65fe 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/CXXExtraPass.kt @@ -36,6 +36,7 @@ import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker import de.fraunhofer.aisec.cpg.helpers.replace +import de.fraunhofer.aisec.cpg.nameIsType import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore @@ -77,7 +78,8 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { * the graph. */ private fun removeBracketOperators(node: UnaryOperator, parent: Node?) { - if (node.operatorCode == "()" && !typeManager.typeExists(node.input.name)) { + val input = node.input + if (node.operatorCode == "()" && input is Reference && input.nameIsType() == null) { // It was really just parenthesis around an identifier, but we can only make this // distinction now. // @@ -101,24 +103,25 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) { private fun convertOperators(binOp: BinaryOperator, parent: Node?) { val fakeUnaryOp = binOp.lhs val language = fakeUnaryOp.language as? CLanguage + // We need to check, if the expression in parentheses is really referring to a type or // not. A common example is code like `(long) &addr`. We could end up parsing this as a // binary operator with the left-hand side of `(long)`, an operator code `&` and a rhs // of `addr`. - if ( - language != null && - fakeUnaryOp is UnaryOperator && - fakeUnaryOp.operatorCode == "()" && - typeManager.typeExists(fakeUnaryOp.input.name) - ) { - // If the name (`long` in the example) is a type, then the unary operator (`(long)`) - // is really a cast and our binary operator is really a unary operator `&addr`. + if (language == null || fakeUnaryOp !is UnaryOperator || fakeUnaryOp.operatorCode != "()") { + return + } + + // If the name (`long` in the example) is a type, then the unary operator (`(long)`) + // is really a cast and our binary operator is really a unary operator `&addr`. + var type = (fakeUnaryOp.input as? Reference)?.nameIsType() + if (type != null) { // We need to perform the following steps: // * create a cast expression out of the ()-unary operator, with the type that is // referred to in the op. val cast = newCastExpression().codeAndLocationFrom(fakeUnaryOp) cast.language = language - cast.castType = fakeUnaryOp.objectType(fakeUnaryOp.input.name) + cast.castType = type // * create a unary operator with the rhs of the binary operator (and the same // operator code). diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt index aa6d6ee292..0b5276252a 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXExpressionTest.kt @@ -45,6 +45,6 @@ class CXXExpressionTest { // We should have two calls (int and myint64) val casts = tu.casts assertEquals(2, casts.size) - assertEquals(listOf("int", "myint64"), casts.map { it.name.localName }) + assertEquals(listOf("int", "long long int"), casts.map { it.name.localName }) } } diff --git a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/GoExtraPass.kt b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/GoExtraPass.kt index 5297e13826..77e3cc32ac 100644 --- a/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/GoExtraPass.kt +++ b/cpg-language-go/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/GoExtraPass.kt @@ -81,20 +81,6 @@ import de.fraunhofer.aisec.cpg.passes.inference.startInference * In the frontend we only do the assignment, therefore we need to create a new * [VariableDeclaration] for `b` and inject a [DeclarationStatement]. * - * ## Converting Call Expressions into Cast Expressions - * - * In Go, it is possible to convert compatible types by "calling" the type name as a function, such - * as - * - * ```go - * var i = int(2.0) - * ``` - * - * This is also possible with more complex types, such as interfaces or aliased types, as long as - * they are compatible. Because types in the same package can be defined in multiple files, we - * cannot decide during the frontend run. Therefore, we need to execute this pass before the - * [SymbolResolver] and convert certain [CallExpression] nodes into a [CastExpression]. - * * ## Adjust Names of Keys in Key Value Expressions to FQN * * This pass also adjusts the names of keys in a [KeyValueExpression], which is part of an