Skip to content

Commit

Permalink
Moving python import decision to pass (#1889)
Browse files Browse the repository at this point in the history
  • Loading branch information
oxisto authored Dec 16, 2024
1 parent 9738250 commit 9a762bb
Show file tree
Hide file tree
Showing 15 changed files with 370 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ private constructor(
registerPass<ControlFlowSensitiveDFGPass>()
registerPass<FilenameMapper>()
registerPass<ResolveCallExpressionAmbiguityPass>()
registerPass<ResolveMemberExpressionAmbiguityPass>()
useDefaultPasses = true
return this
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,20 @@ interface HasGlobalVariables : LanguageTrait
*/
interface HasGlobalFunctions : LanguageTrait

/**
 * A common trait for language classes, in which supposed member expressions (and thus also member
 * calls) in the form of "a.b" are ambiguous: they can either be a real field/method access (when
 * "a" is an object) or a qualified name/call because of an import (when "a" is an import /
 * namespace).
 *
 * We can only resolve this after we have dealt with imports and know all symbols. Therefore, we
 * invoke the [ResolveMemberExpressionAmbiguityPass].
 */
interface HasMemberExpressionAmbiguity : LanguageTrait

/**
* A common super-class for all language traits that arise because they are an ambiguity of a
* function call, e.g., function-style casts. This means that we cannot differentiate between a
* [CallExpression] and other expressions during the frontend and we need to invoke the
* [CallExpression] and other expressions during the frontend, and we need to invoke the
* [ResolveCallExpressionAmbiguityPass] to resolve this.
*/
sealed interface HasCallExpressionAmbiguity : LanguageTrait
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1011,13 +1011,46 @@ fun Expression?.unwrapReference(): Reference? {
/** Returns the [TranslationUnitDeclaration] where this node is located in. */
val Node.translationUnit: TranslationUnitDeclaration?
get() {
var node: Node? = this
while (node != null) {
if (node is TranslationUnitDeclaration) {
return node
}
node = node.astParent
return firstParentOrNull { it is TranslationUnitDeclaration } as? TranslationUnitDeclaration
}

/**
 * This helper property can be used to find out if a particular expression (usually a
 * [CallExpression] or a [Reference]) is imported through an [ImportDeclaration].
 *
 * It returns the list of matching [ImportDeclaration] nodes; the list is empty if the expression
 * was not imported.
 */
val Expression.importedFrom: List<ImportDeclaration>
get() {
if (this is CallExpression) {
return this.callee.importedFrom
} else if (this is MemberExpression) {
return this.base.importedFrom
} else if (this is Reference) {
val imports = this.translationUnit.imports

return if (name.parent == null) {
// If the name does not have a parent, this reference could directly be the name
// of an import, let's check
imports.filter { it.name.lastPartsMatch(name) }
} else {
// Otherwise, the parent name could be the import
imports.filter { it.name == this.name.parent }
} ?: listOf<ImportDeclaration>()
}

return null
return listOf<ImportDeclaration>()
}

/**
 * Tells whether this expression is imported from another source.
 *
 * The value is `true` if [importedFrom] contains at least one entry and `false` if it is empty.
 */
val Expression.isImported: Boolean
    get() = importedFrom.isNotEmpty()
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,9 @@ abstract class Node :
* further children that have no alternative connection paths to the rest of the graph.
*/
fun disconnectFromGraph() {
// Disconnect all AST children first
this.astChildren.forEach { it.disconnectFromGraph() }

nextDFGEdges.clear()
prevDFGEdges.clear()
prevCDGEdges.clear()
Expand All @@ -292,6 +295,8 @@ abstract class Node :
nextPDGEdges.clear()
nextEOGEdges.clear()
prevEOGEdges.clear()

astParent = null
}

override fun toString(): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ import de.fraunhofer.aisec.cpg.graph.StatementHolder
import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration
import de.fraunhofer.aisec.cpg.graph.edges.ast.AstEdge
import de.fraunhofer.aisec.cpg.graph.edges.collections.EdgeCollection
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberCallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
import de.fraunhofer.aisec.cpg.graph.types.HasType
import de.fraunhofer.aisec.cpg.passes.Pass
import de.fraunhofer.aisec.cpg.processing.strategy.Strategy
Expand Down Expand Up @@ -350,14 +354,17 @@ object SubgraphWalker {
/**
* Tries to replace the [old] expression with a [new] one, given the [parent].
*
* There are three things to consider:
* There are different things to consider:
* - First, this only works if [parent] is either an [ArgumentHolder] or [StatementHolder].
* Otherwise, we cannot instruct the parent to exchange the node
* - Second, since exchanging the node has influence on their edges (such as EOG, DFG, etc.), we
* only support a replacement very early in the pass system. To be specific, we only allow
* replacement before any DFG edges are set. We are re-wiring EOG edges, but nothing else. If one
* replacement BEFORE any DFG edges are set. We are re-wiring EOG edges, but nothing else. If one
* tries to replace a node with existing [Node.nextDFG] or [Node.prevDFG], we fail.
* - We also migrate [HasType.typeObservers] from the [old] to the [new] node.
* - Lastly, if the [new] node is a [CallExpression.callee] of a [CallExpression] parent, and the
* [old] and [new] expressions are of different types (e.g., exchanging a simple [Reference] for a
* [MemberExpression]), we also replace the [CallExpression] with a [MemberCallExpression].
*/
context(ContextProvider)
fun SubgraphWalker.ScopedWalker.replace(parent: Node?, old: Expression, new: Expression): Boolean {
Expand All @@ -370,6 +377,25 @@ fun SubgraphWalker.ScopedWalker.replace(parent: Node?, old: Expression, new: Exp

val success =
when (parent) {
is CallExpression -> {
if (parent.callee == old) {
// Now we are running into a problem: if the previous callee and the new callee
// are of different types (reference vs. member expression), we also need to
// replace the whole call expression instead.
if (parent is MemberCallExpression && new is Reference) {
val newCall = parent.toCallExpression(new)
return replace(parent.astParent, parent, newCall)
} else if (new is MemberExpression) {
val newCall = parent.toMemberCallExpression(new)
return replace(parent.astParent, parent, newCall)
} else {
parent.callee = new
true
}
} else {
parent.replace(old, new)
}
}
is ArgumentHolder -> parent.replace(old, new)
is StatementHolder -> parent.replace(old, new)
else -> {
Expand Down Expand Up @@ -399,9 +425,45 @@ fun SubgraphWalker.ScopedWalker.replace(parent: Node?, old: Expression, new: Exp
new.registerTypeObserver(it)
}

old.astParent = null
new.astParent = parent

// Make sure to inform the walker about our change
this.registerReplacement(old, new)
}

return success
}

/**
 * Transfers the properties of this [CallExpression] to the (freshly created) [call] and installs
 * [callee] as the callee of [call].
 *
 * The following properties are carried over: context, language, scope, arguments, type, assigned
 * types, code, location, argument index, annotations, comment, file as well as the implicit and
 * inferred flags.
 *
 * @param call the target expression that receives the properties of this call.
 * @param callee the new callee of [call]; its `resolutionHelper` is pointed to [call].
 */
private fun CallExpression.duplicateTo(call: CallExpression, callee: Reference) {
call.ctx = this.ctx
call.language = this.language
call.scope = this.scope
// NOTE(review): the arguments are assigned as-is; whether the setter copies or re-parents them is
// not visible from this block -- confirm before relying on either behavior.
call.arguments = this.arguments
call.type = this.type
call.assignedTypes = this.assignedTypes
call.code = this.code
call.location = this.location
call.argumentIndex = this.argumentIndex
call.annotations = this.annotations
call.comment = this.comment
call.file = this.file
// Install the new callee and link it back to the new call
call.callee = callee
callee.resolutionHelper = call
call.isImplicit = this.isImplicit
call.isInferred = this.isInferred
}

/**
 * Builds a plain [CallExpression] out of this [MemberCallExpression]. The given [callee] becomes
 * the callee of the new call; all other properties are carried over by [duplicateTo].
 */
fun MemberCallExpression.toCallExpression(callee: Reference): CallExpression =
    CallExpression().also { duplicateTo(it, callee) }

/**
 * Builds a [MemberCallExpression] out of this [CallExpression]. The given [callee] becomes the
 * callee of the new call; all other properties are carried over by [duplicateTo].
 */
fun CallExpression.toMemberCallExpression(callee: MemberExpression): MemberCallExpression =
    MemberCallExpression().also { duplicateTo(it, callee) }
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.passes

import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.frontends.HasCallExpressionAmbiguity
import de.fraunhofer.aisec.cpg.frontends.HasMemberExpressionAmbiguity
import de.fraunhofer.aisec.cpg.graph.Name
import de.fraunhofer.aisec.cpg.graph.codeAndLocationFrom
import de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import de.fraunhofer.aisec.cpg.graph.fqn
import de.fraunhofer.aisec.cpg.graph.imports
import de.fraunhofer.aisec.cpg.graph.newReference
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression
import de.fraunhofer.aisec.cpg.graph.translationUnit
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker
import de.fraunhofer.aisec.cpg.helpers.replace
import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn
import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore
import de.fraunhofer.aisec.cpg.passes.configuration.RequiresLanguageTrait

/**
 * A translation unit pass that resolves ambiguities in member expressions within a translation
 * unit. This pass checks whether the (reconstructed) name of the base of a member expression
 * refers to an imported namespace and, if so, replaces the member expression with a reference
 * using the fully qualified name.
 *
 * This pass depends on the [ImportResolver] pass and requires the language trait
 * [HasMemberExpressionAmbiguity]. It is executed before the [EvaluationOrderGraphPass].
 *
 * Note that this is a different trait than [HasCallExpressionAmbiguity], which is handled by the
 * [ResolveCallExpressionAmbiguityPass].
 *
 * @constructor Initializes the pass with the provided translation context.
 */
@ExecuteBefore(EvaluationOrderGraphPass::class)
@DependsOn(ImportResolver::class)
@RequiresLanguageTrait(HasMemberExpressionAmbiguity::class)
class ResolveMemberExpressionAmbiguityPass(ctx: TranslationContext) : TranslationUnitPass(ctx) {

    /** The walker used to traverse the translation unit. It is re-created on each [accept]. */
    lateinit var walker: SubgraphWalker.ScopedWalker

    /**
     * Walks over [tu] and tries to resolve the ambiguity of every [MemberExpression] that is
     * encountered.
     */
    override fun accept(tu: TranslationUnitDeclaration) {
        walker = SubgraphWalker.ScopedWalker(ctx.scopeManager)
        walker.registerHandler { _, _, node ->
            if (node is MemberExpression) {
                resolveAmbiguity(node)
            }
        }

        walker.iterate(tu)
    }

    /**
     * Resolves ambiguities in a given member expression. Checks whether the reconstructed name of
     * the base of the member expression refers to an imported namespace, and if so, replaces the
     * member expression with a reference that uses the fully qualified name.
     *
     * @param me The member expression to disambiguate and potentially replace.
     */
    private fun resolveAmbiguity(me: MemberExpression) {
        // We need to check, if our "base" is really a name that refers to an import, because in
        // this case we do not have a member expression, but a reference with a qualified name
        val baseName = me.base.reconstructedImportName
        if (!isImportedNamespace(baseName, me)) {
            return
        }

        with(me) {
            val ref = newReference(baseName.fqn(me.name.localName)).codeAndLocationFrom(this)
            walker.replace(me.astParent, me, ref)
        }
    }

    /**
     * Decides whether [name] should be treated as an imported namespace.
     *
     * This is the case if the symbol lookup yields exactly one [NamespaceDeclaration], or
     * otherwise if one of the imports of the translation unit of [hint] matches [name].
     *
     * @param name The name to check.
     * @param hint The expression that provides language, location and start scope for the lookup
     *   as well as the translation unit whose imports are considered.
     * @return `true` if [name] is considered to be an imported namespace.
     */
    private fun isImportedNamespace(name: Name, hint: Expression): Boolean {
        val resolved =
            scopeManager.lookupSymbolByName(
                name,
                language = hint.language,
                location = hint.location,
                startScope = hint.scope
            )
        if (resolved.singleOrNull() is NamespaceDeclaration) {
            return true
        }

        // It still could be an imported namespace of an imported package that we do not know.
        // The problem is that we do not really know at this point whether we import a
        // (sub)module or a global variable of the namespace. We tend to assume that this is a
        // namespace
        return hint.translationUnit.imports.any {
            it.name.lastPartsMatch(name) || it.name.startsWith(name)
        }
    }

    override fun cleanup() {
        // Nothing to do
    }
}

/**
 * Reconstructs the name of this expression as if it were part of an imported symbol.
 *
 * For a [MemberExpression], the reconstructed name of its [MemberExpression.base] is combined with
 * the local name of the member; for every other expression, its own name is returned. This is
 * needed because the [MemberExpression.name] includes the type of the base instead of its name,
 * and thus it might be "UNKNOWN".
 */
val Expression.reconstructedImportName: Name
    get() =
        when (this) {
            is MemberExpression -> this.base.reconstructedImportName.fqn(this.name.localName)
            else -> this.name
        }
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
package de.fraunhofer.aisec.cpg.frontends.python

import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import jep.python.PyObject
Expand Down Expand Up @@ -473,19 +472,7 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) :
private fun handleAttribute(node: Python.AST.Attribute): Expression {
var base = handle(node.value)

// We do a quick check, if this refers to an import. This is faster than doing
// this in a pass and most likely valid, since we are under the assumption that
// our current file is (more or less) complete, but we might miss some
// additional dependencies
var ref =
if (isImport(base.name)) {
// Yes, it's an import, so we need to construct a reference with an FQN
newReference(base.name.fqn(node.attr), rawNode = node)
} else {
newMemberExpression(name = node.attr, base = base, rawNode = node)
}

return ref
return newMemberExpression(name = node.attr, base = base, rawNode = node)
}

private fun handleConstant(node: Python.AST.Constant): Expression {
Expand Down Expand Up @@ -553,14 +540,6 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) :
return ret
}

private fun isImport(name: Name): Boolean {
val decl =
frontend.scopeManager.currentScope
?.lookupSymbol(name.localName, replaceImports = false)
?.filterIsInstance<ImportDeclaration>()
return decl?.isNotEmpty() == true
}

private fun handleName(node: Python.AST.Name): Expression {
val r = newReference(name = node.id, rawNode = node)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class PythonLanguage :
Language<PythonLanguageFrontend>(),
HasShortCircuitOperators,
HasOperatorOverloading,
HasFunctionStyleConstruction {
HasFunctionStyleConstruction,
HasMemberExpressionAmbiguity {
override val fileExtensions = listOf("py", "pyi")
override val namespaceDelimiter = "."
@Transient
Expand Down
Loading

0 comments on commit 9a762bb

Please sign in to comment.