diff --git a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/UnreachableEOGPass.kt b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/UnreachableEOGPass.kt index 01ab3025e4..40c6a95e82 100644 --- a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/UnreachableEOGPass.kt +++ b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/UnreachableEOGPass.kt @@ -35,13 +35,14 @@ import de.fraunhofer.aisec.cpg.graph.edges.flows.EvaluationOrder import de.fraunhofer.aisec.cpg.graph.statements.IfStatement import de.fraunhofer.aisec.cpg.graph.statements.WhileStatement import de.fraunhofer.aisec.cpg.helpers.* +import de.fraunhofer.aisec.cpg.helpers.LatticeElement import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn /** * A [Pass] which uses a simple logic to determine constant values and mark unreachable code regions * by setting the [EvaluationOrder.unreachable] property to true. */ -@DependsOn(ControlFlowSensitiveDFGPass::class) +@DependsOn(ControlFlowSensitiveDFGPass::class, softDependency = true) class UnreachableEOGPass(ctx: TranslationContext) : TranslationUnitPass(ctx) { override fun cleanup() { // Nothing to do diff --git a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt index 12598f020c..bebcd62644 100644 --- a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt +++ b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt @@ -219,10 +219,16 @@ fun dataFlow( from: Node, predicate: (Node) -> Boolean, collectFailedPaths: Boolean = true, - findAllPossiblePaths: Boolean = true + findAllPossiblePaths: Boolean = true, + continueAfterHit: Boolean = false ): QueryTree { val evalRes = - from.followNextFullDFGEdgesUntilHit(collectFailedPaths, findAllPossiblePaths, predicate) + from.followNextFullDFGEdgesUntilHit( + collectFailedPaths, + findAllPossiblePaths, + continueAfterHit, + predicate + ) val allPaths = 
evalRes.fulfilled.map { QueryTree(it) }.toMutableList() if (collectFailedPaths) allPaths.addAll(evalRes.failed.map { QueryTree(it) }) return QueryTree( diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt index 56e5b62bbf..b4221681ff 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt @@ -539,7 +539,7 @@ private constructor( registerPass() registerPass() // creates EOG registerPass() - registerPass() + // registerPass() registerPass() registerPass() registerPass() diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt index aa13ec0ded..3b88579656 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/ExpressionBuilder.kt @@ -477,6 +477,48 @@ fun MetadataProvider.newReference( return node } +/** + * Creates a new [PointerReference]. The [MetadataProvider] receiver will be used to fill different + * meta-data using [Node.applyMetadata]. Calling this extension function outside of Kotlin requires + * an appropriate [MetadataProvider], such as a [LanguageFrontend] as an additional prepended + * argument. + */ +@JvmOverloads +fun MetadataProvider.newPointerReference( + name: CharSequence?, + type: Type = unknownType(), + rawNode: Any? = null +): PointerReference { + val node = PointerReference() + node.applyMetadata(this, name, rawNode, true) + + node.type = type + + log(node) + return node +} + +/** + * Creates a new [PointerDereference]. The [MetadataProvider] receiver will be used to fill different + * meta-data using [Node.applyMetadata].
Calling this extension function outside of Kotlin requires + * an appropriate [MetadataProvider], such as a [LanguageFrontend] as an additional prepended + * argument. + */ +@JvmOverloads +fun MetadataProvider.newPointerDereference( + name: CharSequence?, + type: Type = unknownType(), + rawNode: Any? = null +): PointerDereference { + val node = PointerDereference() + node.applyMetadata(this, name, rawNode, true) + + node.type = type + + log(node) + return node +} + /** * Creates a new [DeleteExpression]. The [MetadataProvider] receiver will be used to fill different * meta-data using [Node.applyMetadata]. Calling this extension function outside of Kotlin requires diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt index e4f287f6c4..6bb5e8f909 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt @@ -480,6 +480,7 @@ fun Node.followXUntilHit( x: (Node) -> List, collectFailedPaths: Boolean = true, findAllPossiblePaths: Boolean = true, + continueAfterHit: Boolean = false, predicate: (Node) -> Boolean ): FulfilledAndFailedPaths { // Looks complicated but at least it's not recursive... @@ -517,9 +518,18 @@ fun Node.followXUntilHit( } // The next node is new in the current path (i.e., there's no loop), so we add the path // with the next step to the worklist. + // For our daily dose of special magic, we check that the path reaching the next node + // differs. If the path is different, we do accept seeing the same node multiple times. 
+ val indexedPath = + currentPath + .mapIndexed { index, node -> if (node == next) Pair(index, node) else null } + .filterNotNull() if ( - next !in currentPath && - (findAllPossiblePaths || + (indexedPath.isEmpty() || + indexedPath.all { + it.first == 0 || currentNode != currentPath[it.first - 1] + }) && + ((findAllPossiblePaths && currentPath.count { it == next } <= 2) || (next !in alreadySeenNodes && worklist.none { next in it })) ) { worklist.add(nextPath) @@ -542,12 +552,14 @@ fun Node.followXUntilHit( fun Node.followNextFullDFGEdgesUntilHit( collectFailedPaths: Boolean = true, findAllPossiblePaths: Boolean = true, + continueAfterHit: Boolean = false, predicate: (Node) -> Boolean ): FulfilledAndFailedPaths { return followXUntilHit( x = { currentNode -> currentNode.nextFullDFG }, collectFailedPaths = collectFailedPaths, findAllPossiblePaths = findAllPossiblePaths, + continueAfterHit = continueAfterHit, predicate = predicate ) } @@ -1001,8 +1013,8 @@ private fun Node.eogDistanceTo(to: Node): Int { fun Expression?.unwrapReference(): Reference?
{ return when { this is Reference -> this - this is UnaryOperator && (this.operatorCode == "*" || this.operatorCode == "&") -> - this.input.unwrapReference() + this is PointerReference -> this + this is PointerDereference -> this this is CastExpression -> this.expression.unwrapReference() else -> null } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/MermaidPrinter.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/MermaidPrinter.kt index 65ff85b03b..8b23d81ddd 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/MermaidPrinter.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/MermaidPrinter.kt @@ -28,6 +28,7 @@ package de.fraunhofer.aisec.cpg.graph import de.fraunhofer.aisec.cpg.graph.edges.Edge import de.fraunhofer.aisec.cpg.graph.edges.flows.Dataflow import de.fraunhofer.aisec.cpg.graph.edges.flows.PartialDataflowGranularity +import de.fraunhofer.aisec.cpg.graph.edges.flows.PointerDataflowGranularity import de.fraunhofer.aisec.cpg.helpers.identitySetOf import kotlin.reflect.KProperty1 @@ -115,6 +116,8 @@ private fun Edge.label(): String { var granularity = this.granularity if (granularity is PartialDataflowGranularity) { builder.append(" (partial, ${granularity.partialTarget?.name})") + } else if (granularity is PointerDataflowGranularity) { + builder.append(" (pointer, ${granularity.pointerTarget.name})") } else { builder.append(" (full)") } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Name.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Name.kt index c8cf38536a..bb01c6261d 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Name.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Name.kt @@ -38,7 +38,7 @@ import kotlin.uuid.Uuid */ class Name( /** The local name (sometimes also called simple name) without any namespace information. 
*/ - val localName: String, + var localName: String, /** The parent name, e.g., the namespace this name lives in. */ val parent: Name? = null, /** A potential namespace delimiter, usually either `.` or `::`. */ diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt index 8c07a46ce2..268d341144 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Node.kt @@ -178,13 +178,13 @@ abstract class Node : /** Incoming data flow edges */ @Relationship(value = "DFG", direction = Relationship.Direction.INCOMING) - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class) var prevDFGEdges: Dataflows = Dataflows(this, mirrorProperty = Node::nextDFGEdges, outgoing = false) protected set /** Virtual property for accessing [prevDFGEdges] without property edges. */ - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class) var prevDFG by unwrapping(Node::prevDFGEdges) /** @@ -192,7 +192,7 @@ abstract class Node : * [de.fraunhofer.aisec.cpg.graph.edges.flows.FullDataflowGranularity]. */ @DoNotPersist - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class, ControlFlowSensitiveDFGPass::class) val prevFullDFG: List get() { return prevDFGEdges @@ -201,14 +201,14 @@ abstract class Node : } /** Outgoing data flow edges */ - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class) @Relationship(value = "DFG", direction = Relationship.Direction.OUTGOING) var nextDFGEdges: Dataflows = Dataflows(this, mirrorProperty = Node::prevDFGEdges, outgoing = true) protected set /** Virtual property for accessing [nextDFGEdges] without property edges. 
*/ - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class) var nextDFG by unwrapping(Node::nextDFGEdges) /** @@ -216,7 +216,7 @@ abstract class Node : * [de.fraunhofer.aisec.cpg.graph.edges.flows.FullDataflowGranularity]. */ @DoNotPersist - @PopulatedByPass(DFGPass::class, ControlFlowSensitiveDFGPass::class) + @PopulatedByPass(DFGPass::class, PointsToPass::class, ControlFlowSensitiveDFGPass::class) val nextFullDFG: List get() { return nextDFGEdges.filter { it.granularity is FullDataflowGranularity }.map { it.end } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/PointerAccess.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/PointerAccess.kt new file mode 100644 index 0000000000..018af99668 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/PointerAccess.kt @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph + +/** A Dataflow for a pointer can have different types: Either to its address, or to its value. 
*/ +enum class PointerAccess { + ADDRESS, + VALUE +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt index 10f318283f..ed78577fe3 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/TypeBuilder.kt @@ -46,7 +46,7 @@ fun LanguageProvider.autoType(): Type { } fun MetadataProvider?.incompleteType(): Type { - return IncompleteType() + return IncompleteType((this as? LanguageProvider)?.language) } /** Returns a [PointerType] that describes an array reference to the current type. */ diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/Declaration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/Declaration.kt index 76d597190e..14f32aa751 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/Declaration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/Declaration.kt @@ -27,6 +27,7 @@ package de.fraunhofer.aisec.cpg.graph.declarations import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.scopes.Symbol +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemoryAddress import de.fraunhofer.aisec.cpg.persistence.DoNotPersist import org.neo4j.ogm.annotation.NodeEntity @@ -46,4 +47,12 @@ abstract class Declaration : Node() { get() { return this.name.localName } + + /** + * Each Declaration allocates new memory, AKA a new address, so we create a new MemoryAddress + * node + */ + open lateinit var memoryAddress: MemoryAddress + + fun memoryAddressIsInitialized() = ::memoryAddress.isInitialized } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/FunctionDeclaration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/FunctionDeclaration.kt index d489bd4c8a..f841e4de4c 100644 --- 
a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/FunctionDeclaration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/FunctionDeclaration.kt @@ -76,6 +76,15 @@ open class FunctionDeclaration : ValueDeclaration(), DeclarationHolder, EOGStart return if (isDefinition) this else field } + /** + * Saves the information on which parameter(s) of the function are modified by the function. + * This is interesting since we need to add DFG edges between the modified parameter and the + * respective argument(s). For each [ParameterDeclaration] as well as the + * [MethodDeclaration.receiver] that has some incoming DFG-edge within this + * [FunctionDeclaration], we store all previous DFG nodes. + */ + var functionSummary = mutableMapOf>>() + /** Returns true, if this function has a [body] statement. */ fun hasBody(): Boolean { return body != null diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/ParameterDeclaration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/ParameterDeclaration.kt index cd820498d0..ad2ef0689c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/ParameterDeclaration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/ParameterDeclaration.kt @@ -26,9 +26,11 @@ package de.fraunhofer.aisec.cpg.graph.declarations import de.fraunhofer.aisec.cpg.graph.HasDefault +import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.edges.ast.astOptionalEdgeOf import de.fraunhofer.aisec.cpg.graph.edges.unwrapping import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.ParameterMemoryValue import java.util.* import org.neo4j.ogm.annotation.Relationship @@ -36,6 +38,10 @@ import org.neo4j.ogm.annotation.Relationship class ParameterDeclaration : ValueDeclaration(), HasDefault { var isVariadic = false + var memoryValue: 
ParameterMemoryValue = ParameterMemoryValue(Name("value")) /*.apply { + memoryAddress = this@ParameterDeclaration.memoryAddress + }*/ + @Relationship(value = "DEFAULT", direction = Relationship.Direction.OUTGOING) var defaultValueEdge = astOptionalEdgeOf() private var defaultValue by unwrapping(ParameterDeclaration::defaultValueEdge) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/VariableDeclaration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/VariableDeclaration.kt index ebf498fc1b..f8b8dd901a 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/VariableDeclaration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/VariableDeclaration.kt @@ -30,9 +30,7 @@ import de.fraunhofer.aisec.cpg.graph.edges.ast.astEdgesOf import de.fraunhofer.aisec.cpg.graph.edges.ast.astOptionalEdgeOf import de.fraunhofer.aisec.cpg.graph.edges.unwrapping import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope -import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression -import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression -import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.AutoType import de.fraunhofer.aisec.cpg.graph.types.HasType import de.fraunhofer.aisec.cpg.graph.types.TupleType diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edges/flows/Dataflow.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edges/flows/Dataflow.kt index 07ba1dc104..1fb3d21fb5 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edges/flows/Dataflow.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/edges/flows/Dataflow.kt @@ -27,11 +27,18 @@ package de.fraunhofer.aisec.cpg.graph.edges.flows import com.fasterxml.jackson.annotation.JsonIgnore import de.fraunhofer.aisec.cpg.graph.Node 
+import de.fraunhofer.aisec.cpg.graph.PointerAccess import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration +import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.TupleDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration import de.fraunhofer.aisec.cpg.graph.edges.Edge import de.fraunhofer.aisec.cpg.graph.edges.collections.EdgeSet import de.fraunhofer.aisec.cpg.graph.edges.collections.MirroredEdgeCollection import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import de.fraunhofer.aisec.cpg.graph.types.HasType import de.fraunhofer.aisec.cpg.helpers.neo4j.DataflowGranularityConverter import kotlin.reflect.KProperty @@ -53,12 +60,21 @@ sealed interface Granularity */ data object FullDataflowGranularity : Granularity +/** + * This dataflow granularity denotes that the value or address of a pointer is flowing from + * [Dataflow.start] to [Dataflow.end]. + */ +data class PointerDataflowGranularity( + /** Does the Dataflow affect the pointer's address or its value? */ + val pointerTarget: PointerAccess +) : Granularity + /** * This dataflow granularity denotes that not the "whole" object is flowing from [Dataflow.start] to * [Dataflow.end] but only parts of it. Common examples include [MemberExpression] nodes, where we * model a dataflow to the base, but only partially scoped to a particular field. */ -class PartialDataflowGranularity( +data class PartialDataflowGranularity( /** The target that is affected by this partial dataflow. */ val partialTarget: Declaration? ) : Granularity @@ -80,6 +96,14 @@ fun partial(target: Declaration?): PartialDataflowGranularity { return PartialDataflowGranularity(target) } +/** + * Creates a new [PointerDataflowGranularity]. 
The [PointerAccess] specifies whether the pointer's value + * is accessed, or its address. + */ +fun pointer(access: PointerAccess): PointerDataflowGranularity { + return PointerDataflowGranularity(access) +} + /** * This edge class defines a flow of data between [start] and [end]. The flow can have a certain * [granularity]. diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/ForEachStatement.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/ForEachStatement.kt index ed47af14a6..4ed2991fa6 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/ForEachStatement.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/ForEachStatement.kt @@ -49,6 +49,7 @@ class ForEachStatement : LoopStatement(), BranchingNode, StatementHolder { val end = new?.end if (end is Reference) { end.access = AccessValues.WRITE + end.dfgHandlerHint = true } } ) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt index 89ae115c2a..38f90de77f 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/AssignExpression.kt @@ -33,6 +33,7 @@ import de.fraunhofer.aisec.cpg.graph.edges.unwrapping import de.fraunhofer.aisec.cpg.graph.types.HasType import de.fraunhofer.aisec.cpg.graph.types.TupleType import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.helpers.Util import org.neo4j.ogm.annotation.Relationship import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -68,13 +69,41 @@ class AssignExpression : var base = (end as? MemberExpression)?.base as?
MemberExpression while (base != null) { base.access = AccessValues.READWRITE + // TODO think about it: base.dfgHandlerHint = true base = base.base as? MemberExpression } if (isSimpleAssignment) { - (end as? Reference)?.access = AccessValues.WRITE + val unwrapped = + if (end is SubscriptExpression) end.arrayExpression + else end.unwrapReference() + + if (unwrapped is Reference) { + unwrapped.let { + it.access = AccessValues.WRITE + it.dfgHandlerHint = true + } + } } else { - (end as? Reference)?.access = AccessValues.READWRITE + val unwrapped = end.unwrapReference() + if (unwrapped is Reference) { + unwrapped.let { + it.access = AccessValues.READWRITE + it.dfgHandlerHint = true + } + } + + if (!isCompoundAssignment) { + // If this is neither a simple nor a compound assignment, probably something + // went wrong, we still model this as a READWRITE, but we indicate a warning + // to + // the user + Util.warnWithFileLocation( + this, + log, + "Assignment is neither a simple nor a compound assignment. This is suspicious." + ) + } } } ) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Expression.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Expression.kt index 409fefcfd7..b5e4cdb0ec 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Expression.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Expression.kt @@ -86,6 +86,9 @@ abstract class Expression : Statement(), HasType { informObservers(HasType.TypeObserver.ChangeType.ASSIGNED_TYPE) } + /** Each Expression also has a MemoryAddress. 
+ var memoryAddress = mutableSetOf() + override fun toString(): String { return ToStringBuilder(this, TO_STRING_STYLE) .appendSuper(super.toString()) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/MemoryAddress.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/MemoryAddress.kt new file mode 100644 index 0000000000..086307ed62 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/MemoryAddress.kt @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph.statements.expressions + +import de.fraunhofer.aisec.cpg.graph.Name +import de.fraunhofer.aisec.cpg.graph.Node + +open class MemoryAddress(override var name: Name) : Node() { + /* + * When the node represents the MemoryAddress of a struct or an array, we use the fieldAddresses map to store the MemoryAddresses of the different fields. + * Therefore, for structs the key should be a FieldDeclaration.
+ * For arrays, it may also be a literal if the MemoryAddress is accessed with something like `array[0]` + */ + // FIXME: The FieldDeclarations don't seem to be unique. Also, for arrays, the literals in + // different lines won't be the same, so we try a string as index + val fieldAddresses = mutableMapOf>() + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + if (other !is MemoryAddress) { + return false + } + // TODO: What else do we need to compare? + return name == other.name && fieldAddresses == other.fieldAddresses + } + + override fun hashCode(): Int { + return name.hashCode() // must agree with equals(): equal addresses always share a name + } +} + +/** + * There is a value, but we cannot determine it while processing this node. We assume that this + * value will definitely be set when we really execute the code. E.g., it's set outside the + * function's context. This is used for a [ParameterDeclaration] and serves as some sort of stepping + * stone. + */ +class ParameterMemoryValue(override var name: Name) : MemoryAddress(name) { + // The ParameterMemoryValue is usually the Value of a parameter. Let's use this little helper to + // get to the parameter's address + var memoryAddress: Node? = null +} + +/** We don't know the value. It might be set somewhere else or not. No idea. */ +class UnknownMemoryValue(override var name: Name = Name("")) : MemoryAddress(name) {} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerDereference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerDereference.kt new file mode 100644 index 0000000000..f11c453e1f --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerDereference.kt @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020, Fraunhofer AISEC. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph.statements.expressions + +import de.fraunhofer.aisec.cpg.graph.edges.ast.astEdgeOf +import de.fraunhofer.aisec.cpg.graph.edges.unwrapping +import de.fraunhofer.aisec.cpg.graph.types.HasType +import de.fraunhofer.aisec.cpg.graph.types.Type +import org.neo4j.ogm.annotation.Relationship + +/** A c-style dereference, such as *i->f. */ +open class PointerDereference : Reference() { + @Relationship("INPUT") + var inputEdge = + astEdgeOf( + of = ProblemExpression("could not parse input"), + onChanged = { old, new -> exchangeTypeObserver(old, new) } + ) + /** The expression on which the operation is applied. 
+ var input by unwrapping(PointerDereference::inputEdge) + + override fun typeChanged(newType: Type, src: HasType) { + // Only accept type changes from our input + if (src != input) { + return + } + + this.type = newType.dereference() + } + + override fun assignedTypeChanged(assignedTypes: Set, src: HasType) { + // Only accept type changes from our input + if (src != input) { + return + } + + // Apply our operator to all assigned types and forward them to us + this.addAssignedTypes(assignedTypes.map(Type::dereference).toSet()) + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerReference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerReference.kt new file mode 100644 index 0000000000..17cd5c3425 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/PointerReference.kt @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.graph.statements.expressions + +import de.fraunhofer.aisec.cpg.graph.edges.ast.astEdgeOf +import de.fraunhofer.aisec.cpg.graph.edges.unwrapping +import de.fraunhofer.aisec.cpg.graph.pointer +import de.fraunhofer.aisec.cpg.graph.types.HasType +import de.fraunhofer.aisec.cpg.graph.types.Type +import org.neo4j.ogm.annotation.Relationship + +/** A c-style reference, such as &i. */ +open class PointerReference : Reference() { + @Relationship("INPUT") + var inputEdge = + astEdgeOf( + of = ProblemExpression("could not parse input"), + onChanged = { old, new -> exchangeTypeObserver(old, new) } + ) + /** The expression on which the operation is applied. */ + var input by unwrapping(PointerReference::inputEdge) + + override fun typeChanged(newType: Type, src: HasType) { + // Only accept type changes from our input + if (src != input) { + return + } + + this.type = newType.pointer() + } + + override fun assignedTypeChanged(assignedTypes: Set, src: HasType) { + // Only accept type changes from our input + if (src != input) { + return + } + + // Apply our operator to all assigned types and forward them to us + this.addAssignedTypes(assignedTypes.map { it.pointer() }.toSet()) + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt index 3ed5399018..e887534d4b 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/Reference.kt @@ -96,6 +96,13 @@ open class Reference : Expression(), HasType.TypeObserver, HasAliases { var access = 
AccessValues.READ var isStaticAccess = false + /** + * Is this reference used in the [AssignExpression.lhs] or [UnaryOperator.input] or + * [ForEachStatement.variable] which has a dedicated handling in the + * [ControlFlowSensitiveDFGPass]? + */ + var dfgHandlerHint = false + /** * This is a MAJOR workaround needed to resolve function pointers, until we properly re-design * the call resolver. When this [Reference] contains a function pointer reference that is @@ -134,6 +141,18 @@ open class Reference : Expression(), HasType.TypeObserver, HasAliases { } override fun assignedTypeChanged(assignedTypes: Set, src: HasType) { + // Alias are broken. Do not trust type updates from them + if (this.aliases.isNotEmpty()) { + if (src is HasAliases && this.aliases.contains(src)) { + return + } + + val decl = (src as? Reference)?.refersTo + if (decl is HasAliases && this.aliases.contains(decl)) { + return + } + } + // Make sure that the update comes from our declaration, if we change our assigned types if (src == refersTo) { // Set our type diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/UnaryOperator.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/UnaryOperator.kt index 3bb1931a57..6a573f169c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/UnaryOperator.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/expressions/UnaryOperator.kt @@ -25,19 +25,17 @@ */ package de.fraunhofer.aisec.cpg.graph.statements.expressions -import de.fraunhofer.aisec.cpg.graph.AccessValues -import de.fraunhofer.aisec.cpg.graph.ArgumentHolder -import de.fraunhofer.aisec.cpg.graph.HasOverloadedOperation +import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.edges.ast.astEdgeOf import de.fraunhofer.aisec.cpg.graph.edges.unwrapping -import de.fraunhofer.aisec.cpg.graph.pointer import de.fraunhofer.aisec.cpg.graph.types.HasType import 
de.fraunhofer.aisec.cpg.graph.types.Type import org.apache.commons.lang3.builder.ToStringBuilder import org.neo4j.ogm.annotation.Relationship /** A unary operator expression, involving one expression and an operator, such as `a++`. */ -class UnaryOperator : Expression(), HasOverloadedOperation, ArgumentHolder, HasType.TypeObserver { +class UnaryOperator : + Expression(), HasOverloadedOperation, ArgumentHolder, HasType.TypeObserver, HasAliases { @Relationship("INPUT") var inputEdge = astEdgeOf( @@ -77,6 +75,7 @@ class UnaryOperator : Expression(), HasOverloadedOperation, ArgumentHolder, HasT var access = AccessValues.READ if (operatorCode == "++" || operatorCode == "--") { access = AccessValues.READWRITE + (input as? Reference)?.dfgHandlerHint = true } if (input is Reference) { (input as? Reference)?.access = access @@ -141,6 +140,9 @@ class UnaryOperator : Expression(), HasOverloadedOperation, ArgumentHolder, HasT return false } + override var aliases = + mutableSetOf() // TODO can this be removed? also HasAliases interface + override fun hasArgument(expression: Expression): Boolean { return this.input == expression } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/OrderingHelpers.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/OrderingHelpers.kt new file mode 100644 index 0000000000..927f48def7 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/helpers/OrderingHelpers.kt @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// NOTE(review): the generic type arguments of the declarations below were missing in the reviewed
// text and have been reconstructed from how the values are used — confirm against the callers.

/**
 * Marks every function in [functions] as "ordered": its own entry is dropped from [dependencies]
 * and it is removed from the unsatisfied-dependency set of every remaining entry.
 */
private fun removeFromDependencies(
    dependencies: IdentityHashMap<FunctionDeclaration, IdentitySet<FunctionDeclaration>>,
    functions: Collection<FunctionDeclaration>
) {
    functions.forEach { function ->
        // It's no longer needed in the map
        dependencies.remove(function)
        // It's no longer an unsatisfied dependency.
        dependencies.values.forEach { it.remove(function) }
    }
}

/**
 * Add all functions in [dependencies] which do not have a dependency (i.e., the value of the
 * respective entry is empty) to the [orderedList] since all of their dependencies are fulfilled.
 * The entries are also deleted from [dependencies], which may free further functions; the process
 * is therefore repeated until no entry with an empty value is left.
 *
 * @param orderedList receives the functions in the order in which they became free; functions
 *   freed in the same round are appended sorted by name.
 * @param dependencies maps a function to its not-yet-satisfied dependencies. Modified in place.
 */
fun addFunctionsWithoutDependency(
    orderedList: MutableList<Node>,
    dependencies: IdentityHashMap<FunctionDeclaration, IdentitySet<FunctionDeclaration>>
) {
    // All functions which do not have a dependency will never get one. We peel them off round by
    // round so that the (much slower) transitive analysis of the caller has as little input as
    // possible. `any` is false for an empty map, so the loop also ends once everything is ordered.
    while (dependencies.any { (_, unsatisfied) -> unsatisfied.isEmpty() }) {
        val nextFunctions = dependencies.filterValues { it.isEmpty() }.keys

        removeFromDependencies(dependencies, nextFunctions)
        orderedList.addAll(nextFunctions.sortedBy { it.name })
    }
}

/**
 * Maps each function in [functions] to a fresh identity set of its callees. The callees are the
 * dependencies which have not been analyzed yet. Whenever a function is added to the ordered list
 * (which determines the analysis order), it is removed from this map, both as a key and from the
 * values of the other entries.
 *
 * @return a new, mutable map owned by the caller; the sets are copies of the `callees`.
 */
fun prepareCallGraph(
    functions: Iterable<FunctionDeclaration>
): IdentityHashMap<FunctionDeclaration, IdentitySet<FunctionDeclaration>> {
    return IdentityHashMap(functions.associateWith { it.callees.toIdentitySet() })
}

/**
 * Analyzes the call graph to identify an ordering for analyzing the [eogStarters] in which the
 * dependencies (in terms of required function calls which could affect the currently analyzed
 * function) are hopefully resolved most of the time. Here, a function f1 depends on a function f2
 * if f1 calls f2. This might be unsuitable for other analyses.
 *
 * @return the ordered functions followed by all [eogStarters] which are not a
 *   [FunctionDeclaration].
 */
fun orderEOGStartersBasedOnDependencies(eogStarters: Iterable<Node>): List<Node> {
    val functions = eogStarters.filterIsInstance<FunctionDeclaration>()
    val noFunction = eogStarters.subtract(functions)

    // Function -> callees which have not been ordered yet (see prepareCallGraph).
    val dependencies = prepareCallGraph(functions)

    val orderedList = mutableListOf<Node>()

    addFunctionsWithoutDependency(orderedList, dependencies)

    // All remaining nodes still have some unfulfilled dependencies. We make some heuristics
    // based on how many dependencies we cannot fulfill. We therefore first collect all
    // transitive dependencies with a worklist until nothing changes anymore.
    // NOTE(review): `::identitySetOf` is passed as the mapping function of computeIfAbsent; if it
    // is a vararg factory, the adapted reference creates a set containing the key itself. Such
    // self-references are removed right after the loop — confirm this is the intended default.
    val changed = ArrayDeque(dependencies.keys)

    while (changed.isNotEmpty()) {
        val k = changed.removeFirst()
        val additionalValues =
            dependencies[k]
                ?.flatMap { dependencies.computeIfAbsent(it, ::identitySetOf) }
                ?.toIdentitySet() ?: identitySetOf()
        if (dependencies.computeIfAbsent(k, ::identitySetOf).addAll(additionalValues)) {
            // The dependencies of k grew, so everything depending on k has to be revisited.
            changed.addAll(dependencies.filterValues { k in it }.keys.filter { it !in changed })
        }
    }
    // A function is never an unsatisfied dependency of itself (recursion).
    dependencies.forEach { (k, v) -> v.remove(k) }

    log.info("Ordering all functions according to their dependencies")

    while (dependencies.isNotEmpty()) {
        // We always try to find functions without any (unsatisfied) dependencies. That's
        // obviously the best scenario because we can be sure that all prerequisites have
        // been fulfilled.
        val withoutDependencies = dependencies.filterValues { it.isEmpty() }.keys
        val nextFunctions: Set<FunctionDeclaration> =
            if (withoutDependencies.isNotEmpty()) {
                withoutDependencies
            } else {
                // Each function has at least one dependency :( Then, we pick the function with
                // the smallest number of missing dependencies (ties are broken by name).
                // TODO: A more sophisticated approach could improve the results here. E.g. least
                // dependencies but used in most other functions.
                val missingCounts = dependencies.map { (function, missing) -> function to missing.size }
                val minimum = missingCounts.minOf { it.second }
                identitySetOf(
                    missingCounts.filter { it.second == minimum }.minBy { it.first.name }.first
                )
            }

        removeFromDependencies(dependencies, nextFunctions)
        orderedList.addAll(nextFunctions.sortedBy { it.name })
    }

    // We add all things which are not a function declaration to the end because they won't be
    // called at a specific point in time (we hope)
    orderedList.addAll(noFunction)
    return orderedList
}
// NOTE(review): the generic type parameters/arguments in this block were missing in the reviewed
// text and have been reconstructed from usage — confirm, in particular for [emptyMapLattice].

/**
 * A complete lattice is an ordered structure of values of type [T]. [T] could be anything, e.g., a
 * set, a new data structure (like a range), or anything else. [T] depends on the analysis and
 * typically has to abstract the value for the specific purpose.
 *
 * This class is actually used to hold individual instances of the lattice's elements and to compute
 * bigger elements depending on these two elements.
 *
 * Implementations of this class have to implement the comparator and the least upper bound of two
 * lattices. All implementations in this file return `0` from [compareTo] for equal elements, `1`
 * if this element is greater or equal, and `-1` in every other case (which includes incomparable
 * elements). Implementations overriding [equals] must derive [hashCode] from [elements] only, so
 * that equal elements share a hash code.
 */
abstract class LatticeElement<T>(val elements: T) : Comparable<LatticeElement<T>> {
    /**
     * Computes the least upper bound of this lattice and [other]. It returns a new object and does
     * not modify either of the objects.
     */
    abstract fun lub(other: LatticeElement<T>): LatticeElement<T>

    /** Duplicates the object, i.e., makes a deep copy. */
    abstract fun duplicate(): LatticeElement<T>
}

typealias PowersetLatticeT<V> = LatticeElement<Set<V>>

/** Creates a [PowersetLattice] holding the empty set. */
inline fun <reified V> emptyPowersetLattice() = PowersetLattice<V>(setOf())

/**
 * Implements the [LatticeElement] for a lattice over a set of nodes. The lattice itself is
 * constructed by the powerset.
 */
class PowersetLattice<V>(elements: Set<V>) : LatticeElement<Set<V>>(elements) {
    /** The least upper bound of two sets is their union. */
    override fun lub(other: LatticeElement<Set<V>>) =
        PowersetLattice(this.elements.union(other.elements))

    override fun duplicate(): LatticeElement<Set<V>> =
        PowersetLattice(this.elements.toIdentitySet())

    /** `0` for equal sets, `1` if this set contains all of [other]'s elements, `-1` otherwise. */
    override fun compareTo(other: LatticeElement<Set<V>>): Int {
        return when {
            this.elements == other.elements -> 0
            this.elements.containsAll(other.elements) -> 1
            else -> -1
        }
    }

    override fun equals(other: Any?): Boolean {
        return other is PowersetLattice<*> && this.elements == other.elements
    }

    // Must not include super.hashCode(): that is the identity hash and would give equal lattices
    // different hash codes.
    override fun hashCode(): Int {
        return elements.hashCode()
    }
}

typealias MapLatticeT<K, V> = LatticeElement<Map<K, V>>

/** Creates a [MapLattice] holding the empty map. */
inline fun <reified K, T> emptyMapLattice() = MapLattice<K, LatticeElement<T>>(mapOf())

/** Implements the [LatticeElement] for a lattice over a map of nodes to another lattice. */
class MapLattice<K, T>(elements: Map<K, LatticeElement<T>>) :
    LatticeElement<Map<K, LatticeElement<T>>>(elements) {
    /**
     * Merges both maps key by key: values present in both maps are combined with their own
     * [LatticeElement.lub], values present in only one map are taken over as they are.
     */
    override fun lub(
        other: LatticeElement<Map<K, LatticeElement<T>>>
    ): LatticeElement<Map<K, LatticeElement<T>>> {
        val allKeys = other.elements.keys.union(this.elements.keys)
        val newMap = mutableMapOf<K, LatticeElement<T>>()
        for (key in allKeys) {
            val thisValue = this.elements[key]
            val otherValue = other.elements[key]
            val merged =
                if (thisValue != null && otherValue != null) {
                    thisValue.lub(otherValue)
                } else {
                    thisValue ?: otherValue
                }
            merged?.let { newMap[key] = it }
        }
        return MapLattice(newMap)
    }

    override fun duplicate(): LatticeElement<Map<K, LatticeElement<T>>> {
        return MapLattice(this.elements.mapValues { (_, v) -> v.duplicate() })
    }

    /**
     * `0` for equal entries; `1` if this map has all keys of [other] and no value of this map is
     * smaller than the value [other] holds for the same key; `-1` otherwise.
     */
    override fun compareTo(other: LatticeElement<Map<K, LatticeElement<T>>>): Int {
        if (this.elements.entries == other.elements.entries) return 0
        if (
            this.elements.keys.containsAll(other.elements.keys) &&
                this.elements.entries.all { (k, v) ->
                    // Keys unknown to `other` (null) do not prevent us from being greater.
                    other.elements[k]?.let { otherV -> v >= otherV } != false
                }
        )
            return 1
        return -1
    }

    override fun equals(other: Any?): Boolean {
        return other is MapLattice<*, *> && this.elements.entries == other.elements.entries
    }

    // See PowersetLattice.hashCode: only the elements may contribute to the hash.
    override fun hashCode(): Int {
        return elements.hashCode()
    }
}

/** Implements the [LatticeElement] for a pair of lattices which are handled component-wise. */
open class TupleLattice<S, T>(elements: Pair<LatticeElement<S>, LatticeElement<T>>) :
    LatticeElement<Pair<LatticeElement<S>, LatticeElement<T>>>(elements) {
    override fun lub(
        other: LatticeElement<Pair<LatticeElement<S>, LatticeElement<T>>>
    ): LatticeElement<Pair<LatticeElement<S>, LatticeElement<T>>> {
        return TupleLattice(
            Pair(
                this.elements.first.lub(other.elements.first),
                this.elements.second.lub(other.elements.second)
            )
        )
    }

    override fun duplicate(): LatticeElement<Pair<LatticeElement<S>, LatticeElement<T>>> {
        return TupleLattice(Pair(elements.first.duplicate(), elements.second.duplicate()))
    }

    /** `0` if both components are equal, `1` if both are greater or equal, `-1` otherwise. */
    override fun compareTo(
        other: LatticeElement<Pair<LatticeElement<S>, LatticeElement<T>>>
    ): Int {
        if (
            this.elements.first == other.elements.first &&
                this.elements.second == other.elements.second
        )
            return 0
        if (
            this.elements.first >= other.elements.first &&
                this.elements.second >= other.elements.second
        )
            return 1
        return -1
    }

    override fun equals(other: Any?): Boolean {
        if (other !is TupleLattice<*, *>) return false
        return other.elements.first == this.elements.first &&
            other.elements.second == this.elements.second
    }

    // See PowersetLattice.hashCode: only the elements may contribute to the hash.
    override fun hashCode(): Int {
        return elements.hashCode()
    }

    operator fun component1() = this.elements.first

    operator fun component2() = this.elements.second
}

/** Implements the [LatticeElement] for a triple of lattices which are handled component-wise. */
class TripleLattice<R, S, T>(
    elements: Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>
) : LatticeElement<Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>>(elements) {
    override fun lub(
        other: LatticeElement<Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>>
    ): LatticeElement<Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>> {
        return TripleLattice(
            Triple(
                this.elements.first.lub(other.elements.first),
                this.elements.second.lub(other.elements.second),
                this.elements.third.lub(other.elements.third)
            )
        )
    }

    override fun duplicate():
        LatticeElement<Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>> {
        return TripleLattice(
            Triple(
                elements.first.duplicate(),
                elements.second.duplicate(),
                elements.third.duplicate()
            )
        )
    }

    /** `0` if all components are equal, `1` if all are greater or equal, `-1` otherwise. */
    override fun compareTo(
        other: LatticeElement<Triple<LatticeElement<R>, LatticeElement<S>, LatticeElement<T>>>
    ): Int {
        if (
            this.elements.first == other.elements.first &&
                this.elements.second == other.elements.second &&
                this.elements.third == other.elements.third
        )
            return 0
        if (
            this.elements.first >= other.elements.first &&
                this.elements.second >= other.elements.second &&
                this.elements.third >= other.elements.third
        )
            return 1
        return -1
    }

    override fun equals(other: Any?): Boolean {
        if (other !is TripleLattice<*, *, *>) return false
        return other.elements.first == this.elements.first &&
            other.elements.second == this.elements.second &&
            other.elements.third == this.elements.third
    }

    // See PowersetLattice.hashCode: only the elements may contribute to the hash.
    override fun hashCode(): Int {
        return elements.hashCode()
    }

    operator fun component1() = this.elements.first

    operator fun component2() = this.elements.second

    operator fun component3() = this.elements.third
}
/**
 * Iterates over the EOG edges reachable from [startEdges] until a fixpoint is reached.
 *
 * Every edge is associated with a state. An edge taken from the worklist is handed to
 * [transformation] together with its current state; the result is merged (via
 * [LatticeElement.lub]) into the state of every outgoing EOG edge of the edge's end node. An
 * outgoing edge is (re-)scheduled if it had no state before or if its state changed, unless it is
 * already waiting in the worklist. Newly scheduled edges are processed first.
 *
 * NOTE(review): the generic type parameter list was missing in the reviewed text; a single
 * parameter `T` has been reconstructed from usage — confirm against the callers.
 *
 * @param startEdges the edges to start with; each of them is initialized with [startState].
 * @param startState the initial state, also returned if no edge was ever recorded.
 * @param transformation computes the state after an edge from the state before it.
 * @return the least upper bound of the states of all edges seen during the iteration.
 */
inline fun <T> iterateEOGClean(
    startEdges: List<EvaluationOrder>,
    startState: LatticeElement<T>,
    transformation: (EvaluationOrder, LatticeElement<T>) -> LatticeElement<T>
): LatticeElement<T> {
    val globalState = IdentityHashMap<EvaluationOrder, LatticeElement<T>>()
    for (startEdge in startEdges) {
        globalState[startEdge] = startState
    }

    // The worklist is only modified at its front, which an ArrayDeque does in constant time.
    val edgesList = ArrayDeque(startEdges)

    while (edgesList.isNotEmpty()) {
        val nextEdge = edgesList.removeFirst()

        val nextGlobal = globalState[nextEdge] ?: continue
        val newState = transformation(nextEdge, nextGlobal)
        for (successor in nextEdge.end.nextEOGEdges) {
            val oldGlobal = globalState[successor]
            val newGlobal = oldGlobal?.let { previous -> newState.lub(previous) } ?: newState
            globalState[successor] = newGlobal
            // Only revisit the successor if we learned something new about it.
            if (successor !in edgesList && (oldGlobal == null || newGlobal != oldGlobal)) {
                edgesList.addFirst(successor)
            }
        }
    }

    // Merge the states of all edges, seeded with the first state (as before, so a fresh object is
    // returned whenever at least one state exists).
    val firstState = globalState.values.firstOrNull() ?: return startState
    return globalState.values.fold(firstState) { state, value -> state.lub(value) }
}
de.fraunhofer.aisec.cpg.graph.statements.expressions.ShortCircuitOperator import de.fraunhofer.aisec.cpg.helpers.* +import de.fraunhofer.aisec.cpg.helpers.LatticeElement import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn import java.util.* +import kotlin.system.measureTimeMillis /** This pass builds the Control Dependence Graph (CDG) by iterating through the EOG. */ @DependsOn(EvaluationOrderGraphPass::class) @@ -83,6 +85,7 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : EOGStarterPass( ) return } + log.info("[CDG] Analyzing function ${startNode.name}. Complexity: $c") // Maps nodes to their "cdg parent" (i.e. the dominator) and also has the information // through which path it is reached. If all outgoing paths of the node's dominator result in @@ -91,8 +94,11 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : EOGStarterPass( val identityMap = IdentityHashMap>() identityMap[startNode] = identitySetOf(startNode) startState.push(startNode, PrevEOGLattice(identityMap)) - val finalState = iterateEOG(startNode.nextEOGEdges, startState, ::handleEdge) ?: return - + var finalState: State>> + val executionTime = measureTimeMillis { + finalState = iterateEOG(startNode.nextEOGEdges, startState, ::handleEdge) ?: return + } + log.info("[CDG] iterated EOG for ${startNode.name}. Time: $executionTime") val branchingNodeConditionals = getBranchingNodeConditions(startNode) // Collect the information, identify merge points, etc. 
This is not really efficient yet :( @@ -126,11 +132,13 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : EOGStarterPass( } } } - val alreadySeen = mutableSetOf>>() + val alreadySeen = mutableSetOf() while (dominatorsList.isNotEmpty()) { val (k, v) = dominatorsList.removeFirst() - alreadySeen.add(Pair(k, v)) + if (!alreadySeen.add(Pair(k, v).hashCode())) { + continue + } if (k != startNode && v.containsAll(branchingNodeConditionals[k] ?: setOf())) { // We are reachable from all the branches of a branching node. Add this parent // to the worklist or update an existing entry. Also consider already existing @@ -150,16 +158,14 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : EOGStarterPass( val update = entry.second.addAll(newV) if ( update && - alreadySeen.none { - it.first == entry.first && - it.second.containsAll(entry.second) - } - ) + Pair(entry.first, entry.second).hashCode() !in alreadySeen + ) { dominatorsList.add(entry) - else finalDominators.add(entry) + } else finalDominators.add(entry) } alreadySeen.none { - it.first == newK && it.second.containsAll(newV) + // it.first == newK && it.second == newV + it == Pair(newK, newV.toMutableSet()).hashCode() } -> { // We don't have an entry yet => add a new one val newEntry = Pair(newK, newV.toMutableSet()) @@ -182,6 +188,7 @@ open class ControlDependenceGraphPass(ctx: TranslationContext) : EOGStarterPass( // We have all the dominators of this node and potentially traversed the graph // "upwards". 
Add the CDG edges + // log.info("[CDG] iterating through the finalDomniators") finalDominators .filter { (k, _) -> k != node } .forEach { (k, v) -> diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt index e9b06fc1ab..924d144364 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ControlFlowSensitiveDFGPass.kt @@ -29,12 +29,12 @@ import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.edges.Edge -import de.fraunhofer.aisec.cpg.graph.edges.flows.CallingContext import de.fraunhofer.aisec.cpg.graph.edges.flows.CallingContextOut -import de.fraunhofer.aisec.cpg.graph.edges.flows.partial import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.helpers.* +import de.fraunhofer.aisec.cpg.helpers.LatticeElement +import de.fraunhofer.aisec.cpg.helpers.PowersetLattice import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn import kotlin.collections.set import kotlin.contracts.ExperimentalContracts @@ -56,7 +56,8 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass * [Statement.cyclomaticComplexity]) a [FunctionDeclaration] must have in order to be * considered. */ - var maxComplexity: Int? = null + var maxComplexity: Int? = null, + val parallel: Boolean = true ) : PassConfiguration() override fun cleanup() { @@ -65,7 +66,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass /** We perform the actions for each [FunctionDeclaration]. 
*/ override fun accept(node: Node) { - // For now, we only execute this for function declarations, we will support all EOG starters + /* // For now, we only execute this for function declarations, we will support all EOG starters // in the future. if (node !is FunctionDeclaration) { return @@ -88,7 +89,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass log.trace("Handling {} (complexity: {})", node.name, c) - clearFlowsOfVariableDeclarations(node) + // clearFlowsOfVariableDeclarations(node) val startState = DFGPassState>() startState.declarationsState.push(node, PowersetLattice(identitySetOf())) @@ -122,32 +123,39 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass } } else { value.elements.forEach { + val edgePropertyMapElement = edgePropertiesMap[Triple(it, key, true)] if ((it is VariableDeclaration || it is ParameterDeclaration) && key == it) { // Nothing to do - } else if ( - Pair(it, key) in edgePropertiesMap && - edgePropertiesMap[Pair(it, key)] is CallingContext - ) { + } else if (edgePropertyMapElement is CallingContext) { key.prevDFGEdges.addContextSensitive( it, - callingContext = (edgePropertiesMap[Pair(it, key)] as CallingContext) + callingContext = edgePropertyMapElement ) + } else if (edgePropertyMapElement is PointerDataflowGranularity) { + key.prevDFGEdges += + Dataflow(start = it, end = key, granularity = edgePropertyMapElement) } else { - key.prevDFGEdges += it + key.prevDFGEdges += + Dataflow( + start = it, + end = key, + ) // TODO: seriously think about this and re-write the api } } } - } + }*/ } /** - * Checks if there's an entry in [edgePropertiesMap] with key `(x, null)` where `x` is in [from] - * and, if so, adds an entry with key `(x, to)` and the same value + * Checks if there's an entry in [edgePropertiesMap] with key `(x, null, )` where `x` is in + * [from] and, if so, adds an entry with key `(x, to, true)` and the same value */ - protected fun findAndSetProperties(from: 
Set, to: Node) { + protected open fun findAndSetProperties(from: Set, to: Node) { edgePropertiesMap - .filter { it.key.first in from && it.key.second == null } - .forEach { edgePropertiesMap[Pair(it.key.first, to)] = it.value } + .filter { entry -> + entry.key.first in from && (to as? Reference)?.refersTo == entry.key.second + } + .forEach { edgePropertiesMap[Triple(it.key.first, to, true)] = it.value } } /** @@ -185,6 +193,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass state: State>, worklist: Worklist, Node, Set> ): State> { + log.debug("In transfer") // We will set this if we write to a variable val writtenDeclaration: Declaration? val currentNode = currentEdge.end @@ -292,11 +301,31 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass // correct mapping, we use the "assignments" property which already searches for us. currentNode.assignments.forEach { assignment -> // This was the last write to the respective declaration. - (assignment.target as? Declaration ?: (assignment.target as? Reference)?.refersTo) - ?.let { - doubleState.declarationsState[it] = - PowersetLattice(identitySetOf(assignment.target as Node)) + val declPair: Pair? = + if (assignment.target is Declaration) + Pair(assignment.target as Declaration, assignment.target) + else { + val unwrappedTarget = (assignment.target as? Expression).unwrapReference() + if (unwrappedTarget is Reference) { + if (assignment.target is SubscriptExpression) { + val subscriptExpression = assignment.target as? 
SubscriptExpression + val unwrappedBufTarget = + subscriptExpression?.arrayExpression?.unwrapReference() + if (unwrappedBufTarget is Reference) { + unwrappedBufTarget.refersTo?.let { Pair(it, assignment.target) } + } else null + } else if (unwrappedTarget?.refersTo == null) { + null + } else { + Pair(unwrappedTarget.refersTo!!, unwrappedTarget) + } + } else null + } + declPair?.let { (decl, target) -> + if (target != null) { + doubleState.declarationsState[decl] = PowersetLattice(identitySetOf(target)) } + } } } else if (isIncOrDec(currentNode)) { // Increment or decrement => Add the prevWrite of the input to the input. After the @@ -321,23 +350,27 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass // The write operation goes to the variable in the lhs val lhs = currentNode.lhs.singleOrNull() - writtenDeclaration = (lhs as? Reference)?.refersTo - - if (writtenDeclaration != null) { - val prev = doubleState.declarationsState[writtenDeclaration] - findAndSetProperties(prev?.elements ?: setOf(), currentNode) - // Data flows from the last writes to the lhs variable to this node - state.push(lhs, prev) - - // The whole current node is the place of the last update, not (only) the lhs! - doubleState.declarationsState[writtenDeclaration] = - PowersetLattice(identitySetOf(lhs)) + val lhsref = lhs.unwrapReference() + if (lhsref is Reference) { + writtenDeclaration = lhsref.refersTo + + if (writtenDeclaration != null && lhs != null) { + val prev = doubleState.declarationsState[writtenDeclaration] + findAndSetProperties(prev?.elements ?: setOf(), currentNode) + // Data flows from the last writes to the lhs variable to this node + state.push(lhs, prev) + + // The whole current node is the place of the last update, not (only) the lhs! + doubleState.declarationsState[writtenDeclaration] = + PowersetLattice(identitySetOf(lhs)) + } } } else if ( (currentNode as? 
Reference)?.access == AccessValues.READ && (currentNode.refersTo is VariableDeclaration || currentNode.refersTo is ParameterDeclaration) && - currentNode.refersTo !is FieldDeclaration + currentNode.refersTo !is FieldDeclaration && + (currentNode.refersTo as? VariableDeclaration)?.isGlobal != true ) { // We can only find a change if there's a state for the variable doubleState.declarationsState[currentNode.refersTo]?.let { @@ -361,6 +394,29 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass // the other steps state.push(currentNode, it) } + } else if ( + (currentNode as? Reference)?.access == AccessValues.READWRITE && + !currentNode.dfgHandlerHint + ) { + /* This branch collects all READWRITE accesses which are not handled separately as compoundAssignment or inc/dec unary operation. This could for example be a pointer passed to an unknown function which is modified in this function but other things are also possible. */ + // We can only find a change if there's a state for the variable + doubleState.declarationsState[currentNode.refersTo]?.let { + // We only read the variable => Get previous write which have been collected in + // the other steps + state.push(currentNode, it) + } + // We read and write to the variable => Update the declarationState accordingly because + // there was probably some other kind of DFG edge into the reference + doubleState.declarationsState[currentNode.refersTo] = + PowersetLattice(identitySetOf(currentNode)) + } else if ( + (currentNode as? Reference)?.access == AccessValues.WRITE && !currentNode.dfgHandlerHint + ) { + /* Also here, we want/have to filter out variables in ForEachStatements because this must be handled separately. 
*/ + // We write to the variable => Update the declarationState accordingly because + // there was probably some other kind of DFG edge into the reference + doubleState.declarationsState[currentNode.refersTo] = + PowersetLattice(identitySetOf(currentNode)) } else if (currentNode is ComprehensionExpression) { val writtenTo = when (val variable = currentNode.variable) { @@ -369,7 +425,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass variable.singleDeclaration } else { log.error( - "Cannot handle multiple declarations in the ComprehensionExpresdsion: Node $currentNode" + "Cannot handle multiple declarations in the ComprehensionExpression: Node $currentNode" ) null } @@ -414,7 +470,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass null } } - else -> currentNode.variable + else -> variable } // We wrote something to this variable declaration @@ -487,7 +543,11 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass } doubleState.declarationsState[arg?.refersTo] = PowersetLattice(identitySetOf(param)) - edgePropertiesMap[Pair(param, null)] = CallingContextOut(currentNode) + + if (arg != null) { + edgePropertiesMap[Triple(param, arg.refersTo, false)] = + CallingContextOut(currentNode) + } } } } else { @@ -511,10 +571,11 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass * state. This is for example the case to identify if the resulting edge will receive a * context-sensitivity label (i.e., if the node used as key is somehow inside the called * function and the next usage happens inside the function under analysis right now). The key of - * an entry works as follows: The 1st item in the pair is the prevDFG of the 2nd item. If the - * 2nd item is null, it's obviously not relevant. Ultimately, it will be 2nd -prevDFG-> 1st. + * an entry works as follows: The 1st item in the triple is the prevDFG of the 2nd item. 
If the + * 2nd item is null, it's obviously not relevant. Ultimately, it will be 2nd -prevDFG-> 1st. If + * the third item is false, we also don't consider it. */ - val edgePropertiesMap = mutableMapOf, Any>() + val edgePropertiesMap = mutableMapOf, Any>() /** * Checks if the node performs an operation and an assignment at the same time e.g. with the @@ -563,16 +624,16 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass */ protected class DFGPassState( /** - * A mapping of a [Node] to its [LatticeElement]. The keys of this state will later get the - * DFG edges from the value! + * A mapping of a [Node] to its [de.fraunhofer.aisec.cpg.helpers.functional.LatticeElement]. + * The keys of this state will later get the DFG edges from the value! */ var generalState: State = State(), /** * It's main purpose is to store the most recent mapping of a [Declaration] to its - * [LatticeElement]. However, it is also used to figure out if we have to continue with the - * iteration (something in the declarationState has changed) which is why we store all nodes - * here. However, since we never use them except from determining if we changed something, - * it won't affect the result. + * [de.fraunhofer.aisec.cpg.helpers.functional.LatticeElement]. However, it is also used to + * figure out if we have to continue with the iteration (something in the declarationState + * has changed) which is why we store all nodes here. However, since we never use them + * except from determining if we changed something, it won't affect the result. */ var declarationsState: State = State(), @@ -615,10 +676,7 @@ open class ControlFlowSensitiveDFGPass(ctx: TranslationContext) : EOGStarterPass } /** Pushes the [newNode] and its [newLatticeElement] to the [declarationsState]. */ - fun pushToDeclarationsState( - newNode: Declaration, - newLatticeElement: LatticeElement? 
- ): Boolean { + fun pushToDeclarationsState(newNode: Node, newLatticeElement: LatticeElement?): Boolean { return declarationsState.push(newNode, newLatticeElement) } } @@ -667,6 +725,7 @@ fun Node.objectIdentifier(): Int? { return when (this) { is MemberExpression -> this.objectIdentifier() is Reference -> this.objectIdentifier() + is UnaryOperator -> this.objectIdentifier() is Declaration -> this.hashCode() else -> null } @@ -691,3 +750,18 @@ fun MemberExpression.objectIdentifier(): Int? { fun Reference.objectIdentifier(): Int? { return this.refersTo?.hashCode() } + +/** Implements [Node.objectIdentifier] for a [UnaryOperator]. */ +fun UnaryOperator.objectIdentifier(): Int? { + val op = this.operatorCode + return if (op == null) { + null + } else { + val inputIdentifier = input.objectIdentifier() + if (inputIdentifier != null) { + op.hashCode() + inputIdentifier + } else { + null + } + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt index 49b5028ff0..3ce5656bed 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/DFGPass.kt @@ -29,6 +29,7 @@ import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.edges.flows.CallingContextOut +import de.fraunhofer.aisec.cpg.graph.edges.flows.Dataflow import de.fraunhofer.aisec.cpg.graph.edges.flows.partial import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* @@ -69,8 +70,7 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { private fun connectInferredCallArguments(functionSummaries: DFGFunctionSummaries) { for (call in callsInferredFunctions) { for (invoked in call.invokes.filter { it.isInferred }) { - val changedParams = - 
functionSummaries.functionToChangedParameters[invoked] ?: mapOf() + val changedParams = invoked.functionSummary for ((param, _) in changedParams) { if (param == (invoked as? MethodDeclaration)?.receiver) { (call as? MemberCallExpression) @@ -84,7 +84,9 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { callingContext = CallingContextOut(call) ) (arg as? Reference)?.let { - it.access = AccessValues.READWRITE + // The access value stays on READ. Even if it's a pointer, only the + // dereference will be written. + // it.access = AccessValues.READWRITE it.refersTo?.let { it1 -> it.nextDFGEdges += it1 } } } @@ -115,11 +117,10 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { is SubscriptExpression -> handleSubscriptExpression(node) is ConditionalExpression -> handleConditionalExpression(node) is MemberExpression -> handleMemberExpression(node) - is Reference -> handleReference(node) + // The ControlFlowSensitiveDFGPass will draw the DFG Edges for these + // is Reference -> handleReference(node) is ExpressionList -> handleExpressionList(node) is NewExpression -> handleNewExpression(node) - // We keep the logic for the InitializerListExpression in that class because the - // performance would decrease too much. 
is InitializerListExpression -> handleInitializerListExpression(node) is KeyValueExpression -> handleKeyValueExpression(node) is LambdaExpression -> handleLambdaExpression(node) @@ -176,7 +177,7 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { } protected fun handleAssignExpression(node: AssignExpression) { - // If this is a compound assign, we also need to model a dataflow to the node itself + /* // If this is a compound assign, we also need to model a dataflow to the node itself if (node.isCompoundAssignment) { node.lhs.firstOrNull()?.let { node.prevDFGEdges += it @@ -187,7 +188,7 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { // Find all targets of rhs and connect them node.rhs.forEach { val targets = node.findTargets(it) - targets.forEach { target -> it.nextDFGEdges += target } + targets.forEach { target -> it.nextDFGEdges += Dataflow(start = it, end = target) } } } @@ -195,7 +196,7 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { // rhs to the node itself if (node.usedAsExpression) { node.expressionValue?.nextDFGEdges += node - } + }*/ } /** @@ -365,14 +366,17 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { * case of the operators "++" and "--" also from the node back to the input. */ protected fun handleUnaryOperator(node: UnaryOperator) { - node.input.let { - node.prevDFGEdges += it - if (node.operatorCode == "++" || node.operatorCode == "--") { - node.nextDFGEdges += it + if ((node.input as? Reference)?.access == AccessValues.WRITE) { + node.input.let { node.nextDFGEdges += it } + } else { + node.input.let { + node.prevDFGEdges += it + if (node.operatorCode == "++" || node.operatorCode == "--") { + node.nextDFGEdges += it + } } } } - /** * Adds the DFG edge for a [LambdaExpression]. The data flow from the function representing the * lambda to the expression. 
@@ -423,10 +427,10 @@ class DFGPass(ctx: TranslationContext) : ComponentPass(ctx) { node.refersTo?.let { when (node.access) { AccessValues.WRITE -> node.nextDFGEdges += it - AccessValues.READ -> node.prevDFGEdges += it + AccessValues.READ -> node.prevDFGEdges += Dataflow(start = it, end = node) else -> { node.nextDFGEdges += it - node.prevDFGEdges += it + node.prevDFGEdges += Dataflow(start = it, end = node) } } } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt index 6b043a39d2..827dac1151 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/EvaluationOrderGraphPass.kt @@ -397,6 +397,8 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa is Literal<*> -> handleDefault(node) is DefaultStatement -> handleDefault(node) is TypeIdExpression -> handleDefault(node) + is PointerDereference -> handlePointerDereference(node) + is PointerReference -> handlePointerReference(node) is Reference -> handleDefault(node) // These nodes are not added to the EOG is IncludeDeclaration -> doNothing() @@ -404,6 +406,18 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa } } + protected fun handlePointerReference(node: PointerReference) { + handleEOG(node.input) + + attachToEOG(node) + } + + protected fun handlePointerDereference(node: PointerDereference) { + handleEOG(node.input) + + attachToEOG(node) + } + /** * Default handler for nodes. The node is simply attached to the EOG and the ast subtree is * ignored. @@ -613,7 +627,7 @@ open class EvaluationOrderGraphPass(ctx: TranslationContext) : TranslationUnitPa // Handle left hand side(s) first node.lhs.forEach { handleEOG(it) } - // Then the right side(s). 
Avoid creating the EOG twice if it's already part of the + // Then, handle the right side(s). Avoid creating the EOG twice if it's already part of the // initializer of a declaration node.rhs.forEach { if (it !in node.declarations.map { decl -> decl.initializer }) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/Pass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/Pass.kt index ee91444425..067636338c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/Pass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/Pass.kt @@ -35,6 +35,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.Scope import de.fraunhofer.aisec.cpg.helpers.Benchmark import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.ScopedWalker +import de.fraunhofer.aisec.cpg.helpers.orderEOGStartersBasedOnDependencies import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteFirst @@ -78,7 +79,8 @@ abstract class TranslationUnitPass(ctx: TranslationContext) : Pass(ctx) +abstract class EOGStarterPass(ctx: TranslationContext, val orderDependencies: Boolean = false) : + Pass(ctx) open class PassConfiguration {} @@ -286,7 +288,11 @@ fun executePass( consumeTargets( (prototype as EOGStarterPass)::class, ctx, - result.allEOGStarters, + if (prototype.orderDependencies) { + orderEOGStartersBasedOnDependencies(result.allEOGStarters) + } else { + result.allEOGStarters + }, executedFrontends ) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPass.kt new file mode 100644 index 0000000000..30d0de3a3d --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPass.kt @@ -0,0 +1,891 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. 
All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.passes + +import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.edges.Edge +import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.helpers.IdentitySet +import de.fraunhofer.aisec.cpg.helpers.functional.* +import de.fraunhofer.aisec.cpg.helpers.identitySetOf +import de.fraunhofer.aisec.cpg.helpers.toIdentitySet +import de.fraunhofer.aisec.cpg.passes.ControlFlowSensitiveDFGPass.Configuration +import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn + +@DependsOn(SymbolResolver::class) +@DependsOn(EvaluationOrderGraphPass::class) +@DependsOn(DFGPass::class) +class PointsToPass(ctx: TranslationContext) : EOGStarterPass(ctx, orderDependencies = true) { + + // For recursive creation of FunctionSummaries, we have to make sure that we don't run in + // circles. 
+ // Therefore, we store the chain of FunctionDeclarations we currently analyse + val functionSummaryAnalysisChain = mutableSetOf() + + override fun cleanup() { + // Nothing to do + } + + override fun accept(node: Node) { + functionSummaryAnalysisChain.clear() + return acceptInternal(node) + } + + fun acceptInternal(node: Node) { + // For now, we only execute this for function declarations, we will support all EOG starters + // in the future. + if (node !is FunctionDeclaration) { + return + } + // If the node already has a function summary, we have visited it before and can + // return here. + if ( + node.functionSummary.isNotEmpty() && + node.functionSummary.keys.any { it in node.parameters || it in node.returns } + ) { + return + } + + // Skip empty functions + if (node.body == null) { + return + } + + // Calculate the complexity of the function and see, if it exceeds our threshold + val max = passConfig()?.maxComplexity + val c = node.body?.cyclomaticComplexity ?: 0 + if (max != null && c > max) { + log.info( + "Ignoring function ${node.name} because its complexity (${c}) is greater than the configured maximum (${max})" + ) + return + } + + log.trace("Handling {} (complexity: {})", node.name, c) + + var startState = PointsToState2() + startState = + startState.pushToDeclarationsState( + node, + TupleLattice( + Pair(PowersetLattice(identitySetOf()), PowersetLattice(identitySetOf())) + ) + ) + + startState = initializeParameters(node.parameters, startState) + + val finalState = iterateEOGClean(node.nextEOGEdges, startState, ::transfer) + if (finalState !is PointsToState2) return + + /* Store function summary for this FunctionDeclaration. 
*/ + storeFunctionSummary(node, finalState) + + for ((key, value) in finalState.generalState.elements) { + // All nodes in the state get new memoryValues, Expressions and Declarations + // additionally get new MemoryAddresses + val newMemoryValues = value.elements.second.elements + val newMemoryAddresses = value.elements.first.elements as Collection + if (newMemoryValues.isNotEmpty()) { + key.prevDFG.clear() + key.prevDFG.addAll(newMemoryValues) + } + if (newMemoryAddresses.isNotEmpty()) { + when (key) { + is Expression -> { + key.memoryAddress.clear() + key.memoryAddress.addAll(newMemoryAddresses) + } + is Declaration -> { + if ( + newMemoryAddresses.size == 1 && + newMemoryAddresses.first() is MemoryAddress + ) + key.memoryAddress = newMemoryAddresses.first() as MemoryAddress + } + } + } + } + } + + private fun storeFunctionSummary(node: FunctionDeclaration, doubleState: PointsToState2) { + node.parameters.forEach { param -> + val addresses = doubleState.getAddresses(param) + val indexes = mutableSetOf() + addresses.forEach { addr -> + indexes.addAll(doubleState.getValues(addr)) + // Additionally check for partial writes to fields + if (addr is MemoryAddress) { + addr.fieldAddresses.flatMap { it.value }.forEach { indexes.add(it) } + } + // TODO: Check for writes to deref-derefs + } + // Also check for writes to the ParameterMemoryValue of the deref in case of + // pointer-to-pointers + indexes.addAll(doubleState.getValues(param.memoryValue)) + indexes.forEach { index -> + val finalValue = + doubleState.declarationsState.elements + .filter { it.key == index } + .entries + .firstOrNull() + ?.value + ?.elements + ?.second + ?.elements + finalValue + // See if we can find something that is different from the initial value + ?.filter { + !(it is ParameterMemoryValue && + it.name.localName.contains("derefvalue") && + it.name.parent == param.name) + } + // If so, store the last write for the parameter in the FunctionSummary + ?.forEach { value -> + // TODO: Do we also
map the writes of pointer-to-pointer to the param or + // should we do something else? + node.functionSummary + .computeIfAbsent(param) { mutableSetOf() } + .add(Pair(value, true)) + } + } + } + } + + protected fun transfer(currentEdge: Edge, state: LatticeElement<*>): PointsToState2 { + val currentNode = currentEdge.end + + var doubleState = state as PointsToState2 + + // Used to keep iterating for steps which do not modify the alias-state otherwise + doubleState = + doubleState.pushToDeclarationsState( + currentNode, + doubleState.getFromDecl(currentEdge.end) + ?: TupleLattice(Pair(emptyPowersetLattice(), emptyPowersetLattice())) + ) + + doubleState = + when (currentNode) { + is Declaration, + is MemoryAddress -> handleDeclaration(currentNode, doubleState) + is AssignExpression -> handleAssignExpression(currentNode, doubleState) + is UnaryOperator -> handleUnaryOperator(currentNode, doubleState) + is CallExpression -> handleCallExpression(currentNode, doubleState) + is Expression -> handleExpression(currentNode, doubleState) + is ReturnStatement -> handleReturnStatement(currentNode, doubleState) + else -> doubleState + } + + return doubleState + } + + private fun handleReturnStatement( + currentNode: ReturnStatement, + doubleState: PointsToPass.PointsToState2 + ): PointsToPass.PointsToState2 { + /* For Return Statements, all we really want to do is to collect their return values + to add them to the FunctionSummary */ + var doubleState = doubleState + if (currentNode.returnValues.isNotEmpty()) { + val parentFD = + currentNode.firstParentOrNull { it is FunctionDeclaration } as? 
FunctionDeclaration + if (parentFD != null) { + currentNode.returnValues.forEach { retval -> + parentFD.functionSummary + .computeIfAbsent(currentNode) { mutableSetOf() } + .addAll(doubleState.getValues(retval).map { Pair(it, false) }) + } + } + } + return doubleState + } + + private fun handleCallExpression( + currentNode: CallExpression, + doubleState: PointsToPass.PointsToState2 + ): PointsToPass.PointsToState2 { + var doubleState = doubleState + + val mapDstToSrc = mutableMapOf>() + + // First, check if there are missing FunctionSummaries + currentNode.invokes.forEach { invoke -> + if (invoke.functionSummary.isEmpty()) { + if (invoke.hasBody()) { + log.debug("functionSummaryAnalysisChain: {}", functionSummaryAnalysisChain) + if (invoke !in functionSummaryAnalysisChain) { + val summaryCopy = functionSummaryAnalysisChain.toSet() + functionSummaryAnalysisChain.add(invoke) + acceptInternal(invoke) + functionSummaryAnalysisChain.clear() + functionSummaryAnalysisChain.addAll(summaryCopy) + } else { + log.error( + "Cannot calculate functionSummary for $invoke as it's recursively called. callChain: $functionSummaryAnalysisChain" + ) + val newValues: MutableSet> = + invoke.parameters.map { Pair(it, false) }.toMutableSet() + invoke.functionSummary[ReturnStatement()] = newValues + } + } else { + // Add a dummy function summary so that we don't try this every time + // In this dummy, all parameters point to the return + // TODO: This actually generates a new return statement but it's not part of the + // function. Wouldn't the edges better point to the FunctionDeclaration and in a + // case with a body, all returns flow to the FunctionDeclaration too? 
+ val newValues: MutableSet> = + invoke.parameters.map { Pair(it, false) }.toMutableSet() + invoke.functionSummary[ReturnStatement()] = newValues + } + } + currentNode.arguments.forEach { arg -> + if (arg.argumentIndex < invoke.parameters.size) { + // Create a DFG-Edge from the argument to the parameter's memoryValue + val p = invoke.parameters[arg.argumentIndex] + doubleState = + doubleState.push( + p.memoryValue, + TupleLattice( + Pair( + PowersetLattice(identitySetOf(p.memoryValue)), + PowersetLattice(identitySetOf(arg)) + ) + ) + ) + } + } + + for ((param, newValues) in invoke.functionSummary) { + val destination = + when (param) { + is ParameterDeclaration -> + // Dereference the parameter + if (param.argumentIndex < currentNode.arguments.size) { + currentNode.arguments[param.argumentIndex] + } else null + is ReturnStatement -> currentNode + else -> null + } + if (destination != null) { + val sources = mutableSetOf() + newValues.forEach { (value, derefSource) -> + when (value) { + is ParameterDeclaration -> + // Add the value of the respective argument in the CallExpression + // Only dereference the parameter when we stored that in the + // functionSummary + if (value.argumentIndex < currentNode.arguments.size) { + if (derefSource) { + doubleState + .getValues(currentNode.arguments[value.argumentIndex]) + .forEach { + sources.addAll( + doubleState.fetchElementFromDeclarationState(it) + ) + } + } else { + sources.add(currentNode.arguments[value.argumentIndex]) + } + } + is ParameterMemoryValue -> { + // In case the FunctionSummary says that we have to use the + // dereferenced value here, we look up the argument, dereference it, + // and then add it to the sources + if (value.name.localName == "derefvalue") { + val p = + currentNode.invokes + .flatMap { it.parameters } + .filter { it.name == value.name.parent } + p.forEach { + if (it.argumentIndex < currentNode.arguments.size) { + val arg = currentNode.arguments[it.argumentIndex] + sources.addAll( + 
doubleState.getValues(arg).flatMap { + doubleState.getValues(it) + } + ) + } + } + } + } + else -> sources.add(value) + } + } + if (sources.isNotEmpty()) { + /* destination.forEach { dst -> + mapDstToSrc.computeIfAbsent(dst) { mutableSetOf() } += sources + }*/ + mapDstToSrc.computeIfAbsent(destination) { mutableSetOf() } += sources + } + // } + } + } + } + + mapDstToSrc.forEach { (dst, src) -> + // If the values of the destination are the same as the destination (e.g. if dst is a + // CallExpression), we also add destinations to update the generalState, otherwise, the + // destinationAddresses for the DeclarationState are enough + val dstValues = doubleState.getValues(dst) + if (dstValues.all { it == dst }) + doubleState = doubleState.updateValues(src, dstValues, dstValues) + else doubleState = doubleState.updateValues(src, identitySetOf(), dstValues) + } + // } + + return doubleState + } + + private fun handleUnaryOperator( + currentNode: UnaryOperator, + doubleState: PointsToPass.PointsToState2 + ): PointsToPass.PointsToState2 { + var doubleState = doubleState + /* For UnaryOperators, we have to update the value if it's a ++ or -- operator + */ + // TODO: Check out cases where the input is no Reference + if (currentNode.operatorCode in (listOf("++", "--")) && currentNode.input is Reference) { + val addresses = doubleState.getAddresses(currentNode) + val newDeclState = doubleState.declarationsState.elements.toMutableMap() + /* Update the declarationState for the refersTo */ + doubleState.getAddresses(currentNode.input).forEach { addr -> + newDeclState.replace( + addr, + TupleLattice( + Pair( + PowersetLattice(addresses), + PowersetLattice(identitySetOf(currentNode)) + ) + ) + ) + } + // TODO: Should we already update the input's value in the generalState, or is it + // enough at the next use? 
+ doubleState = PointsToState2(doubleState.generalState, MapLattice(newDeclState)) + } + return doubleState + } + + private fun handleAssignExpression( + currentNode: AssignExpression, + doubleState: PointsToPass.PointsToState2 + ): PointsToPass.PointsToState2 { + var doubleState = doubleState + /* For AssignExpressions, we update the value of the lhs with the value of the rhs + * In C(++), both the lhs and the rhs should only have one element + */ + if (currentNode.lhs.size == 1 && currentNode.rhs.size == 1) { + val sources = currentNode.rhs.flatMap { doubleState.getValues(it) }.toIdentitySet() + val destinations = currentNode.lhs.map { it }.toIdentitySet().toIdentitySet() + val destinationsAddresses = + destinations.flatMap { doubleState.getAddresses(it) }.toIdentitySet() + doubleState = doubleState.updateValues(sources, destinations, destinationsAddresses) + } + + return doubleState + } + + private fun handleExpression( + currentNode: Expression, + doubleState: PointsToPass.PointsToState2 + ): PointsToPass.PointsToState2 { + var doubleState = doubleState + /* If we have an Expression that is written to, we handle it later and ignore it now */ + val access = + if (currentNode is Reference) currentNode.access + else if (currentNode is SubscriptExpression && currentNode.arrayExpression is Reference) + (currentNode.arrayExpression as Reference).access + else null + if (access == AccessValues.READ) { + val addresses = doubleState.getAddresses(currentNode) + val values = doubleState.getValues(currentNode) + + doubleState = + doubleState.push( + currentNode, + TupleLattice(Pair(PowersetLattice(addresses), PowersetLattice(values))) + ) + } + return doubleState + } + + private fun handleDeclaration( + currentNode: Node, + doubleState: PointsToPass.PointsToState2 + ): PointsToState2 { + /* No need to set the address, this already happens in the constructor */ + val addresses = doubleState.getAddresses(currentNode) + + val values = identitySetOf() + + (currentNode as?
HasInitializer)?.initializer?.let { initializer -> + if (initializer is Literal<*>) values.add(initializer) + else values.addAll(doubleState.getValues(initializer)) + } + + var doubleState = + doubleState.push( + currentNode, + TupleLattice(Pair(PowersetLattice(addresses), PowersetLattice(values))) + ) + /* In the DeclarationsState, we save the address which we wrote to the value for easier work with pointers + * */ + addresses.forEach { addr -> + doubleState = + doubleState.pushToDeclarationsState( + addr, + TupleLattice(Pair(PowersetLattice(addresses), PowersetLattice(values))) + ) + } + return doubleState + } + + private fun initializeParameters( + parameters: MutableList, + doubleState: PointsToState2 + ): PointsToState2 { + var doubleState = doubleState + parameters.forEach { param -> + val addresses = doubleState.getAddresses(param) + param.memoryValue.name = Name("value", param.name) + // Since the ParameterDeclaration is never changed, we map the ParameterMemoryValue to + // the same address + // TODO: It may be nicer to use the ParameterDeclaration itself, that could maybe also + // work + param.memoryValue.memoryAddress = param.memoryAddress + val paramState: LatticeElement, PowersetLatticeT>> = + TupleLattice( + Pair( + PowersetLattice(addresses), + PowersetLattice(identitySetOf(param.memoryValue)) + ) + ) + // We also need to track the MemoryValue of the dereference of the parameter, since that + // is what would have an influence outside the function + val paramDeref = ParameterMemoryValue(Name("derefvalue", param.name)) + paramDeref.memoryAddress = param.memoryValue + val paramDerefState: + LatticeElement, PowersetLatticeT>> = + TupleLattice( + Pair( + PowersetLattice(identitySetOf(param.memoryValue)), + PowersetLattice(identitySetOf(paramDeref)) + ) + ) + addresses.forEach { addr -> + doubleState = doubleState.pushToDeclarationsState(addr, paramState) + } + doubleState = doubleState.pushToDeclarationsState(param.memoryValue, paramDerefState) + +
doubleState = doubleState.push(param, paramState) + + // In case the param is a pointer-to-pointer, we also need a dereference of the + // dereference + val paramDerefDeref = ParameterMemoryValue(Name("derefderefvalue", param.name)) + paramDerefDeref.memoryAddress = paramDeref + val paramDerefDerefState: + LatticeElement, PowersetLatticeT>> = + TupleLattice( + Pair( + PowersetLattice(identitySetOf(paramDeref)), + PowersetLattice(identitySetOf(paramDerefDeref)) + ) + ) + doubleState = doubleState.pushToDeclarationsState(paramDeref, paramDerefDerefState) + } + return doubleState + } + + protected class PointsToState2( + generalState: + LatticeElement< + Map< + Node, LatticeElement>, LatticeElement>>> + > + > = + MapLattice(mutableMapOf()), + declarationsState: + LatticeElement< + Map< + Node, LatticeElement>, LatticeElement>>> + > + > = + MapLattice(mutableMapOf()) + ) : + TupleLattice< + Map>, LatticeElement>>>>, + Map>, LatticeElement>>>> + >(Pair(generalState, declarationsState)) { + override fun lub( + other: + LatticeElement< + Pair< + MapLatticeT< + Node, + LatticeElement< + Pair>, LatticeElement>> + > + >, + MapLatticeT< + Node, + LatticeElement< + Pair>, LatticeElement>> + > + > + > + > + ) = + PointsToState2( + this.generalState.lub(other.elements.first), + this.elements.second.lub(other.elements.second) + ) + + override fun duplicate() = + PointsToState2(elements.first.duplicate(), elements.second.duplicate()) + + val generalState: + MapLatticeT< + Node, LatticeElement>, LatticeElement>>> + > + get() = this.elements.first + + val declarationsState: + MapLatticeT< + Node, LatticeElement>, LatticeElement>>> + > + get() = this.elements.second + + fun get( + key: Node + ): LatticeElement>, LatticeElement>>>? { + return this.generalState.elements[key] ?: this.declarationsState.elements[key] + } + + fun getFromDecl( + key: Node + ): LatticeElement>, LatticeElement>>>? 
{ + return this.declarationsState.elements[key] + } + + fun push( + newNode: Node, + newLatticeElement: + LatticeElement>, LatticeElement>>> + ): PointsToState2 { + val newGeneralState = + this.generalState.lub(MapLattice(mutableMapOf(Pair(newNode, newLatticeElement)))) + return PointsToState2(newGeneralState, declarationsState) + } + + /** Pushes the [newNode] and its [newLatticeElement] to the [declarationsState]. */ + fun pushToDeclarationsState( + newNode: Node, + newLatticeElement: + LatticeElement>, LatticeElement>>> + ): PointsToState2 { + val newDeclarationsState = + this.declarationsState.lub( + MapLattice(mutableMapOf(Pair(newNode, newLatticeElement))) + ) + return PointsToState2(generalState, newDeclarationsState) + } + + override fun equals(other: Any?): Boolean { + if (other !is PointsToState2) return false + return other.elements.first == this.elements.first && + other.elements.second == this.elements.second + } + + /* Fetch the entry for `node` from the DeclarationState. If there isn't any, create + an UnknownMemoryValue + */ + fun fetchElementFromDeclarationState( + node: Node, + // useAddress: Boolean = false + ): IdentitySet { + val elements = + // if (useAddress) + // this.declarationsState.elements[node]?.elements?.first?.elements + /*else*/ this.declarationsState.elements[node]?.elements?.second?.elements + if (elements.isNullOrEmpty()) { + val newName = if (node is Literal<*>) Name(node.value.toString()) else node.name + val newEntry = identitySetOf(UnknownMemoryValue(newName)) + (this.declarationsState.elements + as? + MutableMap< + Node, + LatticeElement< + Pair>, LatticeElement>> + > + >) + ?.computeIfAbsent(node) { + TupleLattice( + Pair(PowersetLattice(identitySetOf(node)), PowersetLattice(newEntry)) + ) + } + val newElements = + /*if (useAddress) this.declarationsState.elements[node]?.elements?.first?.elements + else*/ this.declarationsState.elements[node] + ?.elements + ?.second + ?.elements + (newElements as? 
IdentitySet)?.addAll(newEntry) + return newEntry + } else return elements.toIdentitySet() + } + + fun getValues(node: Node): Set { + return when (node) { + is PointerReference -> { + /* For PointerReferences, the value is the address of the input + * For example, the value of `&i` is the address of `i` + * */ + this.getAddresses(node.input) + } + is PointerDereference -> { + /* To find the value for PointerDereferences, we first check what's the current value of the input, which is probably a MemoryAddress + * Then we look up the current value at this MemoryAddress + */ + val inputVal = + when (node.input) { + is Reference -> this.getValues(node.input) + else -> // TODO: How can we handle other cases? + identitySetOf(UnknownMemoryValue(node.name)) + } + val retVal = identitySetOf() + inputVal.forEach { retVal.addAll(fetchElementFromDeclarationState(it)) } + retVal + } + is Declaration -> { + /* For Declarations, we have to look up the last value written to it. + */ + if (!node.memoryAddressIsInitialized()) + node.memoryAddress = MemoryAddress(node.name) + fetchElementFromDeclarationState(node) + } + is MemoryAddress -> { + fetchElementFromDeclarationState(node) + } + is Reference -> { + /* For References, we have to look up the last value written to its declaration. 
+ */ + this.getAddresses(node).flatMap { this.getValues(it) }.toIdentitySet() + } + is CastExpression -> { + this.getValues(node.expression) + } + is UnaryOperator -> this.getValues(node.input) + is SubscriptExpression -> + this.getAddresses(node).flatMap { this.getValues(it) }.toIdentitySet() + is CallExpression -> { + identitySetOf(node) + // Let's see if we have a functionSummary for the CallExpression + /*val functionDeclaration = + node.invokes.firstOrNull() + ?: return identitySetOf(UnknownMemoryValue(node.name)) + val functionSummaries = node.ctx?.config?.functionSummaries + if ( + functionSummaries?.hasSummary(functionDeclaration) == true && + // Also check that we don't just have a dummy Summary + node.ctx + ?.config + ?.functionSummaries + ?.getLastWrites(functionDeclaration) + ?.isNotEmpty() == true + ) { + // Get all the ReturnValues from the Summary and return their values + val retVals = + node.ctx?.config?.functionSummaries?.getLastWrites(functionDeclaration) + if (retVals != null) { + val r = identitySetOf() + for ((param, values) in retVals) { + if (param is ReturnStatement) { + values.forEach { (v, derefSource) -> + if (v is ParameterDeclaration) { + if (derefSource) { + this.getValues(node.arguments[v.argumentIndex]) + .forEach { r.addAll(this.getValues(it)) } + } else if (v.argumentIndex < node.arguments.size) { + r.add(node.arguments[v.argumentIndex]) + } + } else r.add(v) + } + } + } + r + } else identitySetOf(UnknownMemoryValue(node.name)) + } else identitySetOf(UnknownMemoryValue(node.name))*/ + } + /*is BinaryOperator -> identitySetOf(node)*/ + /* In these cases, we simply have to fetch the current value for the MemoryAddress from the DeclarationState */ + else -> /*fetchElementFromDeclarationState(node)*/ identitySetOf(node) + } + } + + fun getAddresses(node: Node): Set { + return when (node) { + is Declaration -> { + /* + * For declarations, we created a new MemoryAddress node, so that's the one we use here + */ + if 
(!node.memoryAddressIsInitialized()) + node.memoryAddress = MemoryAddress(node.name) + identitySetOf(node.memoryAddress) + } + is ParameterMemoryValue -> { + if (node.memoryAddress != null) identitySetOf(node.memoryAddress!!) + else identitySetOf() + } + is MemoryAddress -> { + TODO() + // fetchElementFromDeclarationState(node, useAddress = true) + identitySetOf(node) + } + is PointerReference -> { + identitySetOf() + } + is PointerDereference -> { + /* + PointerDereferences have as address the value of their input. + For example, the address of `*a` is the value of `a` + */ + this.getValues(node.input) + } + is MemberExpression -> { + /* + * For MemberExpressions, the fieldAddresses in the MemoryAddress node of the base hold the information we are looking for + */ + // TODO: Are there any cases where the address of the base is no MemoryAddress? + // but still relevant for us? + getFieldAddresses( + this.getAddresses(node.base).filterIsInstance(), + /*node.refersTo?.name.toString(),*/ + node.name + ) + } + is Reference -> { + /* + For references, the address is the same as for the declaration, AKA the refersTo + */ + node.refersTo?.let { refersTo -> + /* In some cases, the refersTo might not yet have an initialized MemoryAddress, for example if it's a FunctionDeclaration. So let's to this here */ + if (!refersTo.memoryAddressIsInitialized()) + refersTo.memoryAddress = MemoryAddress(node.name) + + identitySetOf(refersTo.memoryAddress) + } ?: identitySetOf() + } + is CastExpression -> { + /* + For CastExpressions we take the expression as the cast itself does not have any impact on the address + */ + this.getAddresses(node.expression) + } + is SubscriptExpression -> { + val localName = + if (node.subscriptExpression is Literal<*>) + (node.subscriptExpression as? 
Literal<*>)?.value.toString() + else node.subscriptExpression.name.toString() + getFieldAddresses( + this.getAddresses(node.base).filterIsInstance(), + Name(localName, node.arrayExpression.name) + ) + } + else -> identitySetOf(node) + } + } + + /* + * Look up the `indexString` in the `baseAddress`es and return the fieldAddresses + * If no MemoryAddress exits at `indexString`, it will be created + */ + fun getFieldAddresses(baseAddresses: List, nodeName: Name): Set { + val fieldAddresses = identitySetOf() + + /* Theoretically, the base can have multiple addresses. Additionally, also the fieldDeclaration can have multiple Addresses. To simplify, we flatten the set and collect all possible addresses of the fieldDeclaration in a flat set */ + baseAddresses.forEach { addr -> + addr.fieldAddresses[nodeName.localName]?.forEach { fieldAddresses.add(it) } + } + /* If we do not yet have a MemoryAddress for this FieldDeclaration, we create one */ + if (fieldAddresses.isEmpty()) { + val newMemoryAddress = MemoryAddress(nodeName) + + fieldAddresses.add(newMemoryAddress) + baseAddresses.forEach { addr -> + addr.fieldAddresses[nodeName.localName] = identitySetOf(newMemoryAddress) + } + } + return fieldAddresses + } + + /** + * Updates the declarationState at `destinationAddresses` to the values in `sources`. 
+ * Additionally updates the generalstate at `destinations` if there is any + */ + fun updateValues( + sources: Set, + destinations: Set, + destinationAddresses: Set + ): PointsToState2 { + // val addresses = destinations.flatMap { this.getAddresses(it) }.toIdentitySet() + val newDeclState = this.declarationsState.elements.toMutableMap() + val newGenState = this.generalState.elements.toMutableMap() + /* Update the declarationState for the address */ + destinationAddresses.forEach { addr -> + newDeclState[addr] = + TupleLattice( + Pair(PowersetLattice(destinationAddresses), PowersetLattice(sources)) + ) + } + /* Also update the generalState for dst (if we have any destinations) */ + destinations.forEach { d -> + newGenState[d] = + TupleLattice( + Pair(PowersetLattice(destinationAddresses), PowersetLattice(sources)) + ) + } + var doubleState = PointsToState2(MapLattice(newGenState), MapLattice(newDeclState)) + + /* When we are dealing with SubscriptExpression, we also have to initialise the arrayExpression + , since that hasn't been done yet */ + destinations.filterIsInstance().forEach { d -> + val AEaddresses = this.getAddresses(d.arrayExpression) + val AEvalues = this.getValues(d.arrayExpression) + + doubleState = + doubleState.push( + d.arrayExpression, + TupleLattice(Pair(PowersetLattice(AEaddresses), PowersetLattice(AEvalues))) + ) + } + + return doubleState + } + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt index a848bd51b5..c7b6dba3f1 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/ProgramDependenceGraphPass.kt @@ -109,7 +109,9 @@ class ProgramDependenceGraphPass(ctx: TranslationContext) : TranslationUnitPass( while (worklist.isNotEmpty()) { val currentStatus = worklist.removeFirst() - 
alreadySeenNodes.add(currentStatus) + if (!alreadySeenNodes.add(currentStatus)) { + continue + } val nextEOG = currentStatus.nextEOG.filter { it != through } if (nextEOG.isEmpty()) { // This path always flows through "through" or has not seen "to", so we're good diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index d463d64090..4cff97c62b 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -400,7 +400,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return member } - protected fun handle(node: Node?, currClass: RecordDeclaration?) { + protected open fun handle(node: Node?, currClass: RecordDeclaration?) { when (node) { is MemberExpression -> handleMemberExpression(currClass, node) is Reference -> handleReference(currClass, node) @@ -410,7 +410,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - protected fun handleCallExpression(call: CallExpression) { + protected open fun handleCallExpression(call: CallExpression) { // Some local variables for easier smart casting val callee = call.callee val language = call.language @@ -487,7 +487,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { when (result.success) { PROBLEMATIC -> { log.error( - "Resolution of ${call.name} returned an problematic result and we cannot decide correctly, the invokes edge will contain all possible viable functions" + "Resolution of ${call.name} returned a problematic result and we cannot decide correctly, the invokes edge will contain all possible viable functions" ) call.invokes = result.bestViable.toMutableList() } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/configuration/ReplacePass.kt 
b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/configuration/ReplacePass.kt index cda13f6f59..ef659cdb38 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/configuration/ReplacePass.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/configuration/ReplacePass.kt @@ -28,6 +28,7 @@ package de.fraunhofer.aisec.cpg.passes.configuration import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.passes.EvaluationOrderGraphPass import de.fraunhofer.aisec.cpg.passes.Pass +import de.fraunhofer.aisec.cpg.passes.TranslationUnitPass import kotlin.reflect.KClass /** @@ -36,6 +37,9 @@ import kotlin.reflect.KClass * * The primary use-case for this annotation is to allow language frontends to override specific * passes, such as the [EvaluationOrderGraphPass] in order to optimize language specific graphs. + * + * Please, be careful: DO NOT register the to-be-replaced pass with registerPass. Additionally, + * currently, only a [TranslationUnitPass] can be replaced. 
*/ @Retention(AnnotationRetention.RUNTIME) @Target(AnnotationTarget.CLASS) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt index 873aa1d873..38c89dea20 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/DFGFunctionSummaries.kt @@ -38,6 +38,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.parseName import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.graph.unknownType +import de.fraunhofer.aisec.cpg.helpers.identitySetOf import de.fraunhofer.aisec.cpg.matchesSignature import de.fraunhofer.aisec.cpg.tryCast import java.io.File @@ -55,21 +56,12 @@ class DFGFunctionSummaries { /** Caches a mapping of the [FunctionDeclarationEntry] to a list of its [DFGEntry]. */ val functionToDFGEntryMap = mutableMapOf>() - /** - * Saves the information on which parameter(s) of a function are modified by the function. This - * is interesting since we need to add DFG edges between the modified parameter and the - * respective argument(s). For each [ParameterDeclaration] as well as the - * [MethodDeclaration.receiver] that has some incoming DFG-edge within this - * [FunctionDeclaration], we store all previous DFG nodes. 
- */ - val functionToChangedParameters = - mutableMapOf>>() - fun hasSummary(functionDeclaration: FunctionDeclaration) = - functionDeclaration in functionToChangedParameters + functionDeclaration.functionSummary.isNotEmpty() - fun getLastWrites(functionDeclaration: FunctionDeclaration): Map> = - functionToChangedParameters[functionDeclaration] ?: mapOf() + fun getLastWrites( + functionDeclaration: FunctionDeclaration + ): Map>> = functionDeclaration.functionSummary /** This function returns a list of [DataflowEntry] from the specified file. */ private fun addEntriesFromFile(file: File): Map> { @@ -247,11 +239,17 @@ class DFGFunctionSummaries { dfgEntries: List ) { for (entry in dfgEntries) { + var derefSource = false val from = if (entry.from.startsWith("param")) { try { - val paramIndex = entry.from.removePrefix("param").toInt() - functionDeclaration.parameters[paramIndex] + val e = entry.from.split(".") + val paramIndex = e.getOrNull(0)?.removePrefix("param")?.toInt() + if (paramIndex != null) { + val foo = functionDeclaration.parameters.getOrNull(paramIndex) + if (e.getOrNull(1) == "address") derefSource = true + foo + } else null } catch (e: NumberFormatException) { null } @@ -263,13 +261,14 @@ class DFGFunctionSummaries { val to = if (entry.to.startsWith("param")) { try { - val paramIndex = entry.to.removePrefix("param").toInt() - val paramTo = functionDeclaration.parameters[paramIndex] - if (from != null) { - functionToChangedParameters - .computeIfAbsent(functionDeclaration) { mutableMapOf() } - .computeIfAbsent(paramTo) { mutableSetOf() } - .add(from) + val e = entry.to.split(".") + val paramIndex = e.getOrNull(0)?.removePrefix("param")?.toInt() + val paramTo = + paramIndex?.let { functionDeclaration.parameters.getOrNull(it) } + if (from != null && paramTo != null) { + functionDeclaration.functionSummary + .computeIfAbsent(paramTo) { identitySetOf>() } + .add(Pair(from, derefSource)) } paramTo } catch (e: NumberFormatException) { @@ -279,10 +278,9 @@ class 
DFGFunctionSummaries { val receiver = (functionDeclaration as? MethodDeclaration)?.receiver if (from != null) { if (receiver != null) { - functionToChangedParameters - .computeIfAbsent(functionDeclaration) { mutableMapOf() } - .computeIfAbsent(receiver, ::mutableSetOf) - .add(from) + functionDeclaration.functionSummary + .computeIfAbsent(receiver) { identitySetOf>() } + .add(Pair(from, derefSource)) } } receiver diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt index 74b1cbf3c6..0ddf1773a1 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt @@ -646,15 +646,23 @@ class GraphExamples { ifStmt { condition { ref("b") gt literal(0, t("int")) } thenStmt { - ref("d") assign ref("a") * literal(2, t("int")) + ref("d") assign + { + ref("a") * literal(2, t("int")) + } ref("a") assign - ref("a") + ref("d") * literal(2, t("int")) + { + ref("a") + + ref("d") * literal(2, t("int")) + } } elseIf { condition { ref("b") lt literal(-2, t("int")) } thenStmt { ref("a") assign - ref("a") - literal(10, t("int")) + { + ref("a") - literal(10, t("int")) + } } } } diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt index 68158c6137..36b3fb48ce 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/DFGFunctionSummariesTest.kt @@ -44,6 +44,7 @@ import de.fraunhofer.aisec.cpg.passes.* import de.fraunhofer.aisec.cpg.passes.inference.DFGFunctionSummaries import de.fraunhofer.aisec.cpg.passes.inference.startInference import java.io.File +import kotlin.test.Ignore import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertNotNull 
@@ -298,6 +299,9 @@ class DFGFunctionSummariesTest { } @Test + @Ignore( + "This test does not make sense because the DFGPass does not draw the edges between a reference to the Declaration any more, which is, however, the functionality that this test aims at." + ) fun testPropagateArgumentsControlFlowInsensitive() { // We don't use the ControlFlowSensitiveDFGPass here to check the method // DFGPass.connectInferredCallArguments diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ShortcutsTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ShortcutsTest.kt index c1e929a2e1..1a8e6661eb 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ShortcutsTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/ShortcutsTest.kt @@ -362,13 +362,20 @@ class ShortcutsTest { fun testUnwrapReference() { with(TestLanguageFrontend()) { val a = newReference("a") + val aPtrRef = newPointerReference("a") val op = newUnaryOperator("&", prefix = true, postfix = false) op.input = a val cast = newCastExpression() cast.castType = objectType("int64") cast.expression = op - - assertEquals(a, cast.unwrapReference()) + val castPtrRef = newCastExpression() + castPtrRef.castType = objectType("int64") + castPtrRef.expression = aPtrRef + + assertNull(cast.unwrapReference()) + assertNull(op.unwrapReference()) + assertEquals(aPtrRef, aPtrRef.unwrapReference()) + assertEquals(aPtrRef, castPtrRef.unwrapReference()) } } diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/TupleDeclarationTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/TupleDeclarationTest.kt index a2bda404c4..08ab069c96 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/TupleDeclarationTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/graph/declarations/TupleDeclarationTest.kt @@ -34,12 +34,16 @@ import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import 
de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.TupleType import de.fraunhofer.aisec.cpg.test.* +import kotlin.test.Ignore import kotlin.test.Test import kotlin.test.assertContains import kotlin.test.assertIs import kotlin.test.assertNotNull class TupleDeclarationTest { + @Ignore( + "This test does not make sense because the DFGPass does not draw the edges between a reference to the Declaration any more. This affects global variables." + ) @Test fun testTopLevelTuple() { with( diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontend.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontend.kt index 28b5ee5ed8..1ce57bfd17 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontend.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontend.kt @@ -572,7 +572,7 @@ open class CXXLanguageFrontend(language: Language, ctx: Tra } // void type specifier.type == IASTSimpleDeclSpecifier.t_void -> { - IncompleteType() + IncompleteType(language) } // __typeof__ type specifier.type == IASTSimpleDeclSpecifier.t_typeof -> { diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/ExpressionHandler.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/ExpressionHandler.kt index 464b7bc4a2..650649a0dd 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/ExpressionHandler.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/ExpressionHandler.kt @@ -357,9 +357,11 @@ class ExpressionHandler(lang: CXXLanguageFrontend) : return newMemberExpression( name, - base, + if (ctx.isPointerDereference) + newPointerDereference(base.name, rawNode = ctx).apply { this.input = base } + else base, unknownType(), - if (ctx.isPointerDereference) "->" else ".", + /*if 
(ctx.isPointerDereference) "->" else */ ".", rawNode = ctx ) } @@ -409,17 +411,33 @@ class ExpressionHandler(lang: CXXLanguageFrontend) : else -> Util.errorWithFileLocation(frontend, ctx, log, "unknown operator {}", ctx.operator) } - val unaryOperator = - newUnaryOperator( - operatorCode, - ctx.isPostfixOperator, - !ctx.isPostfixOperator, - rawNode = ctx - ) - if (input != null) { - unaryOperator.input = input + if (operatorCode == "&") { + return newPointerReference(handle(ctx.operand)?.name, unknownType(), rawNode = ctx) + .apply { + if (input != null) { + this.input = input + } + } + } else if (operatorCode == "*") { + return newPointerDereference(handle(ctx.operand)?.name, unknownType(), rawNode = ctx) + .apply { + if (input != null) { + this.input = input + } + } + } else { + val unaryOperator = + newUnaryOperator( + operatorCode, + ctx.isPostfixOperator, + !ctx.isPostfixOperator, + rawNode = ctx + ) + if (input != null) { + unaryOperator.input = input + } + return unaryOperator } - return unaryOperator } private fun handleFunctionCallExpression(ctx: IASTFunctionCallExpression): Expression { diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt index 8592f8d0f7..b018937434 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt @@ -579,12 +579,29 @@ internal class CXXLanguageFrontendTest : BaseTest() { // b = *ptr; val assign = statements[++line] as AssignExpression - val dereference = assign.rhs() + val dereference = assign.rhs() assertNotNull(dereference) - input = dereference.input - assertLocalName("ptr", input) - assertEquals("*", dereference.operatorCode) - assertTrue(dereference.isPrefix) + assertLocalName("ptr", dereference.refersTo) + + // 
int* c; + val cDecl = statements[++line] as DeclarationStatement + // *c = 7; + val cAssignment = statements[++line] as AssignExpression + + val cDeref = cAssignment.lhs() + assertNotNull(cDeref) + assertLocalName("c", cDeref.refersTo) + + val literal7 = cAssignment.rhs>() + assertNotNull(literal7) + assertEquals(setOf(cDeref), literal7.nextDFG) + + val cNextUsageStmt = statements[++line] as AssignExpression + val cNextUsage = cNextUsageStmt.rhs() + assertNotNull(cNextUsage) + assertEquals(setOf(cNextUsage), cDeref.nextDFG) + + // TODO: this no longer tests UnaryOperator -> move to test PointerDereference } @Test @@ -1564,11 +1581,12 @@ internal class CXXLanguageFrontendTest : BaseTest() { // We do not want any inferred functions assertTrue(tu.functions.none { it.isInferred }) - val noParamPointerCall = tu.calls("no_param").firstOrNull { it.callee is UnaryOperator } + val noParamPointerCall = + tu.calls("no_param").firstOrNull { it.callee is PointerDereference } assertInvokes(assertNotNull(noParamPointerCall), target) val noParamNoInitPointerCall = - tu.calls("no_param_uninitialized").firstOrNull { it.callee is UnaryOperator } + tu.calls("no_param_uninitialized").firstOrNull { it.callee is PointerDereference } assertInvokes(assertNotNull(noParamNoInitPointerCall), target) val noParamCall = tu.calls("no_param").firstOrNull { it.callee is Reference } diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPassTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPassTest.kt new file mode 100644 index 0000000000..d22cc613e8 --- /dev/null +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/passes/PointsToPassTest.kt @@ -0,0 +1,1387 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.passes + +import de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage +import de.fraunhofer.aisec.cpg.graph.allChildren +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration +import de.fraunhofer.aisec.cpg.graph.declarations.ParameterDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.statements.expressions.* +import de.fraunhofer.aisec.cpg.test.analyzeAndGetFirstTU +import de.fraunhofer.aisec.cpg.test.assertLocalName +import java.io.File +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +class PointsToPassTest { + companion object { + private val topLevel = java.nio.file.Path.of("src", "test", "resources") + } + + @Test + fun testBasics() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // Declarations + val iDecl = + tu.allChildren { it.location?.region?.startLine == 4 }.first() + val jDecl = + tu.allChildren { it.location?.region?.startLine == 5 }.first() + val aDecl = + tu.allChildren { 
it.location?.region?.startLine == 6 }.first() + val bDecl = + tu.allChildren { it.location?.region?.startLine == 7 }.first() + + // Literals + val literal0 = tu.allChildren> { it.location?.region?.startLine == 4 }.first() + val literal1 = tu.allChildren> { it.location?.region?.startLine == 5 }.first() + val literal2 = tu.allChildren> { it.location?.region?.startLine == 9 }.first() + val literal3 = tu.allChildren> { it.location?.region?.startLine == 17 }.first() + + // PointerReferences + val iPointerRef = + tu.allChildren { it.location?.region?.startLine == 6 }.first() + val jPointerRef = + tu.allChildren { it.location?.region?.startLine == 15 }.first() + + // PointerDeReferences + val aPointerDerefLine12 = + tu.allChildren { it.location?.region?.startLine == 12 }.first() + val aPointerDerefLine14 = + tu.allChildren { it.location?.region?.startLine == 14 }.first() + val aPointerDerefLine16 = + tu.allChildren { it.location?.region?.startLine == 16 }.first() + val aPointerDerefLine17 = + tu.allChildren { it.location?.region?.startLine == 17 }.first() + val bPointerDerefLine18 = + tu.allChildren { it.location?.region?.startLine == 18 }.first() + + // References + val iRefLine8 = tu.allChildren { it.location?.region?.startLine == 8 }.first() + val iRefLine9 = tu.allChildren { it.location?.region?.startLine == 9 }.first() + val iRefLine10 = tu.allChildren { it.location?.region?.startLine == 10 }.first() + val iRefLine11 = tu.allChildren { it.location?.region?.startLine == 11 }.first() + val aRefLine15 = + tu.allChildren { + it.location?.region?.startLine == 15 && it.name.localName == "a" + } + .first() + + // UnaryOperators + val iUO = tu.allChildren { it.location?.region?.startLine == 13 }.first() + + // Line 4 + assertLocalName("i", iDecl.memoryAddress) + assertEquals(1, iDecl.prevDFG.size) + assertEquals(literal0, iDecl.prevDFG.first()) + + // Line 5 + assertLocalName("j", jDecl.memoryAddress) + assertEquals(1, jDecl.prevDFG.size) + assertEquals(literal1, 
jDecl.prevDFG.first()) + + // Line 6 + assertLocalName("a", aDecl.memoryAddress) + assertEquals(1, aDecl.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), iDecl.memoryAddress) + assertTrue(iPointerRef.memoryAddress.isEmpty()) + assertEquals(1, iPointerRef.prevDFG.size) + assertEquals(iPointerRef.prevDFG.first(), iDecl.memoryAddress) + + // Line 7 + assertLocalName("b", bDecl.memoryAddress) + assertEquals(1, bDecl.prevDFG.size) + assertEquals(iDecl.memoryAddress, bDecl.prevDFG.first()) + + // Line 8 + assertEquals(1, iRefLine8.memoryAddress.size) + assertEquals(iDecl.memoryAddress, iRefLine8.memoryAddress.first()) + assertEquals(1, iRefLine8.prevDFG.size) + assertEquals(literal0, iRefLine8.prevDFG.first()) + + // Line 9 + assertEquals(1, iRefLine9.memoryAddress.size) + assertEquals(iDecl.memoryAddress, iRefLine9.memoryAddress.first()) + assertEquals(1, iRefLine9.prevDFG.size) + assertEquals(literal2, iRefLine9.prevDFG.filterIsInstance>().first()) + + // Line 10 + assertEquals(1, iRefLine10.memoryAddress.size) + assertEquals(iDecl.memoryAddress, iRefLine10.memoryAddress.first()) + assertEquals(1, iRefLine10.prevDFG.size) + assertEquals(literal2, iRefLine10.prevDFG.first()) + + // Line 11 + assertEquals(1, iRefLine11.memoryAddress.size) + assertEquals(iDecl.memoryAddress, iRefLine11.memoryAddress.first()) + assertEquals(1, iRefLine11.prevDFG.size) + assertTrue(iRefLine11.prevDFG.filterIsInstance().isNotEmpty()) + + // Line 12 + assertEquals(1, aPointerDerefLine12.memoryAddress.size) + assertEquals(iDecl.memoryAddress, aPointerDerefLine12.memoryAddress.first()) + assertEquals(1, aPointerDerefLine12.prevDFG.size) + assertTrue(aPointerDerefLine12.prevDFG.filterIsInstance().isNotEmpty()) + + // Line 13 should only update the DeclarationState, not much here to test + // Line 14 + assertEquals(1, aPointerDerefLine14.memoryAddress.size) + assertEquals(iDecl.memoryAddress, aPointerDerefLine14.memoryAddress.first()) + assertEquals(1, aPointerDerefLine14.prevDFG.size) + 
assertEquals(iUO, aPointerDerefLine14.prevDFG.first()) + + // Line 15 + assertTrue(jPointerRef.memoryAddress.isEmpty()) + assertEquals(1, jPointerRef.prevDFG.size) + assertEquals(jDecl.memoryAddress, jPointerRef.prevDFG.first()) + assertEquals(1, aRefLine15.memoryAddress.size) + assertEquals(aDecl.memoryAddress, aRefLine15.memoryAddress.first()) + assertEquals(1, aRefLine15.prevDFG.size) + assertEquals(jDecl.memoryAddress, aRefLine15.prevDFG.first()) + + // Line 16 + assertEquals(1, aPointerDerefLine16.memoryAddress.size) + assertEquals(jDecl.memoryAddress, aPointerDerefLine16.memoryAddress.first()) + assertEquals(1, aPointerDerefLine16.prevDFG.size) + assertEquals(literal1, aPointerDerefLine16.prevDFG.first()) + + // Line 17 + assertEquals(1, aPointerDerefLine17.memoryAddress.size) + assertEquals(jDecl.memoryAddress, aPointerDerefLine17.memoryAddress.first()) + assertEquals(1, aPointerDerefLine17.prevDFG.size) + assertEquals(literal3, aPointerDerefLine17.prevDFG.first()) + + // Line 18 + assertEquals(1, bPointerDerefLine18.memoryAddress.size) + assertEquals(iDecl.memoryAddress, bPointerDerefLine18.memoryAddress.first()) + assertEquals(1, bPointerDerefLine18.prevDFG.size) + assertEquals(iUO, bPointerDerefLine18.prevDFG.first()) + } + + @Test + fun testConditions() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // Declarations + val iDecl = tu.allChildren { it.location?.region?.startLine == 22 }.first() + val jDecl = tu.allChildren { it.location?.region?.startLine == 23 }.first() + val aDecl = tu.allChildren { it.location?.region?.startLine == 24 }.first() + + // PointerDerefs + val aPointerDerefLine27 = + tu.allChildren { + it.location?.region?.startLine == 27 && it.name.localName == "a" + } + .first() + val 
aPointerDerefLine30 = + tu.allChildren { + it.location?.region?.startLine == 30 && it.name.localName == "a" + } + .first() + val aPointerDerefLine32 = + tu.allChildren { + it.location?.region?.startLine == 32 && it.name.localName == "a" + } + .first() + val aPointerDerefLine37 = + tu.allChildren { + it.location?.region?.startLine == 37 && it.name.localName == "a" + } + .first() + + // UnaryOperator + val iUO = tu.allChildren { it.location?.region?.startLine == 35 }.first() + + // Line 27 + assertEquals(iDecl.memoryAddress, aPointerDerefLine27.memoryAddress.firstOrNull()) + assertEquals(1, aPointerDerefLine27.prevDFG.size) + assertEquals(iDecl.prevDFG.first(), aPointerDerefLine27.prevDFG.first()) + + // Line 30 + assertEquals(jDecl.memoryAddress, aPointerDerefLine30.memoryAddress.firstOrNull()) + assertEquals(1, aPointerDerefLine30.prevDFG.size) + assertEquals(jDecl.prevDFG.first(), aPointerDerefLine30.prevDFG.first()) + + // Line 32 + assertEquals(2, aPointerDerefLine32.memoryAddress.size) + aPointerDerefLine32.memoryAddress.containsAll( + setOf(iDecl.memoryAddress, jDecl.memoryAddress) + ) + assertEquals(2, aPointerDerefLine32.prevDFG.size) + assertTrue(aPointerDerefLine32.prevDFG.contains(iDecl.prevDFG.first())) + assertTrue(aPointerDerefLine32.prevDFG.contains(jDecl.prevDFG.first())) + + // Line 37 + assertEquals(2, aPointerDerefLine37.memoryAddress.size) + assertTrue( + aPointerDerefLine37.memoryAddress.containsAll( + setOf(iDecl.memoryAddress, jDecl.memoryAddress) + ) + ) + assertEquals(3, aPointerDerefLine37.prevDFG.size) + assertTrue(aPointerDerefLine37.prevDFG.contains(iDecl.prevDFG.first())) + assertTrue(aPointerDerefLine37.prevDFG.contains(jDecl.prevDFG.first())) + assertTrue(aPointerDerefLine37.prevDFG.contains(iUO)) + } + + @Test + fun testStructs() { + val file = File("src/test/resources/pointsto.cpp") + // val file = File("/tmp/pointsto.c") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + // 
it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // References + val saLine51 = + tu.allChildren { it.location?.region?.startLine == 51 }.first() + val sbLine52 = + tu.allChildren { it.location?.region?.startLine == 52 }.first() + val saLine53 = + tu.allChildren { + it.location?.region?.startLine == 53 && it.name.localName == "a" + } + .first() + val sbLine53 = + tu.allChildren { + it.location?.region?.startLine == 53 && it.name.localName == "b" + } + .first() + val paLine55 = + tu.allChildren { + it.location?.region?.startLine == 55 && it.name.localName == "a" + } + .first() + val pbLine55 = + tu.allChildren { + it.location?.region?.startLine == 55 && it.name.localName == "b" + } + .first() + val paLine56 = + tu.allChildren { it.location?.region?.startLine == 56 }.first() + val pbLine57 = + tu.allChildren { it.location?.region?.startLine == 57 }.first() + val paLine59 = + tu.allChildren { + it.location?.region?.startLine == 59 && it.name.localName == "a" + } + .first() + val pbLine59 = + tu.allChildren { + it.location?.region?.startLine == 59 && it.name.localName == "b" + } + .first() + + // Literals + val literal1 = tu.allChildren> { it.location?.region?.startLine == 51 }.first() + val literal2 = tu.allChildren> { it.location?.region?.startLine == 52 }.first() + val literal3 = tu.allChildren> { it.location?.region?.startLine == 56 }.first() + val literal4 = tu.allChildren> { it.location?.region?.startLine == 57 }.first() + + // Line 51 + assertEquals(1, saLine51.memoryAddress.size) + assertEquals( + ((saLine51.base as? Reference)?.memoryAddress?.firstOrNull() as? 
MemoryAddress) + ?.fieldAddresses + ?.filter { it.key == saLine51.refersTo?.name?.localName } + ?.entries + ?.firstOrNull() + ?.value + ?.firstOrNull(), + saLine51.memoryAddress.firstOrNull() + ) + assertEquals(1, saLine51.prevDFG.size) + assertEquals(literal1, saLine51.prevDFG.firstOrNull()) + + // Line 52 + assertEquals(1, sbLine52.memoryAddress.size) + assertEquals( + ((sbLine52.base as? Reference)?.memoryAddress?.firstOrNull() as? MemoryAddress) + ?.fieldAddresses + ?.filter { it.key == sbLine52.refersTo?.name?.localName } + ?.entries + ?.firstOrNull() + ?.value + ?.firstOrNull(), + sbLine52.memoryAddress.firstOrNull() + ) + assertEquals(1, sbLine52.prevDFG.size) + assertEquals(literal2, sbLine52.prevDFG.firstOrNull()) + + // Line 53 + assertEquals(1, saLine53.memoryAddress.size) + assertEquals( + ((saLine53.base as? Reference)?.memoryAddress?.firstOrNull() as? MemoryAddress) + ?.fieldAddresses + ?.filter { it.key == saLine53.refersTo?.name?.localName } + ?.entries + ?.firstOrNull() + ?.value + ?.firstOrNull(), + saLine53.memoryAddress.firstOrNull() + ) + assertEquals(1, saLine53.prevDFG.size) + assertEquals(literal1, saLine53.prevDFG.firstOrNull()) + + assertEquals(1, sbLine53.memoryAddress.size) + assertEquals( + ((sbLine53.base as? Reference)?.memoryAddress?.firstOrNull() as? 
MemoryAddress) + ?.fieldAddresses + ?.filter { it.key == sbLine53.refersTo?.name?.localName } + ?.entries + ?.firstOrNull() + ?.value + ?.firstOrNull(), + sbLine53.memoryAddress.firstOrNull() + ) + assertEquals(1, sbLine53.prevDFG.size) + assertEquals(literal2, sbLine53.prevDFG.firstOrNull()) + + // Line 55 + assertEquals(1, paLine55.memoryAddress.size) + assertEquals(saLine51.memoryAddress.first(), paLine55.memoryAddress.first()) + assertEquals(1, paLine55.prevDFG.size) + assertEquals(literal1, paLine55.prevDFG.first()) + + assertEquals(1, pbLine55.memoryAddress.size) + assertEquals(sbLine52.memoryAddress.first(), pbLine55.memoryAddress.first()) + assertEquals(1, pbLine55.prevDFG.size) + assertEquals(literal2, pbLine55.prevDFG.first()) + + // Line 56 + assertEquals(1, paLine56.memoryAddress.size) + assertEquals(saLine51.memoryAddress.first(), paLine56.memoryAddress.first()) + assertEquals(1, paLine56.prevDFG.size) + assertEquals(literal3, paLine56.prevDFG.first()) + + // Line 57 + assertEquals(1, pbLine57.memoryAddress.size) + assertEquals(sbLine52.memoryAddress.first(), pbLine57.memoryAddress.first()) + assertEquals(1, pbLine57.prevDFG.size) + assertEquals(literal4, pbLine57.prevDFG.first()) + + // Line 59 + assertEquals(1, paLine59.memoryAddress.size) + assertEquals(saLine51.memoryAddress.first(), paLine59.memoryAddress.first()) + assertEquals(1, paLine59.prevDFG.size) + assertEquals(literal3, paLine59.prevDFG.first()) + + assertEquals(1, pbLine59.memoryAddress.size) + assertEquals(sbLine52.memoryAddress.first(), pbLine59.memoryAddress.first()) + assertEquals(1, pbLine59.prevDFG.size) + assertEquals(literal4, pbLine59.prevDFG.first()) + } + + @Test + fun testArrays() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // 
References + val n0Line66 = + tu.allChildren { it.location?.region?.startLine == 66 }.first() + val n0Line67 = + tu.allChildren { it.location?.region?.startLine == 67 }.first() + val n0Line68 = + tu.allChildren { it.location?.region?.startLine == 68 }.first() + val niLine71 = + tu.allChildren { it.location?.region?.startLine == 71 }.first() + val njLine75 = + tu.allChildren { it.location?.region?.startLine == 75 }.first() + + // Literals + val literal1 = + tu.allChildren> { it.location?.region?.startLine == 67 && it.value == 1 } + .first() + + // Expressions + val exprLine71 = + tu.allChildren { it.location?.region?.startLine == 71 }.first() + + // Line 66 + assertEquals(1, n0Line66.memoryAddress.size) + assertTrue(n0Line66.arrayExpression.memoryAddress.first() is MemoryAddress) + assertEquals( + (n0Line66.arrayExpression.memoryAddress + .filterIsInstance() + .firstOrNull() + ?.fieldAddresses + ?.get("0") + ?.first() as MemoryAddress), + n0Line66.memoryAddress.first() + ) + assertEquals(1, n0Line66.prevDFG.size) + assertTrue(n0Line66.prevDFG.first() is UnknownMemoryValue) + + // Line 67 + assertEquals(1, n0Line67.memoryAddress.size) + assertEquals( + (n0Line67.arrayExpression.memoryAddress + .filterIsInstance() + .firstOrNull() + ?.fieldAddresses + ?.get("0") + ?.first() as MemoryAddress), + n0Line67.memoryAddress.first() + ) + assertEquals(1, n0Line67.prevDFG.size) + assertEquals(literal1, n0Line67.prevDFG.firstOrNull()) + + // Line 68 + assertEquals(1, n0Line68.memoryAddress.size) + assertEquals( + (n0Line68.arrayExpression.memoryAddress + .filterIsInstance() + .firstOrNull() + ?.fieldAddresses + ?.get("0") + ?.first() as MemoryAddress), + n0Line68.memoryAddress.first() + ) + assertEquals(1, n0Line68.prevDFG.size) + assertEquals(literal1, n0Line68.prevDFG.firstOrNull()) + + // Line 71 + assertEquals(1, niLine71.memoryAddress.size) + assertEquals( + (niLine71.arrayExpression.memoryAddress + .filterIsInstance() + .firstOrNull() + ?.fieldAddresses + ?.get("i") + 
?.first() as MemoryAddress), + niLine71.memoryAddress.first() + ) + assertEquals(1, niLine71.prevDFG.size) + assertEquals(exprLine71, niLine71.prevDFG.firstOrNull()) + + // Line 75 + assertEquals(1, njLine75.memoryAddress.size) + assertEquals( + (njLine75.arrayExpression.memoryAddress + .filterIsInstance() + .firstOrNull() + ?.fieldAddresses + ?.get("j") + ?.first() as MemoryAddress), + njLine75.memoryAddress.first() + ) + // TODO: What are our expections for njLine75.prevDFG? I think null is fine, since we + // never defined that + assertEquals(1, njLine75.prevDFG.size) + assertTrue(njLine75.prevDFG.first() is UnknownMemoryValue) + } + + @Test + fun testMemcpy() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // Declarations + val aDecl = + tu.allChildren { it.location?.region?.startLine == 89 }.firstOrNull() + assertNotNull(aDecl) + val bDecl = + tu.allChildren { it.location?.region?.startLine == 90 }.firstOrNull() + assertNotNull(bDecl) + val cDecl = + tu.allChildren { it.location?.region?.startLine == 91 }.firstOrNull() + assertNotNull(cDecl) + val caddrDecl = + tu.allChildren { it.location?.region?.startLine == 92 }.firstOrNull() + assertNotNull(caddrDecl) + val dDecl = + tu.allChildren { it.location?.region?.startLine == 93 }.firstOrNull() + assertNotNull(dDecl) + val eDecl = + tu.allChildren { it.location?.region?.startLine == 94 }.firstOrNull() + assertNotNull(eDecl) + val fDecl = + tu.allChildren { it.location?.region?.startLine == 95 }.firstOrNull() + assertNotNull(fDecl) + val gDecl = + tu.allChildren { it.location?.region?.startLine == 96 }.firstOrNull() + assertNotNull(gDecl) + val hDecl = + tu.allChildren { it.location?.region?.startLine == 97 }.firstOrNull() + assertNotNull(hDecl) + + val paDecl = + 
tu.allChildren { it.location?.region?.startLine == 99 }.firstOrNull() + assertNotNull(paDecl) + val pbDecl = + tu.allChildren { it.location?.region?.startLine == 100 }.firstOrNull() + assertNotNull(pbDecl) + val pcDecl = + tu.allChildren { it.location?.region?.startLine == 101 }.firstOrNull() + assertNotNull(pcDecl) + val pdDecl = + tu.allChildren { it.location?.region?.startLine == 102 }.firstOrNull() + assertNotNull(pdDecl) + val peDecl = + tu.allChildren { it.location?.region?.startLine == 103 }.firstOrNull() + assertNotNull(peDecl) + val pfDecl = + tu.allChildren { it.location?.region?.startLine == 104 }.firstOrNull() + assertNotNull(pfDecl) + val pgDecl = + tu.allChildren { it.location?.region?.startLine == 105 }.firstOrNull() + assertNotNull(pgDecl) + val phDecl = + tu.allChildren { it.location?.region?.startLine == 106 }.firstOrNull() + assertNotNull(phDecl) + + // References + val aRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 86 && + it.name.localName == "a" + } + .firstOrNull() + assertNotNull(aRef) + val bRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 93 && + it.name.localName == "b" + } + .firstOrNull() + assertNotNull(bRef) + val cRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 100 && + it.name.localName == "c" + } + .firstOrNull() + assertNotNull(cRef) + val dRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 107 && + it.name.localName == "d" + } + .firstOrNull() + assertNotNull(dRef) + val eRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 114 && + it.name.localName == "e" + } + .firstOrNull() + assertNotNull(eRef) + val fRef = + tu.allChildren { + it.location?.region?.startLine == 128 && + it.location?.region?.startColumn == 121 && + it.name.localName == "f" + } + 
.firstOrNull() + assertNotNull(fRef) + val paRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 91 && + it.name.localName == "pa" + } + .firstOrNull() + assertNotNull(paRef) + val pbRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 100 && + it.name.localName == "pb" + } + .firstOrNull() + assertNotNull(pbRef) + val pcRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 109 && + it.name.localName == "pc" + } + .firstOrNull() + assertNotNull(pcRef) + val pdRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 118 && + it.name.localName == "pd" + } + .firstOrNull() + assertNotNull(pdRef) + val peRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 127 && + it.name.localName == "pe" + } + .firstOrNull() + assertNotNull(peRef) + val pfRef = + tu.allChildren { + it.location?.region?.startLine == 129 && + it.location?.region?.startColumn == 136 && + it.name.localName == "pf" + } + .firstOrNull() + assertNotNull(pfRef) + + val aPointerRef = + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "a" + } + .firstOrNull() + assertNotNull(aPointerRef) + val bPointerRef = + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "b" + } + .firstOrNull() + assertNotNull(bPointerRef) + val cPointerRef = + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "c" + } + .firstOrNull() + assertNotNull(cPointerRef) + val dPointerRef = + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "d" + } + .firstOrNull() + assertNotNull(dPointerRef) + val ePointerRef = + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "e" + } + .firstOrNull() + assertNotNull(ePointerRef) + val fPointerRef 
= + tu.allChildren { + it.location?.region?.startLine == 128 && it.name.localName == "f" + } + .firstOrNull() + assertNotNull(fPointerRef) + + // PointerDerefs + val paPointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pa" + } + .firstOrNull() + assertNotNull(paPointerDeref) + val pbPointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pb" + } + .firstOrNull() + assertNotNull(pbPointerDeref) + val pcPointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pc" + } + .firstOrNull() + assertNotNull(pcPointerDeref) + val pdPointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pd" + } + .firstOrNull() + assertNotNull(pdPointerDeref) + val pePointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pe" + } + .firstOrNull() + assertNotNull(pePointerDeref) + val pfPointerDeref = + tu.allChildren { + it.location?.region?.startLine == 129 && it.name.localName == "pf" + } + .firstOrNull() + assertNotNull(pfPointerDeref) + + // Result of memcpy in Line 112 + assertEquals(1, bRef.memoryAddress.size) + assertEquals(bDecl.memoryAddress, bRef.memoryAddress.first()) + assertEquals(1, bRef.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), bRef.prevDFG.first()) + + assertEquals(1, pbPointerDeref.memoryAddress.size) + assertEquals(bDecl.memoryAddress, pbPointerDeref.memoryAddress.first()) + assertEquals(1, pbPointerDeref.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), pbPointerDeref.prevDFG.first()) + + // Result of memcpy in Line 115 + assertEquals(1, cRef.memoryAddress.size) + assertEquals(cDecl.memoryAddress, cRef.memoryAddress.first()) + assertEquals(1, cRef.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), cRef.prevDFG.first()) + + assertEquals(1, pcPointerDeref.memoryAddress.size) + assertEquals(cDecl.memoryAddress, pcPointerDeref.memoryAddress.first()) 
+ assertEquals(1, pcPointerDeref.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), pcPointerDeref.prevDFG.first()) + + // Result of memcpy in Line 118 + assertEquals(1, dRef.memoryAddress.size) + assertEquals(dDecl.memoryAddress, dRef.memoryAddress.first()) + assertEquals(1, dRef.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), dRef.prevDFG.first()) + + assertEquals(1, pdPointerDeref.memoryAddress.size) + assertEquals(dDecl.memoryAddress, pdPointerDeref.memoryAddress.first()) + assertEquals(1, pdPointerDeref.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), pdPointerDeref.prevDFG.first()) + + // Result of memcpy in Line 121 + assertEquals(1, eRef.memoryAddress.size) + assertEquals(eDecl.memoryAddress, eRef.memoryAddress.first()) + assertEquals(1, eRef.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), eRef.prevDFG.first()) + + assertEquals(1, pePointerDeref.memoryAddress.size) + assertEquals(eDecl.memoryAddress, pePointerDeref.memoryAddress.first()) + assertEquals(1, pePointerDeref.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), pePointerDeref.prevDFG.first()) + + // Result of memcpy in Line 125 + assertEquals(1, fRef.memoryAddress.size) + assertEquals(fDecl.memoryAddress, fRef.memoryAddress.first()) + assertEquals(1, fRef.prevDFG.size) + assertEquals(fDecl.prevDFG.first(), fRef.prevDFG.first()) + + assertEquals(1, pfPointerDeref.memoryAddress.size) + assertEquals(aDecl.memoryAddress, pfPointerDeref.memoryAddress.first()) + assertEquals(1, pfPointerDeref.prevDFG.size) + assertEquals(aDecl.prevDFG.first(), pfPointerDeref.prevDFG.first()) + } + + @Test + fun testPointerToPointer() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // Declarations + val aDecl = + tu.allChildren { it.location?.region?.startLine == 134 
}.firstOrNull() + assertNotNull(aDecl) + val bDecl = + tu.allChildren { it.location?.region?.startLine == 135 }.firstOrNull() + assertNotNull(bDecl) + val cDecl = + tu.allChildren { it.location?.region?.startLine == 136 }.firstOrNull() + assertNotNull(cDecl) + + // References + val aRefLine138 = + tu.allChildren { + it.location?.region?.startLine == 138 && it.name.localName == "a" + } + .firstOrNull() + assertNotNull(aRefLine138) + val bRefLine138 = + tu.allChildren { + it.location?.region?.startLine == 138 && + it.name.localName == "b" && + it.location?.region?.startColumn == 65 + } + .firstOrNull() + assertNotNull(bRefLine138) + val bRefLine139 = + tu.allChildren { + it.location?.region?.startLine == 139 && it.name.localName == "b" + } + .firstOrNull() + assertNotNull(bRefLine139) + val cRefLine139 = + tu.allChildren { + it.location?.region?.startLine == 139 && + it.name.localName == "c" && + it.location?.region?.startColumn == 68 + } + .firstOrNull() + assertNotNull(cRefLine139) + + // PointerDereferences + val bPointerDerefLine138 = + tu.allChildren { + it.location?.region?.startLine == 138 && it.name.localName == "b" + } + .firstOrNull() + assertNotNull(bPointerDerefLine138) + val cPointerDerefLine139 = + tu.allChildren { + it.location?.region?.startLine == 139 && it.name.localName == "c" + } + .firstOrNull() + assertNotNull(cPointerDerefLine139) + val cPointerDerefLine140 = + tu.allChildren { + it.location?.region?.startLine == 140 && + it.name.localName == "c" && + it.input is PointerDereference + } + .firstOrNull() + + // Literals + val literal10 = + tu.allChildren> { it.location?.region?.startLine == 134 }.firstOrNull() + assertNotNull(literal10) + + assertNotNull(cPointerDerefLine140) + + // Line 138 + assertEquals(1, aRefLine138.memoryAddress.size) + assertEquals(aDecl.memoryAddress, aRefLine138.memoryAddress.first()) + assertEquals(1, aRefLine138.prevDFG.size) + assertEquals(literal10, aRefLine138.prevDFG.first()) + + assertEquals(1, 
bRefLine138.memoryAddress.size) + assertEquals(bDecl.memoryAddress, bRefLine138.memoryAddress.first()) + assertEquals(1, bRefLine138.prevDFG.size) + assertEquals(aDecl.memoryAddress, bRefLine138.prevDFG.first()) + + assertEquals(1, bPointerDerefLine138.memoryAddress.size) + assertEquals(aDecl.memoryAddress, bPointerDerefLine138.memoryAddress.first()) + assertEquals(1, bPointerDerefLine138.prevDFG.size) + assertEquals(literal10, bPointerDerefLine138.prevDFG.first()) + + // Line 139 + assertEquals(1, bRefLine139.memoryAddress.size) + assertEquals(bDecl.memoryAddress, bRefLine139.memoryAddress.first()) + assertEquals(1, bRefLine139.prevDFG.size) + assertEquals(aDecl.memoryAddress, bRefLine139.prevDFG.first()) + + assertEquals(1, cRefLine139.memoryAddress.size) + assertEquals(cDecl.memoryAddress, cRefLine139.memoryAddress.first()) + assertEquals(1, cRefLine139.prevDFG.size) + assertEquals(bDecl.memoryAddress, cRefLine139.prevDFG.first()) + + assertEquals(1, cPointerDerefLine139.memoryAddress.size) + assertEquals(bDecl.memoryAddress, cPointerDerefLine139.memoryAddress.first()) + assertEquals(1, cPointerDerefLine139.prevDFG.size) + assertEquals(aDecl.memoryAddress, cPointerDerefLine139.prevDFG.first()) + + // Line 140 + assertEquals(1, cPointerDerefLine140.memoryAddress.size) + assertEquals(aDecl.memoryAddress, cPointerDerefLine140.memoryAddress.first()) + assertEquals(1, cPointerDerefLine140.prevDFG.size) + assertEquals(literal10, cPointerDerefLine140.prevDFG.first()) + } + + @Test + fun testGhidraCode() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // ParameterDeclaration + val param_1Line145 = + tu.allChildren { it.location?.region?.startLine == 145 }.first() + assertNotNull(param_1Line145) + + val param_1Line193 = + 
tu.allChildren { it.location?.region?.startLine == 193 }.first() + assertNotNull(param_1Line193) + + // References + val local_20Line159 = + tu.allChildren { + it.location?.region?.startLine == 159 && it.name.localName == "local_20" + } + .first() + assertNotNull(local_20Line159) + val param_1Line159 = + tu.allChildren { + it.location?.region?.startLine == 159 && it.name.localName == "param_1" + } + .first() + assertNotNull(param_1Line159) + + val local_30Line160 = + tu.allChildren { + it.location?.region?.startLine == 160 && it.name.localName == "local_30" + } + .first() + assertNotNull(local_30Line160) + val param_1Line160 = + tu.allChildren { + it.location?.region?.startLine == 160 && it.name.localName == "param_1" + } + .first() + assertNotNull(param_1Line160) + + val local_30Line165 = + tu.allChildren { + it.location?.region?.startLine == 165 && it.name.localName == "local_30" + } + .first() + assertNotNull(local_30Line165) + + val local_18Line165 = + tu.allChildren { + it.location?.region?.startLine == 165 && it.name.localName == "local_18" + } + .first() + assertNotNull(local_18Line165) + + val local_10Line166 = + tu.allChildren { + it.location?.region?.startLine == 166 && it.name.localName == "local_10" + } + .first() + assertNotNull(local_10Line166) + + val local_28Line167 = + tu.allChildren { + it.location?.region?.startLine == 167 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28Line167) + + val local_28Line172 = + tu.allChildren { + it.location?.region?.startLine == 172 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28Line172) + + val local_10Line172 = + tu.allChildren { + it.location?.region?.startLine == 172 && it.name.localName == "local_10" + } + .first() + assertNotNull(local_10Line172) + + val local_28Line177 = + tu.allChildren { + it.location?.region?.startLine == 177 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28Line177) + + val local_10Line177 = + tu.allChildren { + 
it.location?.region?.startLine == 177 && it.name.localName == "local_10" + } + .first() + assertNotNull(local_10Line177) + + val local_28Line179 = + tu.allChildren { + it.location?.region?.startLine == 179 && + it.name.localName == "local_28" && + it.location?.region?.startColumn == 19 + } + .first() + assertNotNull(local_28Line179) + + val local_28Line180 = + tu.allChildren { + it.location?.region?.startLine == 180 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28Line180) + + val local_28DerefLine181 = + tu.allChildren { + it.location?.region?.startLine == 181 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28DerefLine181) + + val local_28Line182 = + tu.allChildren { + it.location?.region?.startLine == 182 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28Line182) + + // PointerDereferences + val local_28DerefLine179 = + tu.allChildren { + it.location?.region?.startLine == 179 && it.name.localName == "local_28" + } + .first() + assertNotNull(local_28DerefLine179) + + val local_18DerefLine190 = + tu.allChildren { + it.location?.region?.startLine == 190 && it.name.localName == "local_18" + } + .first() + assertNotNull(local_18DerefLine190) + + val param_1DerefLine190 = + tu.allChildren { + it.location?.region?.startLine == 190 && it.name.localName == "param_1" + } + .first() + assertNotNull(param_1DerefLine190) + + // Literals + val literal10Line166 = + tu.allChildren> { it.location?.region?.startLine == 166 }.first() + assertNotNull(literal10Line166) + + val literal0Line167 = + tu.allChildren> { it.location?.region?.startLine == 167 }.first() + assertNotNull(literal0Line167) + + val literal0Line177 = + tu.allChildren> { it.location?.region?.startLine == 177 }.first() + assertNotNull(literal0Line177) + + // MemberExpressions + val meLine201 = + tu.allChildren { it.location?.region?.startLine == 201 }.first() + assertNotNull(meLine201) + + // CallExpressions + val ceLine172 = + tu.allChildren { 
it.location?.region?.startLine == 172 }.first() + assertNotNull(ceLine172) + + val ceLine201 = + tu.allChildren { it.location?.region?.startLine == 201 }.first() + assertNotNull(ceLine201) + + // Line 159 + assertEquals(1, local_20Line159.prevDFG.size) + assertEquals(1, param_1Line159.prevDFG.size) + assertEquals(param_1Line159.prevDFG.first(), local_20Line159.prevDFG.first()) + + // Effect from Line 160 + assertEquals(1, local_30Line165.prevDFG.size) + assertTrue(local_30Line165.prevDFG.first() is ParameterMemoryValue) + assertEquals("param_1.derefvalue", local_30Line165.prevDFG.firstOrNull()?.name.toString()) + + // Line 165 + assertEquals(1, local_18Line165.prevDFG.size) + assertTrue(local_18Line165.prevDFG.first() is ParameterMemoryValue) + assertEquals("param_1.derefvalue", local_18Line165.prevDFG.firstOrNull()?.name.toString()) + + // Line 167 + assertEquals(1, local_28Line167.prevDFG.size) + assertEquals(literal0Line167, local_28Line167.prevDFG.firstOrNull()) + + // Line 172 + assertEquals(1, local_28Line172.prevDFG.size) + assertEquals(ceLine172, local_28Line172.prevDFG.firstOrNull()) + + // Line 179 + assertEquals(2, local_28Line179.prevDFG.size) + assertTrue(local_28Line179.prevDFG.contains(literal0Line167)) + assertTrue(local_28Line179.prevDFG.contains(ceLine172)) + + assertEquals(2, local_28DerefLine179.prevDFG.size) + assertTrue(local_28DerefLine179.prevDFG.contains(literal0Line177)) + assertEquals( + 1, + local_28DerefLine179.prevDFG + .filterIsInstance() + .filter { it.name.localName == "0" } + .size + ) + assertTrue(local_28DerefLine179.prevDFG.contains(literal0Line177)) + assertEquals(2, local_28DerefLine179.memoryAddress.size) + assertTrue(local_28DerefLine179.memoryAddress.contains(literal0Line167)) + assertTrue(local_28DerefLine179.memoryAddress.contains(ceLine172)) + + // Line 180 + assertEquals(2, local_28Line180.prevDFG.size) + assertTrue(local_28Line180.prevDFG.contains(literal0Line167)) + 
assertTrue(local_28Line180.prevDFG.contains(ceLine172)) + + // Line 181 + assertEquals(2, local_28DerefLine181.prevDFG.size) + assertTrue(local_28DerefLine181.prevDFG.contains(ceLine201)) + assertTrue( + local_28DerefLine181.prevDFG.any { + it is UnknownMemoryValue && it.name.localName == "DAT_0011b1c8" + } + ) + + // Line 190 + // TODO: verify the memcpy in Line 183 + assertEquals(1, local_18DerefLine190.memoryAddress.size) + assertEquals(param_1Line145.memoryValue, local_18DerefLine190.prevDFG.firstOrNull()) + assertEquals(2, local_18DerefLine190.prevDFG.size) + + // Verify the functionSummary for sgx_ecall_key_to_out + } + + @Test + fun testFunctionSummaries() { + val file = File("src/test/resources/pointsto.cpp") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.registerPass() + it.registerFunctionSummaries(File("src/test/resources/hardcodedDFGedges.yml")) + } + assertNotNull(tu) + + // Declarations + val iDecl = + tu.allChildren { + it.location?.region?.startLine == 224 && it.name.localName == "i" + } + .firstOrNull() + assertNotNull(iDecl) + + val jDecl = + tu.allChildren { + it.location?.region?.startLine == 225 && it.name.localName == "j" + } + .firstOrNull() + assertNotNull(jDecl) + + // References + val iRefLine208 = + tu.allChildren { + it.location?.region?.startLine == 208 && it.name.localName == "i" + } + .firstOrNull() + assertNotNull(iRefLine208) + + val iRefLine230Left = + tu.allChildren { + it.location?.region?.startLine == 230 && + it.name.localName == "i" && + it.location?.region?.startColumn == 3 + } + .firstOrNull() + assertNotNull(iRefLine230Left) + + val iRefLine230Right = + tu.allChildren { + it.location?.region?.startLine == 230 && + it.name.localName == "i" && + it.location?.region?.startColumn == 9 + } + .firstOrNull() + assertNotNull(iRefLine230Right) + + val iRefLine231 = + tu.allChildren { + it.location?.region?.startLine == 231 && it.name.localName == "i" + } + 
.firstOrNull() + assertNotNull(iRefLine231) + + val iRefLine234 = + tu.allChildren { + it.location?.region?.startLine == 234 && it.name.localName == "i" + } + .firstOrNull() + assertNotNull(iRefLine234) + + val iRefLine237 = + tu.allChildren { + it.location?.region?.startLine == 237 && it.name.localName == "i" + } + .firstOrNull() + assertNotNull(iRefLine237) + + val iRefLine240 = + tu.allChildren { + it.location?.region?.startLine == 240 && it.name.localName == "i" + } + .firstOrNull() + assertNotNull(iRefLine240) + + val iRefLine242Left = + tu.allChildren { + it.location?.region?.startLine == 242 && + it.name.localName == "i" && + it.location?.region?.startColumn == 3 + } + .firstOrNull() + assertNotNull(iRefLine242Left) + + val iRefLine242Right = + tu.allChildren { + it.location?.region?.startLine == 242 && + it.name.localName == "i" && + it.location?.region?.startColumn == 19 + } + .firstOrNull() + assertNotNull(iRefLine242Right) + + val pRefLine242 = + tu.allChildren { + it.location?.region?.startLine == 242 && it.name.localName == "p" + } + .firstOrNull() + assertNotNull(pRefLine242) + + // Dereferences + val pDerefLine231 = + tu.allChildren { + it.location?.region?.startLine == 231 && it.name.localName == "p" + } + .firstOrNull() + assertNotNull(pDerefLine231) + + val pDerefLine234 = + tu.allChildren { + it.location?.region?.startLine == 234 && it.name.localName == "p" + } + .firstOrNull() + assertNotNull(pDerefLine234) + + val pDerefLine237 = + tu.allChildren { + it.location?.region?.startLine == 237 && it.name.localName == "p" + } + .firstOrNull() + assertNotNull(pDerefLine237) + + val pDerefLine240 = + tu.allChildren { + it.location?.region?.startLine == 240 && it.name.localName == "p" + } + .firstOrNull() + assertNotNull(pDerefLine240) + + // BinaryOperators + val binOpLine207 = + tu.allChildren { it.location?.region?.startLine == 207 }.firstOrNull() + assertNotNull(binOpLine207) + + val binOpLine212 = + tu.allChildren { it.location?.region?.startLine == 
212 }.firstOrNull() + assertNotNull(binOpLine212) + + // CallExpressions + val ceLine230 = + tu.allChildren { it.location?.region?.startLine == 230 }.firstOrNull() + assertNotNull(ceLine230) + + val ceLine242 = + tu.allChildren { it.location?.region?.startLine == 242 }.firstOrNull() + assertNotNull(ceLine242) + + // Line 230 + assertEquals(1, ceLine230.prevDFG.size) + assertEquals(binOpLine207, ceLine230.prevDFG.firstOrNull()) + assertEquals(1, iRefLine230Left.prevDFG.size) + assertEquals(ceLine230, iRefLine230Left.prevDFG.firstOrNull()) + assertEquals(1, iRefLine230Right.nextDFG.size) + assertTrue(iRefLine230Right.nextDFG.firstOrNull() is ParameterMemoryValue) + assertLocalName("value", iRefLine230Right.nextDFG.firstOrNull()) + assertEquals("i", iRefLine230Right.nextDFG.firstOrNull()?.name?.parent?.localName) + + // Line 231 + assertEquals(1, iRefLine231.prevDFG.size) + assertEquals(ceLine230, iRefLine231.prevDFG.first()) + assertEquals(1, pDerefLine231.prevDFG.size) + assertEquals(ceLine230, pDerefLine231.prevDFG.first()) + assertEquals(1, pDerefLine231.memoryAddress.size) + assertEquals(iDecl.memoryAddress, pDerefLine231.memoryAddress.first()) + + // Line 234 + assertEquals(1, pDerefLine234.memoryAddress.size) + assertEquals(iDecl.memoryAddress, pDerefLine234.memoryAddress.firstOrNull()) + assertEquals(1, iRefLine234.prevDFG.size) + assertEquals(binOpLine212, iRefLine234.prevDFG.firstOrNull()) + assertEquals(1, pDerefLine234.prevDFG.size) + assertEquals(binOpLine212, pDerefLine234.prevDFG.firstOrNull()) + assertEquals(1, pDerefLine234.memoryAddress.size) + assertEquals(iDecl.memoryAddress, pDerefLine234.memoryAddress.first()) + + // Line 237 + assertEquals(1, pDerefLine237.memoryAddress.size) + assertEquals(iDecl.memoryAddress, pDerefLine237.memoryAddress.firstOrNull()) + assertEquals(1, pDerefLine237.prevDFG.size) + assertEquals(jDecl.prevDFG.firstOrNull(), pDerefLine237.prevDFG.firstOrNull()) + + // Line 240 + assertEquals(1, pDerefLine240.memoryAddress.size) 
+ assertEquals(iDecl.memoryAddress, pDerefLine240.memoryAddress.firstOrNull()) + assertEquals(1, pDerefLine240.prevDFG.size) + assertEquals(binOpLine212, pDerefLine240.prevDFG.firstOrNull()) + assertEquals(1, iRefLine240.prevDFG.size) + assertEquals(binOpLine212, iRefLine240.prevDFG.firstOrNull()) + + // Line 242 + assertEquals(1, iRefLine242Left.prevDFG.size) + assertEquals(ceLine242, iRefLine242Left.prevDFG.firstOrNull()) + assertEquals(2, ceLine242.prevDFG.size) + assertTrue(ceLine242.prevDFG.contains(iRefLine242Right)) + assertTrue(ceLine242.prevDFG.contains(pRefLine242)) + } +} diff --git a/cpg-language-cxx/src/test/resources/comments.c b/cpg-language-cxx/src/test/resources/comments.c new file mode 100644 index 0000000000..b7301bbca9 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/comments.c @@ -0,0 +1,20 @@ +/** + * Documentation comment + */ +void functionDoc() { + return; +} + +/* + * Multi-line + * comment + */ +void functionMultiline() { + return; +} + +// Simple comment + +void functionSimple() { + return; +} \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/hardcodedDFGedges.yml b/cpg-language-cxx/src/test/resources/hardcodedDFGedges.yml new file mode 100644 index 0000000000..39d4538a50 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/hardcodedDFGedges.yml @@ -0,0 +1,110 @@ +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: strcpy + dataFlows: + - from: param1.address + to: param0.address + dfgType: full + - from: param0.address + to: return + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: memcpy + dataFlows: + - from: param1.address + to: param0.address + dfgType: full + - from: param0.address + to: return + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: memcpy_verw + dataFlows: + - from: param1.address + to: param0.address + 
dfgType: full + - from: param0.address + to: return + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: memcpy_s + dataFlows: + - from: param2.address + to: param0.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: memcpy_verw_s + dataFlows: + - from: param2.address + to: param0.address + dfgType: full + + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: sgx_sha256_update + dataFlows: + - from: param0.address + to: param2.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: sgx_sha256_init + dataFlows: + - from: param0.address + to: param0.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: sgx_ocall + dataFlows: + - from: param1.address + to: param1.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: fread + dataFlows: + - from: param1.address + to: param1.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: sgx_get_key + dataFlows: + - from: param1.address + to: param1.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: memset + dataFlows: + - from: param1.value + to: param0.address + dfgType: full + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: malloc + dataFlows: [] + +- functionDeclaration: + language: de.fraunhofer.aisec.cpg.frontends.cxx.CPPLanguage + methodName: ocall_malloc + dataFlows: + - from: param0.address + to: param0.address + dfgType: full \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/pointsto.cpp 
b/cpg-language-cxx/src/test/resources/pointsto.cpp new file mode 100644 index 0000000000..58cf3bb9bf --- /dev/null +++ b/cpg-language-cxx/src/test/resources/pointsto.cpp @@ -0,0 +1,243 @@ +#include + +int main() { + int i=0; + int j=1; + int* a=&i; + int* b=a; + printf("%d\n", i); + i=2; + printf("%d\n", i); + i = i + 1; + printf("%d\n", *a); + i++; + printf("%d\n", *a); + a=&j; + printf("%d\n", *a); + *a=3; + printf("%d\n", *b); +} + +int conditions() { + int i=0; + int j=1; + int* a; + if ( 1 == 1 ) { + a = &i; + printf("%d\n", *a); + } else { + a = &j; + printf("%d\n", *a); + } + printf("%d\n", *a); + + for (int x=0; x<10; x++) { + i++; + } + printf("%d\n", *a); +} + + +typedef struct test { + int a; + int b; +} S; + +int structs() { + S s; + S t; + S* p=&s; + + s.a=1; + s.b=2; + printf("%d %d\n", s.a, s.b); + + printf("%d %d\n", p->a, p->b); + p->a=3; + p->b=4; + + printf("%d %d\n", p->a, p->b); +} + +int arrays() { + int n[5]; + int i, j; + + printf("%d\n", n[0]); + n[0] = 1; + printf("%d\n", n[0]); + + for(i = 0; i < 5; i++){ + n[i] = i + 100; + } + + for(j = 0; j < 5; j++){ + printf("n[%d] = %d\n", j, n[j]); + } + return 0; + +} + + + + +/*long memcpy_s(long *dst, int dstlen, long *src, int srclen){ + memcpy(src,dst,dstlen); +}*/ + +int memcpy() { + long a=0; + long b=1; + long c=2; + long caddr=(long)&c; + long d=3; + long e=4; + long f=5; + long g=6; + long h=7; + + long* pa=&a; + long* pb=&b; + long* pc=&c; + long* pd=&d; + long* pe=&e; + long* pf=&f; + long* pg=&g; + long* ph=&h; + + printf("a : %d, %p; b : %d, %p, c : %d, %p, d : %d, %p, e : %d, %p, f : %d, %p\n", a, &a, b, &b, c, &c, d, &d, e, &e, f, &f); + printf("pa: %d, %p; pb: %d, %p, pc: %d, %p, pd: %d, %p, pe: %d, %p, pf: %d, %p\n", *pa, pa, *pb, pb, *pc, pc, *pd, pd, *pe, pe, *pf, pf); + + // Copy the data at pa.memoryValue (AKA &a) to the address at pb.memoryValue (AKA &b) + memcpy(pb,pa,sizeof(a)); + + // Copy the data at pa.memoryValue (AKA &a) to the address at caddr.memoryValue (AKA &c) + 
memcpy((void *)caddr,pa,sizeof(pa)); + + // Copy the data at pa.memoryValue (AKA &a) to the address at &d.memoryValue (AKA d.memoryAddress) + memcpy(&d,pa,sizeof(a)); + + // Copy the data at &a (AKA a.memoryAddress) to the address at pe.memoryValue (AKA e.memoryAddress) + memcpy(pe,&a,sizeof(a)); + + // Copy the data at &pa (AKA pa.memoryAddress, which contains &a) to the address at &pf (which so far contained f.memoryAddress) + // Simplified: Set the value of pf to &a + memcpy(&pf,&pa,sizeof(a)); + + + printf("a : %d, %p; b : %d, %p, c : %d, %p, d : %d, %p, e : %d, %p, f : %d, %p\n", a, &a, b, &b, c, &c, d, &d, e, &e, f, &f); + printf("pa: %d, %p; pb: %d, %p, pc: %d, %p, pd: %d, %p, pe: %d, %p, pf: %d, %p\n", *pa, pa, *pb, pb, *pc, pc, *pd, pd, *pe, pe, *pf, pf); +} + +int pointertopointer(){ + + int a = 10; + int *b = &a; + int **c = &b; + + printf("a: %d \nAddress of 'a': %d \nValue at a: %d\n\n", a, b, *b); + printf("b: %d \nPointer to 'b' is c: %d \nValue at b: %d\n", b, c, *c); + printf("Value of 'a' from 'c': %d", **c); + + return 0; +} + +undefined4 sgx_ecall_key_to_out(long param_1) + +{ + int iVar1; + undefined4 uVar2; + long local_30; + void *local_28; + long local_20; + long local_18; + size_t local_10; + + if ((param_1 == 0) || (iVar1 = sgx_is_outside_enclave(param_1,8), iVar1 == 0)) { + return 2; + } + local_20 = param_1; + iVar1 = memcpy_s(&local_30,8,param_1,8); + if (iVar1 != 0) { + return 1; + } + uVar2 = 0; + local_18 = local_30; + local_10 = 0x10; + local_28 = (void *)0x0; + if ((local_30 != 0) && (iVar1 = sgx_is_outside_enclave(local_30,0x10), iVar1 == 0)) { + return 2; + } + if ((local_18 != 0) && (local_10 != 0)) { + local_28 = dlmalloc(local_10); + if (local_28 == (void *)0x0) { + uVar2 = 3; + goto LAB_001011ce; + } + memset(local_28,0,local_10); + } + printf("%d\n", *local_28); + ecall_key_to_out(local_28); + printf("%d\n", *local_28); + if ((local_28 != (void *)0x0) && /*printf("%d", *local_28) &&*/ + (iVar1 = 
memcpy_verw_s(local_18,local_10,local_28,local_10), iVar1 != 0)) { + uVar2 = 1; + } +LAB_001011ce: + if (local_28 != (void *)0x0) { + free(local_28); + } + printf("%d %d\n", *local_18, *param_1); return uVar2; +} + +void ecall_key_to_out(/*undefined8*/ void *param_1) +{ + undefined8 uVar1; + + if ((char)key == '\0') { + derive_secret_key(); + } + uVar1 = DAT_0011b1c8; + *param_1 = CONCAT71(key._1_7_,(char)key); + param_1[1] = uVar1; + return; +} + +int inc(int i) { + i=i+1; + return i; +} + +void incp(int* p) { + *p=*p+1;//(*p)++; +} + +void changep(int* p1, int* p2) { + *p1=*p2; +} + +void changep2(int* p1, int* p2) { + *p1=p2; +} + +int testFunctionSummaries() { + int i=0; + int j=3; + int* p=&i; + + printf("i: %d j: %d *p: %d p: %p\n", i, j, *p, p); + + i=inc(i); + printf("i: %d j: %d *p: %d p: %p\n", i, j, *p, p); + + incp(p); + printf("i: %d j: %d *p: %d p: %p\n", i, j, *p, p); + + changep(p, &j); + printf("i: %d j: %d *p: %d p: %p\n", i, j, *p, p); + + incp(p); + printf("i: %d j: %d *p: %d p: %p\n", i, j, *p, p); + + i = unknownFunc(i, p); +} diff --git a/cpg-language-cxx/src/test/resources/unaryoperator.cpp b/cpg-language-cxx/src/test/resources/unaryoperator.cpp index 2b984a4791..ac8b425759 100644 --- a/cpg-language-cxx/src/test/resources/unaryoperator.cpp +++ b/cpg-language-cxx/src/test/resources/unaryoperator.cpp @@ -11,4 +11,8 @@ int main() { int* ptr = 0; b = *ptr; + + int* c; + *c = 7; + *ptr = *c; }