Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate uncertainty information to Evaluators #886

Merged
merged 23 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
31acd51
introduce the Length Type to check the length of an argument
CodingDepot Jun 17, 2024
0369e26
Merge branch 'main' into rh/length-type
CodingDepot Jun 17, 2024
d81132e
Add Length related tests
CodingDepot Jun 24, 2024
bb82ebd
rewrite cpgGetNodes to return the associated Result with the Node
CodingDepot Jun 24, 2024
46b86ea
move result to own file and rewrite cpgSignature to use results
CodingDepot Jun 26, 2024
1cb9af5
have cpgGetNodes return a Map and adapt the Evaluators
CodingDepot Jul 1, 2024
aee5df3
changes around new cpgGetNodes
CodingDepot Jul 1, 2024
e2371a4
fixes to the signatureTest
CodingDepot Jul 1, 2024
945768e
remove custom and implementation between Boolean and Result because o…
CodingDepot Jul 1, 2024
3001542
add code documentation
CodingDepot Jul 1, 2024
f9f02e9
fix code style issues
CodingDepot Jul 3, 2024
54f2d74
Merge branch 'refs/heads/main' into rh/length-type
CodingDepot Jul 3, 2024
9c03a5b
only add the result when the CallExpression is not invalid
CodingDepot Jul 3, 2024
893f827
Merge branch 'refs/heads/main' into rh/length-type
CodingDepot Jul 3, 2024
5985bf9
disable tests that rely on missing CPG features
CodingDepot Jul 8, 2024
44ef976
remove the Length type
CodingDepot Jul 10, 2024
64b0f21
cleanup
CodingDepot Jul 10, 2024
bcf9437
add regression tests for the new Result type
CodingDepot Jul 22, 2024
5cc9ce4
also test result conversion
CodingDepot Jul 22, 2024
14aa02c
apply less spots
CodingDepot Jul 22, 2024
15818cb
try not inlining result operations
CodingDepot Jul 22, 2024
7768611
refactor Only and Never evaluator to be more testable
CodingDepot Jul 22, 2024
55c99b6
Merge branch 'main' into rh/open-result
fwendland Sep 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class CokoCpgBackend(config: BackendConfiguration) :
*/
override fun order(baseNodes: Op, block: Order.() -> Unit): OrderEvaluator =
OrderEvaluator(
baseNodes = baseNodes.cpgGetNodes(),
baseNodes = baseNodes.cpgGetNodes().keys,
order = Order().apply(block)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ fun DataItem<*>.cpgGetAllNodes(): Nodes =
context(CokoBackend)
fun DataItem<*>.cpgGetNodes(): Nodes {
return when (this@DataItem) {
is ReturnValueItem -> op.cpgGetNodes().flatMap { it.getVariableInNextDFGOrThis() }
is ReturnValueItem -> op.cpgGetNodes().flatMap { it.key.getVariableInNextDFGOrThis() }
is Value -> [email protected]()
is ArgumentItem -> op.cpgGetNodes().map { it.arguments[index] } // TODO: Do we count starting at 0 or 1?
is ArgumentItem -> op.cpgGetNodes().map { it.key.arguments[index] } // TODO: Do we count starting at 0 or 1?
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@
package de.fraunhofer.aisec.codyze.backends.cpg.coko.dsl

import de.fraunhofer.aisec.codyze.backends.cpg.coko.Nodes
import de.fraunhofer.aisec.codyze.backends.cpg.coko.dsl.Result.*
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.CokoBackend
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.CokoMarker
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.dsl.*
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.modelling.*
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.modelling.DataItem
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.modelling.Definition
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.modelling.ParameterGroup
import de.fraunhofer.aisec.codyze.specificationLanguages.coko.core.modelling.Signature
import de.fraunhofer.aisec.cpg.TranslationResult
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.ValueDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.query.dataFlow
import de.fraunhofer.aisec.cpg.query.executionPath
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression
import de.fraunhofer.aisec.cpg.query.*

//
// all functions/properties defined here must use CokoBackend
Expand Down Expand Up @@ -60,34 +66,50 @@
* [Definition]s.
*/
context(CokoBackend)
fun Op.cpgGetNodes(): Collection<CallExpression> =
fun Op.cpgGetNodes(): Map<CallExpression, Result> =
when (this@Op) {
is FunctionOp ->
[email protected]
.flatMap { def ->
[email protected](def.fqn) {
def.signatures.any { sig ->
cpgSignature(*sig.parameters.toTypedArray()) &&
sig.unorderedParameters.all { it?.cpgFlowsTo(arguments) ?: false }
is FunctionOp -> {
val results = mutableListOf<Result>()
val fqn = [email protected] { def ->
[email protected](def.fqn) {
def.signatures.any { sig ->
// We consider a result when both the signature and the flow are not invalid
// However, if at least one of them is OPEN we propagate this information to the caller
val signature = cpgSignature(*sig.parameters.toTypedArray())

Check warning

Code scanning / detekt

In most cases using a spread operator causes a full copy of the array to be created before calling a method. This may result in a performance penalty. Warning

Used in this way a spread operator causes a full copy of the array to be created before calling a method. This may result in a performance penalty.
val flow = sig.unorderedParameters.allResult { it?.cpgFlowsTo(arguments) }
if (signature != INVALID && flow != INVALID) {
results.add(signature.and(flow))
} else {
false
}
}
}
is ConstructorOp ->
[email protected]
.flatMap { sig ->
[email protected]([email protected]) {
cpgSignature(*sig.parameters.toTypedArray()) &&
sig.unorderedParameters.all { it?.cpgFlowsTo(arguments) ?: false }
}
fqn.zip(results).toMap()
}
is ConstructorOp -> {
val results = mutableListOf<Result>()
val fqn = [email protected] { sig ->
[email protected]([email protected]) {
val signature = cpgSignature(*sig.parameters.toTypedArray())

Check warning

Code scanning / detekt

In most cases using a spread operator causes a full copy of the array to be created before calling a method. This may result in a performance penalty. Warning

Used in this way a spread operator causes a full copy of the array to be created before calling a method. This may result in a performance penalty.
val flow = sig.unorderedParameters.allResult { it?.cpgFlowsTo(arguments) }
if (signature != INVALID && flow != INVALID) {
results.add(signature.and(flow))
} else {
false
}
}
is GroupingOp -> [email protected] { it.cpgGetNodes() }
}
fqn.zip(results).toMap()
}
is GroupingOp -> [email protected] { it.cpgGetNodes().entries }.associate { it.toPair() }
is ConditionalOp -> {
val resultNodes = resultOp.cpgGetNodes()
val conditionNodes = conditionOp.cpgGetNodes()
resultNodes.filter { resultNode ->
conditionNodes.any { conditionNode ->
// TODO: Is it correct to use the EOG relationship here?
val result = executionPath(conditionNode, resultNode)
val result = executionPath(conditionNode.key, resultNode.key)
result.value
}
}
Expand Down Expand Up @@ -148,7 +170,7 @@
* - If this is a Collection, we check if at least one of the elements flows to [that]
* - If this is a [Node], we use the DFG of the CPG.
*/
infix fun Any.cpgFlowsTo(that: Node): Boolean =
infix fun Any.cpgFlowsTo(that: Node): Result =
this.cpgFlowsTo(listOf(that))

// it should only be available in the context of a CallExpression
Expand All @@ -159,22 +181,45 @@
* - If this is a Collection, we check if at least one of the elements flows to [that]
* - If this is a [Node], we use the DFG of the CPG.
*/
infix fun Any.cpgFlowsTo(that: Collection<Node>): Boolean =
if (this is Wildcard) {
true
} else {
infix fun Any.cpgFlowsTo(that: Collection<Node>): Result =
Result.convert(
when (this) {
is Wildcard -> true
is String -> that.any {
val regex = Regex(this)
regex.matches((it as? Expression)?.evaluate()?.toString().orEmpty()) || regex.matches(it.code.orEmpty())
}
is Iterable<*> -> this.any { it?.cpgFlowsTo(that) ?: false }
is Array<*> -> this.any { it?.cpgFlowsTo(that) ?: false }
// Separate cases for IntRange and LongRange result in a huge performance boost for large ranges
is LongRange, is IntRange -> checkRange(that)
is Iterable<*> -> this.anyResult { it?.cpgFlowsTo(that) }
is Array<*> -> this.anyResult { it?.cpgFlowsTo(that) }
is Node -> that.any { dataFlow(this, it).value }
is ParameterGroup -> this.parameters.all { it?.cpgFlowsTo(that) ?: false }
is ParameterGroup -> this.parameters.allResult { it?.cpgFlowsTo(that) }
else -> this in that.map { (it as Expression).evaluate() }
}
)

/**
 * Checks whether the possible values of every node in [that] lie inside this range.
 *
 * The receiver must be an [IntRange] or a [LongRange]. For each node, the values reported by the
 * `min` and `max` helpers (presumably the CPG value-range queries -- confirm) are compared against
 * the range's `first` and `last`. Both ends are inclusive, matching Kotlin's closed-range semantics
 * (`x in a..b` holds for `x == a` and `x == b`).
 *
 * @param that the nodes to check; an empty collection yields `true`
 * @return `true` if `first <= min` and `max <= last` holds for every node
 * @throws IllegalArgumentException if the receiver is neither an [IntRange] nor a [LongRange]
 */
private fun Any.checkRange(that: Collection<Node>): Boolean {
    // `first`/`last` are plain properties of the range, so reading them stays O(1) even for huge
    // ranges. Widening the Int bounds to Long lets both range kinds share one comparison.
    val (lowerBound, upperBound) = when (this) {
        is IntRange -> this.first.toLong() to this.last.toLong()
        is LongRange -> this.first to this.last
        else -> throw IllegalArgumentException("Unexpected type")
    }
    return that.all { node ->
        // Compare as Long: converting with toInt() would wrap values outside the Int range.
        val minValue = min(node).value.toLong()
        val maxValue = max(node).value.toLong()
        minValue >= lowerBound && maxValue <= upperBound
    }
}

context(CokoBackend)
// TODO: better description
Expand All @@ -195,21 +240,20 @@
* are not important to the analysis
*/
@Suppress("UnsafeCallOnNullableType")
fun CallExpression.cpgSignature(vararg parameters: Any?, hasVarargs: Boolean = false): Boolean {
fun CallExpression.cpgSignature(vararg parameters: Any?, hasVarargs: Boolean = false): Result {
// checks if amount of parameters is the same as amount of arguments of this CallExpression
return cpgCheckArgsSize(parameters, hasVarargs) &&
if (cpgCheckArgsSize(parameters, hasVarargs)) {
// checks if the CallExpression matches with the parameters
parameters.withIndex().all { (i: Int, parameter: Any?) ->
return parameters.withIndex().allResult { (i: Int, parameter: Any?) ->
when (parameter) {
// if any parameter is null, signature returns false
null -> false
null -> INVALID
is ParamWithType ->
// if `parameter` is a `ParamWithType` object we want to check the type and
// if there is dataflow
cpgCheckType(parameter.type, i) &&
parameter.param cpgFlowsTo arguments[i]
if (cpgCheckType(parameter.type, i)) parameter.param cpgFlowsTo arguments[i] else INVALID
// checks if the type of the argument is the same
is Type -> cpgCheckType(parameter, i)
is Type -> Result.convert(cpgCheckType(parameter, i))
// check if any of the Nodes of the Op flow to the argument
is Op -> parameter.cpgGetNodes() cpgFlowsTo arguments[i]
// check if any of the Nodes of the DataItem flow to the argument
Expand All @@ -218,6 +262,8 @@
else -> parameter cpgFlowsTo arguments[i]
}
}
}
return INVALID
}

/** Checks the [type] against the type of the argument at [index] for the Call Expression */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.fraunhofer.aisec.codyze.backends.cpg.coko.dsl

import de.fraunhofer.aisec.codyze.backends.cpg.coko.dsl.Result.*

/**
 * Ternary outcome of an analysis step.
 *
 * - [VALID]: the check holds.
 * - [INVALID]: the check does not hold.
 * - [OPEN]: neither [VALID] nor [INVALID] could be deduced because of a lack of information.
 */
enum class Result {
    VALID,
    INVALID,
    OPEN;

    companion object {
        /**
         * Converts an arbitrary value into a [Result].
         *
         * @param from the value to convert
         * @return [from] itself if it already is a [Result]; [VALID]/[INVALID] for the booleans
         * `true`/`false`; [OPEN] for everything else, including `null`
         */
        fun convert(from: Any?): Result =
            when (from) {
                is Boolean -> if (from) VALID else INVALID
                is Result -> from
                else -> OPEN
            }
    }
}

/**
 * Combines the [Result]s that [predicate] yields for the elements into one ternary "all" verdict.
 *
 * - Returns [OPEN] as soon as [predicate] yields [OPEN]; remaining elements are not evaluated.
 * - Otherwise returns [INVALID] if [predicate] yielded [INVALID] or `null` for at least one element.
 * - Otherwise returns [VALID]; this includes an empty receiver.
 *
 * [predicate] is invoked at most once per element.
 *
 * @param predicate maps an element to its [Result]; a `null` result is not [VALID] and is
 * therefore counted like [INVALID]
 * @return the combined [Result]
 */
fun <T> Iterable<T>.allResult(predicate: (T) -> Result?): Result {
    var invalidFlag = false
    for (element in this) {
        // Evaluate the predicate exactly once: it may be expensive or have side effects.
        when (predicate(element)) {
            OPEN -> return OPEN
            INVALID, null -> invalidFlag = true
            VALID -> Unit
        }
    }
    return if (invalidFlag) INVALID else VALID
}

/**
 * Combines the [Result]s that [predicate] yields for the elements into one ternary "any" verdict.
 *
 * - Returns [VALID] as soon as [predicate] yields [VALID]; remaining elements are not evaluated.
 * - Otherwise returns [OPEN] if [predicate] yielded [OPEN] for at least one element.
 * - Otherwise returns [INVALID]; this includes an empty receiver.
 *
 * [predicate] is invoked at most once per element.
 *
 * @param predicate maps an element to its [Result]; a `null` result contributes nothing
 * @return the combined [Result]
 */
fun <T> Iterable<T>.anyResult(predicate: (T) -> Result?): Result {
    var openFlag = false
    for (element in this) {
        // Evaluate the predicate exactly once: it may be expensive or have side effects.
        when (predicate(element)) {
            VALID -> return VALID
            OPEN -> openFlag = true
            INVALID, null -> Unit
        }
    }
    return if (openFlag) OPEN else INVALID
}

/**
 * Array counterpart of the [Iterable] `allResult`: views this array as an [Iterable] and delegates
 * to that overload, which defines how the individual [Result]s are combined.
 *
 * @param predicate maps an element to its [Result]
 * @return the combined [Result] as computed by the [Iterable] overload
 */
fun <T> Array<T>.allResult(predicate: (T) -> Result?): Result =
    asIterable().allResult(predicate)

/**
 * Array counterpart of the [Iterable] `anyResult`: views this array as an [Iterable] and delegates
 * to that overload, which defines how the individual [Result]s are combined.
 *
 * @param predicate maps an element to its [Result]
 * @return the combined [Result] as computed by the [Iterable] overload
 */
fun <T> Array<T>.anyResult(predicate: (T) -> Result?): Result =
    asIterable().anyResult(predicate)

/**
 * Ternary conjunction of two [Result]s with the precedence OPEN > INVALID > VALID.
 *
 * @param other the right-hand operand
 * @return [OPEN] if either operand is [OPEN]; otherwise [INVALID] if either operand is [INVALID];
 * otherwise [VALID]
 */
fun Result.and(other: Result): Result =
    when {
        this == OPEN || other == OPEN -> OPEN
        this == INVALID || other == INVALID -> INVALID
        else -> VALID
    }
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ class CpgWheneverEvaluator(premise: ConditionComponent) : WheneverEvaluator(prem
callConditionComponent: CallConditionComponent,
premiseNode: Node? = null
): EvaluationResult {
val callNodes = callConditionComponent.op.cpgGetNodes().filterWithDistanceToPremise(premiseNode)
val callNodes = callConditionComponent.op.cpgGetNodes().keys.filterWithDistanceToPremise(premiseNode)
return EvaluationResult(callNodes, emptyList(), Problems())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ class FollowsEvaluator(val ifOp: Op, val thenOp: Op) : Evaluator {

override fun evaluate(context: EvaluationContext): List<CpgFinding> {
val (unreachableThisNodes, thisNodes) =
with(this@CokoCpgBackend) { ifOp.cpgGetNodes().toSet() }
with(this@CokoCpgBackend) { ifOp.cpgGetNodes().keys }
.partition { it.isUnreachable() }

val thatNodes = with(this@CokoCpgBackend) { thenOp.cpgGetNodes().toSet() }
val thatNodes = with(this@CokoCpgBackend) { thenOp.cpgGetNodes().keys }

val findings = mutableListOf<CpgFinding>()

Expand Down
Loading