Skip to content

Commit

Permalink
Prevent duplicate findings from multiple SARIF files
Browse files (browse the repository at this point in the history)
  • Loading branch information
drdavella committed Nov 21, 2024
1 parent cbc6a29 commit e0115e8
Show file tree
Hide file tree
Showing 8 changed files with 359 additions and 309 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ readme = "README.md"
license = {file = "LICENSE"}
description = "A pluggable framework for building codemods in Python"
dependencies = [
"boltons~=21.0.0",
"GitPython<4",
"isort>=5.12,<5.14",
"libcst>=1.1,<1.6",
Expand Down
6 changes: 6 additions & 0 deletions src/codemodder/codetf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,17 @@ class Rule(BaseModel):
name: str
url: Optional[str] = None

class Config:
frozen = True


class Finding(BaseModel):
id: str
rule: Rule

class Config:
frozen = True

def to_unfixed_finding(
self,
*,
Expand Down
77 changes: 46 additions & 31 deletions src/codemodder/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from abc import abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Type
from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type

import libcst as cst
from boltons.setutils import IndexedSet
from libcst._position import CodeRange
from typing_extensions import Self

Expand All @@ -18,39 +19,40 @@
from codemodder.context import CodemodExecutionContext


@dataclass
@dataclass(frozen=True)
class LineInfo:
line: int
column: int = -1
snippet: str | None = None


@dataclass
@dataclass(frozen=True)
class Location(ABCDataclass):
file: Path
start: LineInfo
end: LineInfo


@dataclass(frozen=True)
class SarifLocation(Location):
@classmethod
@abstractmethod
def from_sarif(cls, sarif_location) -> Self:
pass


@dataclass
@dataclass(frozen=True)
class LocationWithMessage:
location: Location
message: str


@dataclass(kw_only=True)
@dataclass(frozen=True, kw_only=True)
class Result(ABCDataclass):
rule_id: str
locations: list[Location]
codeflows: list[list[Location]] = field(default_factory=list)
related_locations: list[LocationWithMessage] = field(default_factory=list)
locations: Sequence[Location]
codeflows: Sequence[Sequence[Location]] = field(default_factory=tuple)
related_locations: Sequence[LocationWithMessage] = field(default_factory=tuple)
finding: Finding | None = None

def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
Expand All @@ -67,13 +69,16 @@ def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
for location in self.locations
)

def __hash__(self):
return hash(self.rule_id)

@dataclass(kw_only=True)

@dataclass(frozen=True, kw_only=True)
class SASTResult(Result):
finding_id: str


@dataclass(kw_only=True)
@dataclass(frozen=True, kw_only=True)
class SarifResult(SASTResult, ABCDataclass):
location_type: ClassVar[Type[SarifLocation]]

Expand All @@ -84,32 +89,40 @@ def from_sarif(
raise NotImplementedError

@classmethod
def extract_locations(cls, sarif_result) -> list[Location]:
return [
cls.location_type.from_sarif(location)
for location in sarif_result["locations"]
]
def extract_locations(cls, sarif_result) -> Sequence[Location]:
return tuple(
[
cls.location_type.from_sarif(location)
for location in sarif_result["locations"]
]
)

@classmethod
def extract_related_locations(cls, sarif_result) -> list[LocationWithMessage]:
return [
LocationWithMessage(
message=rel_location.get("message", {}).get("text", ""),
location=cls.location_type.from_sarif(rel_location),
)
for rel_location in sarif_result.get("relatedLocations", [])
]
def extract_related_locations(cls, sarif_result) -> Sequence[LocationWithMessage]:
return tuple(
[
LocationWithMessage(
message=rel_location.get("message", {}).get("text", ""),
location=cls.location_type.from_sarif(rel_location),
)
for rel_location in sarif_result.get("relatedLocations", [])
]
)

@classmethod
def extract_code_flows(cls, sarif_result) -> list[list[Location]]:
return [
def extract_code_flows(cls, sarif_result) -> Sequence[Sequence[Location]]:
return tuple(
[
cls.location_type.from_sarif(locations.get("location"))
for locations in threadflow.get("locations", {})
tuple(
[
cls.location_type.from_sarif(locations.get("location"))
for locations in threadflow.get("locations", {})
]
)
for codeflow in sarif_result.get("codeFlows", {})
for threadflow in codeflow.get("threadFlows", {})
]
for codeflow in sarif_result.get("codeFlows", {})
for threadflow in codeflow.get("threadFlows", {})
]
)

@classmethod
def extract_rule_id(cls, result, sarif_run, truncate_rule_id: bool = False) -> str:
Expand Down Expand Up @@ -199,5 +212,7 @@ def list_dict_or(
) -> dict[Any, list[Any]]:
result_dict = {}
for k in other.keys() | dictionary.keys():
result_dict[k] = dictionary.get(k, []) + other.get(k, [])
result_dict[k] = list(
IndexedSet(dictionary.get(k, [])) | (IndexedSet(other.get(k, [])))
)
return result_dict
2 changes: 1 addition & 1 deletion src/codemodder/utils/abc_dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import dataclass


@dataclass
@dataclass(frozen=True)
class ABCDataclass(ABC):
"""Inspired by https://stackoverflow.com/a/60669138"""

Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/defectdojo/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def from_result(cls, result: dict) -> Self:
return cls(
finding_id=result["id"],
rule_id=result["title"],
locations=[DefectDojoLocation.from_result(result)],
locations=tuple([DefectDojoLocation.from_result(result)]),
finding=Finding(
id=str(result["id"]),
rule=Rule(
Expand Down
17 changes: 11 additions & 6 deletions src/core_codemods/sonar/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import replace
from functools import cache
from pathlib import Path
from typing import Sequence

import libcst as cst
from typing_extensions import Self
Expand Down Expand Up @@ -40,18 +41,22 @@ def from_result(cls, result: dict) -> Self:
if not (rule_id := result.get("rule", None) or result.get("ruleKey", None)):
raise ValueError("Could not extract rule id from sarif result.")

locations: list[Location] = (
locations: Sequence[Location] = tuple(
[SonarLocation.from_json_location(result)]
if result.get("textRange")
else []
)
all_flows: list[list[Location]] = [
all_flows: Sequence[Sequence[Location]] = tuple(
[
SonarLocation.from_json_location(json_location)
for json_location in flow.get("locations", {})
tuple(
[
SonarLocation.from_json_location(json_location)
for json_location in flow.get("locations", {})
]
)
for flow in result.get("flows", [])
]
for flow in result.get("flows", [])
]
)

finding_id = result.get("key", rule_id)

Expand Down
Loading

0 comments on commit e0115e8

Please sign in to comment.