From 22ea9ac970ab8529ac20d8da3212155341100a15 Mon Sep 17 00:00:00 2001 From: Dani Alcala <112832187+clavedeluna@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:25:04 -0300 Subject: [PATCH] ResultSet `or` behavior needs update for storing tools (#944) * correctly or result * only add if tool is present --- src/codemodder/codeql.py | 1 + src/codemodder/result.py | 6 +++++- src/codemodder/semgrep.py | 2 +- tests/test_codeql.py | 2 ++ tests/test_sarif_processing.py | 7 +++++++ 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/codemodder/codeql.py b/src/codemodder/codeql.py index fc3fd2bb..0f976adc 100644 --- a/src/codemodder/codeql.py +++ b/src/codemodder/codeql.py @@ -59,4 +59,5 @@ def from_sarif(cls, sarif_file: str | Path, truncate_rule_id: bool = False) -> S sarif_result, sarif_run, truncate_rule_id ) result_set.add_result(codeql_result) + result_set.store_tool_data(sarif_run.get("tool", {})) return result_set diff --git a/src/codemodder/result.py b/src/codemodder/result.py index f3487d5c..69d86bba 100644 --- a/src/codemodder/result.py +++ b/src/codemodder/result.py @@ -192,7 +192,8 @@ def add_result(self, result: Result): self.setdefault(result.rule_id, {}).setdefault(loc.file, []).append(result) def store_tool_data(self, tool_data: dict): - self.tools.append(tool_data) + if tool_data: + self.tools.append(tool_data) def results_for_rule_and_file( self, context: CodemodExecutionContext, rule_id: str, file: Path @@ -231,6 +232,9 @@ def __or__(self, other): result.results_for_rule = list_dict_or( self.results_for_rule, other.results_for_rule ) + for tool in [*self.tools, *other.tools]: + result.store_tool_data(tool) + return result def __ior__(self, other): diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py index 47a1ea32..7c6e7a32 100644 --- a/src/codemodder/semgrep.py +++ b/src/codemodder/semgrep.py @@ -88,7 +88,7 @@ def from_sarif(cls, sarif_file: str | Path, truncate_rule_id: bool = False) -> S result, sarif_run, truncate_rule_id 
) result_set.add_result(sarif_result) - + result_set.store_tool_data(sarif_run.get("tool", {})) return result_set diff --git a/tests/test_codeql.py b/tests/test_codeql.py index c8a2e2c3..ea219e83 100644 --- a/tests/test_codeql.py +++ b/tests/test_codeql.py @@ -19,6 +19,8 @@ def test_from_file(codeql_result_set: Path): result_set = process_codeql_findings(tuple([str(codeql_result_set)])) assert len(result_set["py/path-injection"][Path("Path Traversal/py_ctf.py")]) == 1 assert len(result_set["py/path-injection"]) == 2 + assert result_set.tools + assert result_set.tools[0]["driver"] def test_from_duplicate_files(codeql_result_set: Path): diff --git a/tests/test_sarif_processing.py b/tests/test_sarif_processing.py index b4e75d2b..f7d4f2a9 100644 --- a/tests/test_sarif_processing.py +++ b/tests/test_sarif_processing.py @@ -4,6 +4,7 @@ import pytest +from codemodder.codemods.semgrep import process_semgrep_findings from codemodder.sarifs import detect_sarif_tools from codemodder.semgrep import SemgrepResult, SemgrepResultSet @@ -149,3 +150,9 @@ def test_two_sarifs_different_tools(self): assert "semgrep" in results assert len(results["codeql"]) == 1 assert len(results["semgrep"]) == 1 + + def test_stores_tools(self): + sarif_file = Path("tests") / "samples" / "semgrep.sarif" + result_set = process_semgrep_findings(tuple([str(sarif_file)])) + assert result_set.tools + assert result_set.tools[0]["driver"]["rules"]