From 2db48f95f3662385e9e9f59d8712fb72aa68f4c1 Mon Sep 17 00:00:00 2001
From: Daniel D'Avella
Date: Tue, 14 Nov 2023 14:16:20 -0500
Subject: [PATCH] Generate CodeTF when no files/codemods match

---
 src/codemodder/codemodder.py | 85 ++++++++++++++++++++++--------------
 tests/test_codemodder.py     | 55 ++++++++++++++++-------
 2 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py
index 2d68ee4f9..a517c8cab 100644
--- a/src/codemodder/codemodder.py
+++ b/src/codemodder/codemodder.py
@@ -195,6 +195,51 @@ def log_report(context, argv, elapsed_ms, files_to_analyze):
     logger.info("  write: %s ms", context.timer.get_time_ms("write"))
 
 
+def apply_codemods(
+    context: CodemodExecutionContext,
+    codemods_to_run: list[CodemodExecutorWrapper],
+    semgrep_results: ResultSet,
+    files_to_analyze: list[Path],
+    argv,
+):
+    log_section("scanning")
+
+    if not files_to_analyze:
+        logger.info("no files to scan")
+        return
+
+    if not codemods_to_run:
+        logger.info("no codemods to run")
+        return
+
+    semgrep_finding_ids = semgrep_results.all_rule_ids()
+
+    # run codemods one at a time making sure to respect the given sequence
+    for codemod in codemods_to_run:
+        # Unfortunately the IDs from semgrep are not fully specified
+        # TODO: eventually we need to be able to use fully specified IDs here
+        if codemod.is_semgrep and codemod.name not in semgrep_finding_ids:
+            logger.debug(
+                "no results from semgrep for %s, skipping analysis",
+                codemod.id,
+            )
+            continue
+
+        logger.info("running codemod %s", codemod.id)
+        semgrep_files = semgrep_results.files_for_rule(codemod.name)
+        # Non-semgrep codemods ignore the semgrep results
+        results = codemod.apply(context, semgrep_files)
+        analyze_files(
+            context,
+            files_to_analyze,
+            codemod,
+            results,
+            argv,
+        )
+        context.process_dependencies(codemod.id)
+        context.log_changes(codemod.id)
+
+
 def run(original_args) -> int:
     start = datetime.datetime.now()
 
@@ -229,10 +274,6 @@ def run(original_args) -> int:
     codemods_to_run = codemod_registry.match_codemods(
         argv.codemod_include, argv.codemod_exclude
     )
-    if not codemods_to_run:
-        # XXX: sarif files given on the command line are currently not used by any codemods
-        logger.error("no codemods to run")
-        return 0
 
     log_section("setup")
     log_list(logging.INFO, "running", codemods_to_run, predicate=lambda c: c.id)
@@ -242,41 +283,19 @@ def run(original_args) -> int:
     files_to_analyze: list[Path] = match_files(
         context.directory, argv.path_exclude, argv.path_include
     )
-    if not files_to_analyze:
-        logger.error("no files matched.")
-        return 0
 
     full_names = [str(path) for path in files_to_analyze]
     log_list(logging.DEBUG, "matched files", full_names)
 
     semgrep_results: ResultSet = find_semgrep_results(context, codemods_to_run)
 
-    semgrep_finding_ids = semgrep_results.all_rule_ids()
-
-    log_section("scanning")
-    # run codemods one at a time making sure to respect the given sequence
-    for codemod in codemods_to_run:
-        # Unfortunately the IDs from semgrep are not fully specified
-        # TODO: eventually we need to be able to use fully specified IDs here
-        if codemod.is_semgrep and codemod.name not in semgrep_finding_ids:
-            logger.debug(
-                "no results from semgrep for %s, skipping analysis",
-                codemod.id,
-            )
-            continue
-        logger.info("running codemod %s", codemod.id)
-        semgrep_files = semgrep_results.files_for_rule(codemod.name)
-        # Non-semgrep codemods ignore the semgrep results
-        results = codemod.apply(context, semgrep_files)
-        analyze_files(
-            context,
-            files_to_analyze,
-            codemod,
-            results,
-            argv,
-        )
-        context.process_dependencies(codemod.id)
-        context.log_changes(codemod.id)
+    apply_codemods(
+        context,
+        codemods_to_run,
+        semgrep_results,
+        files_to_analyze,
+        argv,
+    )
 
     results = context.compile_results(codemods_to_run)
 
diff --git a/tests/test_codemodder.py b/tests/test_codemodder.py
index dd73f6a94..5dac795d6 100644
--- a/tests/test_codemodder.py
+++ b/tests/test_codemodder.py
@@ -9,14 +9,21 @@
 from codemodder.result import ResultSet
 
 
+@pytest.fixture(autouse=True, scope="module")
+def disable_write_report():
+    """Override fixture from conftest.py"""
+
+
 class TestRun:
     @mock.patch("libcst.parse_module")
-    @mock.patch("codemodder.codemodder.logger.error")
-    def test_no_files_matched(self, error_log, mock_parse):
+    def test_no_files_matched(self, mock_parse, tmpdir):
+        codetf = tmpdir / "result.codetf"
+        assert not codetf.exists()
+
         args = [
             "tests/samples/",
             "--output",
-            "here.txt",
+            str(codetf),
             "--codemod-include=url-sandbox",
             "--path-exclude",
             "*py",
@@ -24,10 +31,8 @@ def test_no_files_matched(self, error_log, mock_parse):
 
         res = run(args)
         assert res == 0
-        error_log.assert_called()
-        assert error_log.call_args_list[0][0][0] == "no files matched."
-
         mock_parse.assert_not_called()
+        assert codetf.exists()
 
     @mock.patch("libcst.parse_module", side_effect=Exception)
     @mock.patch("codemodder.codemodder.report_default")
@@ -63,16 +68,20 @@ def test_cst_parsing_fails(self, mock_reporting, mock_parse):
 
     @mock.patch("codemodder.codemodder.update_code")
     @mock.patch("codemodder.codemods.base_codemod.semgrep_run", side_effect=semgrep_run)
-    def test_dry_run(self, _, mock_update_code):
+    def test_dry_run(self, _, mock_update_code, tmpdir):
+        codetf = tmpdir / "result.codetf"
         args = [
             "tests/samples/",
             "--output",
-            "here.txt",
+            str(codetf),
             "--dry-run",
         ]
 
+        assert not codetf.exists()
+
         res = run(args)
         assert res == 0
+        assert codetf.exists()
 
         mock_update_code.assert_not_called()
 
@@ -99,23 +108,31 @@ def test_reporting(self, mock_reporting, dry_run):
         assert len(results_by_codemod) == len(registry.codemods)
 
     @mock.patch("codemodder.codemods.base_codemod.semgrep_run")
-    def test_no_codemods_to_run(self, mock_semgrep_run):
+    def test_no_codemods_to_run(self, mock_semgrep_run, tmpdir):
+        codetf = tmpdir / "result.codetf"
+        assert not codetf.exists()
+
         registry = load_registered_codemods()
         names = ",".join(registry.names)
         args = [
             "tests/samples/",
             "--output",
-            "here.txt",
+            str(codetf),
             f"--codemod-exclude={names}",
         ]
 
         exit_code = run(args)
         assert exit_code == 0
         mock_semgrep_run.assert_not_called()
+        assert codetf.exists()
 
     @pytest.mark.parametrize("codemod", ["secure-random", "pixee:python/secure-random"])
     @mock.patch("codemodder.context.CodemodExecutionContext.compile_results")
-    def test_run_codemod_name_or_id(self, mock_compile_results, codemod):
+    @mock.patch("codemodder.codemodder.report_default")
+    def test_run_codemod_name_or_id(
+        self, report_default, mock_compile_results, codemod
+    ):
+        del report_default
         args = [
             "tests/samples/",
             "--output",
@@ -129,7 +146,9 @@ def test_run_codemod_name_or_id(self, mock_compile_results, codemod):
 
 
 class TestExitCode:
-    def test_success_0(self):
+    @mock.patch("codemodder.codemodder.report_default")
+    def test_success_0(self, mock_report):
+        del mock_report
         args = [
             "tests/samples/",
             "--output",
@@ -142,7 +161,9 @@ def test_success_0(self):
         exit_code = run(args)
         assert exit_code == 0
 
-    def test_bad_project_dir_1(self):
+    @mock.patch("codemodder.codemodder.report_default")
+    def test_bad_project_dir_1(self, mock_report):
+        del mock_report
         args = [
             "bad/path/",
             "--output",
@@ -153,7 +174,9 @@ def test_bad_project_dir_1(self):
         exit_code = run(args)
         assert exit_code == 1
 
-    def test_conflicting_include_exclude(self):
+    @mock.patch("codemodder.codemodder.report_default")
+    def test_conflicting_include_exclude(self, mock_report):
+        del mock_report
         args = [
             "tests/samples/",
             "--output",
@@ -167,7 +190,9 @@ def test_conflicting_include_exclude(self):
             run(args)
         assert err.value.args[0] == 3
 
-    def test_bad_codemod_name(self):
+    @mock.patch("codemodder.codemodder.report_default")
+    def test_bad_codemod_name(self, mock_report):
+        del mock_report
         bad_codemod = "doesntexist"
         args = [
             "tests/samples/",