From 1e54302996f33ed659b22ad7e6d583cfccc77bf1 Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Tue, 14 Nov 2023 14:16:20 -0500 Subject: [PATCH] Generate CodeTF when no files/codemods match --- src/codemodder/codemodder.py | 85 ++++++++++++++++++++++-------------- tests/test_codemodder.py | 55 ++++++++++++++++------- 2 files changed, 92 insertions(+), 48 deletions(-) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 2d68ee4f9..a517c8cab 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -195,6 +195,51 @@ def log_report(context, argv, elapsed_ms, files_to_analyze): logger.info(" write: %s ms", context.timer.get_time_ms("write")) +def apply_codemods( + context: CodemodExecutionContext, + codemods_to_run: list[CodemodExecutorWrapper], + semgrep_results: ResultSet, + files_to_analyze: list[Path], + argv, +): + log_section("scanning") + + if not files_to_analyze: + logger.info("no files to scan") + return + + if not codemods_to_run: + logger.info("no codemods to run") + return + + semgrep_finding_ids = semgrep_results.all_rule_ids() + + # run codemods one at a time making sure to respect the given sequence + for codemod in codemods_to_run: + # Unfortunately the IDs from semgrep are not fully specified + # TODO: eventually we need to be able to use fully specified IDs here + if codemod.is_semgrep and codemod.name not in semgrep_finding_ids: + logger.debug( + "no results from semgrep for %s, skipping analysis", + codemod.id, + ) + continue + + logger.info("running codemod %s", codemod.id) + semgrep_files = semgrep_results.files_for_rule(codemod.name) + # Non-semgrep codemods ignore the semgrep results + results = codemod.apply(context, semgrep_files) + analyze_files( + context, + files_to_analyze, + codemod, + results, + argv, + ) + context.process_dependencies(codemod.id) + context.log_changes(codemod.id) + + def run(original_args) -> int: start = datetime.datetime.now() @@ -229,10 +274,6 @@ def run(original_args) -> int: codemods_to_run = codemod_registry.match_codemods( argv.codemod_include, argv.codemod_exclude ) - if not codemods_to_run: - # XXX: sarif files given on the command line are currently not used by any codemods - logger.error("no codemods to run") - return 0 log_section("setup") log_list(logging.INFO, "running", codemods_to_run, predicate=lambda c: c.id) @@ -242,41 +283,19 @@ def run(original_args) -> int: files_to_analyze: list[Path] = match_files( context.directory, argv.path_exclude, argv.path_include ) - if not files_to_analyze: - logger.error("no files matched.") - return 0 full_names = [str(path) for path in files_to_analyze] log_list(logging.DEBUG, "matched files", full_names) semgrep_results: ResultSet = find_semgrep_results(context, codemods_to_run) - semgrep_finding_ids = semgrep_results.all_rule_ids() - - log_section("scanning") - # run codemods one at a time making sure to respect the given sequence - for codemod in codemods_to_run: - # Unfortunately the IDs from semgrep are not fully specified - # TODO: eventually we need to be able to use fully specified IDs here - if codemod.is_semgrep and codemod.name not in semgrep_finding_ids: - logger.debug( - "no results from semgrep for %s, skipping analysis", - codemod.id, - ) - continue - logger.info("running codemod %s", codemod.id) - semgrep_files = semgrep_results.files_for_rule(codemod.name) - # Non-semgrep codemods ignore the semgrep results - results = codemod.apply(context, semgrep_files) - analyze_files( - context, - files_to_analyze, - codemod, - results, - argv, - ) - context.process_dependencies(codemod.id) - context.log_changes(codemod.id) + apply_codemods( + context, + codemods_to_run, + semgrep_results, + files_to_analyze, + argv, + ) results = context.compile_results(codemods_to_run) diff --git a/tests/test_codemodder.py b/tests/test_codemodder.py index dd73f6a94..5dac795d6 100644 --- a/tests/test_codemodder.py +++ b/tests/test_codemodder.py @@ -9,14 +9,21 @@ from codemodder.result import ResultSet +@pytest.fixture(autouse=True, scope="module") +def disable_write_report(): + """Override fixture from conftest.py""" + + class TestRun: @mock.patch("libcst.parse_module") - @mock.patch("codemodder.codemodder.logger.error") - def test_no_files_matched(self, error_log, mock_parse): + def test_no_files_matched(self, mock_parse, tmpdir): + codetf = tmpdir / "result.codetf" + assert not codetf.exists() + args = [ "tests/samples/", "--output", - "here.txt", + str(codetf), "--codemod-include=url-sandbox", "--path-exclude", "*py", @@ -24,10 +31,8 @@ def test_no_files_matched(self, error_log, mock_parse): res = run(args) assert res == 0 - error_log.assert_called() - assert error_log.call_args_list[0][0][0] == "no files matched." - mock_parse.assert_not_called() + assert codetf.exists() @mock.patch("libcst.parse_module", side_effect=Exception) @mock.patch("codemodder.codemodder.report_default") @@ -63,16 +68,20 @@ def test_cst_parsing_fails(self, mock_reporting, mock_parse): @mock.patch("codemodder.codemodder.update_code") @mock.patch("codemodder.codemods.base_codemod.semgrep_run", side_effect=semgrep_run) - def test_dry_run(self, _, mock_update_code): + def test_dry_run(self, _, mock_update_code, tmpdir): + codetf = tmpdir / "result.codetf" args = [ "tests/samples/", "--output", - "here.txt", + str(codetf), "--dry-run", ] + assert not codetf.exists() + res = run(args) assert res == 0 + assert codetf.exists() mock_update_code.assert_not_called() @@ -99,23 +108,31 @@ def test_reporting(self, mock_reporting, dry_run): assert len(results_by_codemod) == len(registry.codemods) @mock.patch("codemodder.codemods.base_codemod.semgrep_run") - def test_no_codemods_to_run(self, mock_semgrep_run): + def test_no_codemods_to_run(self, mock_semgrep_run, tmpdir): + codetf = tmpdir / "result.codetf" + assert not codetf.exists() + registry = load_registered_codemods() names = ",".join(registry.names) args = [ "tests/samples/", "--output", - "here.txt", + str(codetf), f"--codemod-exclude={names}", ] exit_code = run(args) assert exit_code == 0 mock_semgrep_run.assert_not_called() + assert codetf.exists() @pytest.mark.parametrize("codemod", ["secure-random", "pixee:python/secure-random"]) @mock.patch("codemodder.context.CodemodExecutionContext.compile_results") - def test_run_codemod_name_or_id(self, mock_compile_results, codemod): + @mock.patch("codemodder.codemodder.report_default") + def test_run_codemod_name_or_id( + self, report_default, mock_compile_results, codemod + ): + del report_default args = [ "tests/samples/", "--output", @@ -129,7 +146,9 @@ def test_run_codemod_name_or_id(self, mock_compile_results, codemod): class TestExitCode: - def test_success_0(self): + @mock.patch("codemodder.codemodder.report_default") + def test_success_0(self, mock_report): + del mock_report args = [ "tests/samples/", "--output", @@ -142,7 +161,9 @@ def test_success_0(self): exit_code = run(args) assert exit_code == 0 - def test_bad_project_dir_1(self): + @mock.patch("codemodder.codemodder.report_default") + def test_bad_project_dir_1(self, mock_report): + del mock_report args = [ "bad/path/", "--output", @@ -153,7 +174,9 @@ def test_bad_project_dir_1(self): exit_code = run(args) assert exit_code == 1 - def test_conflicting_include_exclude(self): + @mock.patch("codemodder.codemodder.report_default") + def test_conflicting_include_exclude(self, mock_report): + del mock_report args = [ "tests/samples/", "--output", @@ -167,7 +190,9 @@ def test_conflicting_include_exclude(self): run(args) assert err.value.args[0] == 3 - def test_bad_codemod_name(self): + @mock.patch("codemodder.codemodder.report_default") + def test_bad_codemod_name(self, mock_report): + del mock_report bad_codemod = "doesntexist" args = [ "tests/samples/",