From 2922572e1db3b555b7dfbc66dd1650a82ec280a3 Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Wed, 13 Sep 2023 09:43:05 -0400 Subject: [PATCH] Use codemod execution context to store results --- src/codemodder/__main__.py | 62 ++++++++++++------------- src/codemodder/codemods/__init__.py | 3 ++ src/codemodder/codemods/api/__init__.py | 2 - src/codemodder/codemods/base_codemod.py | 13 +++--- src/codemodder/context.py | 30 ++++++++++++ src/codemodder/file_context.py | 1 - tests/codemods/base_codemod_test.py | 5 +- tests/shared.py | 6 +-- tests/test_file_context.py | 2 +- 9 files changed, 73 insertions(+), 51 deletions(-) create mode 100644 src/codemodder/context.py diff --git a/src/codemodder/__main__.py b/src/codemodder/__main__.py index dded2fbfb..0d8807937 100644 --- a/src/codemodder/__main__.py +++ b/src/codemodder/__main__.py @@ -11,29 +11,14 @@ from codemodder.cli import parse_args from codemodder.code_directory import file_line_patterns, match_files from codemodder.codemods import match_codemods -from codemodder.codemods.change import Change +from codemodder.context import CodemodExecutionContext, ChangeSet from codemodder.dependency_manager import DependencyManager from codemodder.report.codetf_reporter import report_default from codemodder.semgrep import run as semgrep_run from codemodder.sarifs import parse_sarif_files # Must use from import here to point to latest state -from codemodder import global_state - -RESULTS_BY_CODEMOD = [] -from dataclasses import dataclass - - -@dataclass -class ChangeSet: - """A set of changes made to a file at `path`""" - - path: str - diff: str - changes: list[Change] - - def to_json(self): - return {"path": self.path, "diff": self.diff, "changes": self.changes} +from codemodder import global_state # TODO: should not use global state def update_code(file_path, new_code): @@ -46,6 +31,7 @@ def update_code(file_path, new_code): def run_codemods_for_file( + execution_context: CodemodExecutionContext, file_context, codemods_to_run, source_tree, @@ -54,6 +40,8 @@ def run_codemods_for_file( wrapper = cst.MetadataWrapper(source_tree) codemod = codemod_kls( CodemodContext(wrapper=wrapper), + # TODO: eventually pass execution context here + # It will be used for things like dependency management file_context, ) if not codemod.should_transform: @@ -61,6 +49,7 @@ def run_codemods_for_file( logger.info("Running codemod %s for %s", name, file_context.file_path) output_tree = codemod.transform_module(source_tree) + # TODO: there has got to be a more efficient way to check this? changed_file = not output_tree.deep_equals(source_tree) if changed_file: @@ -72,21 +61,21 @@ def run_codemods_for_file( logger.debug("CHANGED %s with codemod %s", file_context.file_path, name) logger.debug(diff) - codemod_kls.CHANGESET_ALL_FILES.append( - ChangeSet( - # TODO: we should not be using global state here - str(file_context.file_path.relative_to(global_state.DIRECTORY)), - diff, - changes=file_context.codemod_changes, - ).to_json() + change_set = ChangeSet( + str(file_context.file_path.relative_to(execution_context.directory)), + diff, + changes=file_context.codemod_changes, ) - if file_context.dry_run: + execution_context.add_result(name, change_set) + + if execution_context.dry_run: logger.info("Dry run, not changing files") else: update_code(file_context.file_path, output_tree.code) def analyze_files( + execution_context: CodemodExecutionContext, files_to_analyze, codemods_to_run, sarif, @@ -109,34 +98,39 @@ def analyze_files( file_context = FileContext( file_path, - cli_args.dry_run, line_exclude, line_include, sarif_for_file, ) run_codemods_for_file( + execution_context, file_context, codemods_to_run, source_tree, ) -def compile_results(codemods): +def compile_results(execution_context: CodemodExecutionContext, codemods): + results = [] for name, codemod_kls in codemods.items(): - if not codemod_kls.CHANGESET_ALL_FILES: + if not (changeset := execution_context.results_by_codemod.get(name)): continue + data = { + # TODO: this prefix should not be hardcoded "codemod": f"pixee:python/{name}", "summary": codemod_kls.SUMMARY, "description": codemod_kls.METADATA.DESCRIPTION, "references": [], "properties": {}, "failedFiles": [], - "changeset": codemod_kls.CHANGESET_ALL_FILES, + "changeset": [change.to_json() for change in changeset], } - RESULTS_BY_CODEMOD.append(data) + results.append(data) + + return results def run(argv, original_args) -> int: @@ -147,6 +141,7 @@ def run(argv, original_args) -> int: return 1 global_state.set_directory(Path(argv.directory)) + context = CodemodExecutionContext(Path(argv.directory), argv.dry_run) codemods_to_run = match_codemods(argv.codemod_include, argv.codemod_exclude) if not codemods_to_run: @@ -179,18 +174,19 @@ def run(argv, original_args) -> int: logger.debug("Matched files:\n%s", "\n".join(full_names)) analyze_files( + context, files_to_analyze, codemods_to_run, sarif_results, argv, ) - compile_results(codemods_to_run) + results = compile_results(context, codemods_to_run) - DependencyManager().write(dry_run=argv.dry_run) + DependencyManager().write(dry_run=context.dry_run) elapsed = datetime.datetime.now() - start elapsed_ms = int(elapsed.total_seconds() * 1000) - report_default(elapsed_ms, argv, original_args, RESULTS_BY_CODEMOD) + report_default(elapsed_ms, argv, original_args, results) return 0 diff --git a/src/codemodder/codemods/__init__.py b/src/codemodder/codemods/__init__.py index f5db9d329..6bd77a69a 100644 --- a/src/codemodder/codemods/__init__.py +++ b/src/codemodder/codemods/__init__.py @@ -41,6 +41,9 @@ CODEMOD_NAMES = [codemod.name() for codemod in DEFAULT_CODEMODS] +# TODO: codemod registry + + def match_codemods(codemod_include: list, codemod_exclude: list) -> dict: if not codemod_include and not codemod_exclude: return {codemod.name(): codemod for codemod in DEFAULT_CODEMODS} diff --git a/src/codemodder/codemods/api/__init__.py b/src/codemodder/codemods/api/__init__.py index 52cce15f1..9b074ffde 100644 --- a/src/codemodder/codemods/api/__init__.py +++ b/src/codemodder/codemods/api/__init__.py @@ -80,8 +80,6 @@ class BaseCodemod( BaseTransformer, Helpers, ): - CHANGESET_ALL_FILES: list = [] - def report_change(self, original_node): line_number = self.lineno_for_node(original_node) self.file_context.codemod_changes.append( diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index 802c7ee31..eaf27ab92 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -2,6 +2,8 @@ from enum import Enum import itertools from typing import List, ClassVar + +from codemodder.file_context import FileContext from codemodder.semgrep import rule_ids_from_yaml_files @@ -24,11 +26,10 @@ class BaseCodemod: SUMMARY: ClassVar[str] = NotImplemented IS_SEMGREP = False + file_context: FileContext + def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) - # TODO: this should not belong to codemod classes - # Instead it should be owned by a global context that gets passed in - cls.CHANGESET_ALL_FILES: list = [] if "codemodder.codemods.base_codemod.SemgrepCodemod" in str(cls): # hack: SemgrepCodemod won't NotImplementedError but all other child @@ -86,11 +87,11 @@ def __init_subclass__(cls, **kwargs): cls.RULE_IDS = rule_ids_from_yaml_files(cls.YAML_FILES) - def __init__(self, file_context): - super().__init__(file_context) + def __init__(self, *args): + super().__init__(*args) self._results = list( itertools.chain.from_iterable( - map(lambda rId: file_context.results_by_id[rId], self.RULE_IDS) + map(lambda rId: self.file_context.results_by_id[rId], self.RULE_IDS) ) ) diff --git a/src/codemodder/context.py b/src/codemodder/context.py new file mode 100644 index 000000000..76ee849c8 --- /dev/null +++ b/src/codemodder/context.py @@ -0,0 +1,30 @@ +from pathlib import Path +from dataclasses import dataclass + +from codemodder.codemods.change import Change + + +@dataclass +class ChangeSet: + """A set of changes made to a file at `path`""" + + path: str + diff: str + changes: list[Change] + + def to_json(self): + return {"path": self.path, "diff": self.diff, "changes": self.changes} + + +class CodemodExecutionContext: + results_by_codemod: dict[str, list[ChangeSet]] = {} + directory: Path + dry_run: bool = False + + def __init__(self, directory, dry_run): + self.directory = directory + self.dry_run = dry_run + self.results_by_codemod = {} + + def add_result(self, codemod_name, change_set): + self.results_by_codemod.setdefault(codemod_name, []).append(change_set) diff --git a/src/codemodder/file_context.py b/src/codemodder/file_context.py index d4387252a..771697dcf 100644 --- a/src/codemodder/file_context.py +++ b/src/codemodder/file_context.py @@ -10,7 +10,6 @@ class FileContext: """ file_path: Path - dry_run: bool line_exclude: List[int] line_include: List[int] results_by_id: DefaultDict diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py index 38d48ed3d..f989060e1 100644 --- a/tests/codemods/base_codemod_test.py +++ b/tests/codemods/base_codemod_test.py @@ -3,6 +3,7 @@ from libcst.codemod import CodemodContext from pathlib import Path import os +from collections import defaultdict from codemodder.file_context import FileContext from codemodder.semgrep import run_on_directory as semgrep_run from codemodder.semgrep import find_all_yaml_files @@ -23,10 +24,9 @@ def run_and_assert_filepath(self, _, file_path, input_code, expected): input_tree = cst.parse_module(input_code) self.file_context = FileContext( file_path, - False, - [], [], [], + defaultdict(list), ) command_instance = self.codemod(CodemodContext(), self.file_context) output_tree = command_instance.transform_module(input_tree) @@ -49,7 +49,6 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected): results = all_results[str(file_path)] self.file_context = FileContext( file_path, - False, [], [], results, diff --git a/tests/shared.py b/tests/shared.py index ff1ef8e9a..269e553e8 100644 --- a/tests/shared.py +++ b/tests/shared.py @@ -1,11 +1,10 @@ """Module to store shared utilities for both unit and integration tests""" import pytest from codemodder import global_state -from codemodder.__main__ import RESULTS_BY_CODEMOD -from codemodder.codemods import ALL_CODEMODS from codemodder.dependency_manager import DependencyManager +# TODO: should not have any global state @pytest.fixture(autouse=True, scope="function") def reset_global_state(): """ @@ -16,6 +15,3 @@ def reset_global_state(): yield DependencyManager.clear_instance() global_state.set_directory("") - RESULTS_BY_CODEMOD.clear() - for codemod_kls in ALL_CODEMODS: - codemod_kls.CHANGESET_ALL_FILES = [] diff --git a/tests/test_file_context.py b/tests/test_file_context.py index 832a2567d..9f38bc59e 100644 --- a/tests/test_file_context.py +++ b/tests/test_file_context.py @@ -2,6 +2,6 @@ def test_file_context(): - file_context = FileContext(None, None, None, None, None) + file_context = FileContext(None, None, None, None) assert file_context.line_exclude == [] assert file_context.line_include == []