From 6ee8779ceb6a1e6b8506adf1441420dac7d03ae1 Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Thu, 26 Oct 2023 16:34:45 -0400 Subject: [PATCH] Collect fine-grained timing data --- src/codemodder/codemodder.py | 18 ++++++++++---- src/codemodder/codemods/base_codemod.py | 3 ++- src/codemodder/context.py | 4 ++++ src/codemodder/file_context.py | 2 ++ src/codemodder/utils/timer.py | 31 +++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 src/codemodder/utils/timer.py diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 5a352ac1..5e52c2a2 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -41,7 +41,9 @@ def apply_codemod_to_file( if not codemod.should_transform: return False - output_tree = codemod.transform_module(source_tree) + with file_context.timer.measure("transform"): + output_tree = codemod.transform_module(source_tree) + # TODO: we can probably just use the presence of recorded changes instead of # comparing the trees to gain some efficiency if output_tree.deep_equals(source_tree): @@ -61,7 +63,8 @@ def apply_codemod_to_file( file_context.add_result(change_set) if not dry_run: - update_code(file_context.file_path, output_tree.code) + with file_context.timer.measure("write"): + update_code(file_context.file_path, output_tree.code) return True @@ -91,8 +94,9 @@ def process_file( ) try: - with open(file_path, "r", encoding="utf-8") as f: - source_tree = cst.parse_module(f.read()) + with file_context.timer.measure("parse"): + with open(file_path, "r", encoding="utf-8") as f: + source_tree = cst.parse_module(f.read()) except Exception: file_context.add_failure(file_path) logger.exception("error parsing file %s", file_path) @@ -234,7 +238,11 @@ def run(original_args) -> int: len(set(all_changes)), ) logger.info("report file: %s", argv.output) - logger.info("elapsed: %s ms", elapsed_ms) + logger.info("total elapsed: %s ms", elapsed_ms) + logger.info("semgrep: %s ms", context.timer.get_time_ms("semgrep")) + logger.info("parse: %s ms", context.timer.get_time_ms("parse")) + logger.info("transform: %s ms", context.timer.get_time_ms("transform")) + logger.info("write: %s ms", context.timer.get_time_ms("write")) return 0 diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index f1237031..127d9a9b 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -122,7 +122,8 @@ def apply_rule( Apply semgrep to gather rule results """ del args, kwargs - return semgrep_run(context, yaml_files) + with context.timer.measure("semgrep"): + return semgrep_run(context, yaml_files) @property def should_transform(self): diff --git a/src/codemodder/context.py b/src/codemodder/context.py index 44984fd7..719f7e79 100644 --- a/src/codemodder/context.py +++ b/src/codemodder/context.py @@ -12,6 +12,7 @@ from codemodder.logging import logger, log_list from codemodder.registry import CodemodRegistry from codemodder.project_analysis.python_repo_manager import PythonRepoManager +from codemodder.utils.timer import Timer DEPENDENCY_NOTIFICATION = """``` @@ -36,6 +37,7 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes verbose: bool = False registry: CodemodRegistry repo_manager: PythonRepoManager + timer: Timer def __init__( self, @@ -53,6 +55,7 @@ def __init__( self.dependencies = {} self.registry = registry self.repo_manager = repo_manager + self.timer = Timer() def add_results(self, codemod_name: str, change_sets: List[ChangeSet]): self._results_by_codemod.setdefault(codemod_name, []).extend(change_sets) @@ -112,6 +115,7 @@ def process_results(self, codemod_id: str, results: Iterator[FileContext]): self.add_results(codemod_id, file_context.results) self.add_failures(codemod_id, file_context.failures) self.add_dependencies(codemod_id, file_context.dependencies) + self.timer.aggregate(file_context.timer) def compile_results(self, codemods: list[CodemodExecutorWrapper]): results = [] diff --git a/src/codemodder/file_context.py b/src/codemodder/file_context.py index cdc9290b..14de36b8 100644 --- a/src/codemodder/file_context.py +++ b/src/codemodder/file_context.py @@ -4,6 +4,7 @@ from codemodder.change import Change, ChangeSet from codemodder.dependency import Dependency +from codemodder.utils.timer import Timer @dataclass @@ -21,6 +22,7 @@ class FileContext: # pylint: disable=too-many-instance-attributes codemod_changes: List[Change] = field(default_factory=list) results: List[ChangeSet] = field(default_factory=list) failures: List[Path] = field(default_factory=list) + timer: Timer = field(default_factory=Timer) def add_dependency(self, dependency: Dependency): self.dependencies.add(dependency) diff --git a/src/codemodder/utils/timer.py b/src/codemodder/utils/timer.py new file mode 100644 index 00000000..ad7b33c8 --- /dev/null +++ b/src/codemodder/utils/timer.py @@ -0,0 +1,31 @@ +from collections import defaultdict +from contextlib import contextmanager +import time + +from typing_extensions import Self + + +class Timer: + _times: defaultdict + + def __init__(self): + self._times = defaultdict(float) + + @contextmanager + def measure(self, name: str): + start = time.monotonic() + try: + yield + finally: + end = time.monotonic() + self._add_time(name, end - start) + + def _add_time(self, name: str, val: float) -> None: + self._times[name] = self._times.get(name, 0) + val + + def get_time_ms(self, name: str) -> int: + return int(self._times.get(name, 0) * 1000) + + def aggregate(self, other: Self) -> None: + for key, val in other._times.items(): + self._add_time(key, val)