Skip to content

Commit

Permalink
Create changeset for updated dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
drdavella committed Oct 23, 2023
1 parent 5e88eb8 commit 09b0795
Show file tree
Hide file tree
Showing 18 changed files with 212 additions and 104 deletions.
5 changes: 2 additions & 3 deletions integration_tests/test_process_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ class TestProcessSandbox(BaseIntegrationTest):
expected_diff = '--- \n+++ \n@@ -1,10 +1,11 @@\n import subprocess\n+from security import safe_command\n \n-subprocess.run("echo \'hi\'", shell=True)\n-subprocess.run(["ls", "-l"])\n+safe_command.run(subprocess.run, "echo \'hi\'", shell=True)\n+safe_command.run(subprocess.run, ["ls", "-l"])\n \n-subprocess.call("echo \'hi\'", shell=True)\n-subprocess.call(["ls", "-l"])\n+safe_command.call(subprocess.call, "echo \'hi\'", shell=True)\n+safe_command.call(subprocess.call, ["ls", "-l"])\n \n subprocess.check_output(["ls", "-l"])\n \n'
expected_line_change = "3"
num_changes = 4
num_changed_files = 2
change_description = ProcessSandbox.CHANGE_DESCRIPTION

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = (
"requests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity==1.0.1"
)
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity==1.0.1"
6 changes: 2 additions & 4 deletions integration_tests/test_url_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ class TestUrlSandbox(BaseIntegrationTest):
expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n-import requests\n+from security import safe_requests\n \n-requests.get("https://www.google.com")\n+safe_requests.get("https://www.google.com")\n var = "hello"\n'
expected_line_change = "3"
change_description = UrlSandbox.CHANGE_DESCRIPTION
num_changed_files = 1
num_changed_files = 2

requirements_path = "tests/samples/requirements.txt"
original_requirements = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
expected_new_reqs = (
"requests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity==1.0.1"
)
expected_new_reqs = "# file used to test dependency management\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\nsecurity==1.0.1"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ requires-python = ">=3.10.0"
readme = "README.md"
license = {file = "LICENSE"}
dependencies = [
"dependency-manager @ git+https://github.com/pixee/python-dependency-manager#egg=dependency-manager",
"isort~=5.12.0",
"libcst~=1.1.0",
"packaging~=23.0.0",
"pylint~=3.0.0",
"python-json-logger~=2.0.0",
"PyYAML~=6.0.0",
Expand Down
12 changes: 12 additions & 0 deletions src/codemodder/change.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,15 @@ def to_json(self):
"properties": self.properties,
"packageActions": self.packageActions,
}


@dataclass
class ChangeSet:
    """The changes made to the single file located at `path`."""

    # Location of the changed file
    path: str
    # Diff text for the file
    diff: str
    # Individual change entries associated with the diff
    changes: list[Change]

    def to_json(self):
        """Return the three fields as a plain dict keyed by field name."""
        return dict(path=self.path, diff=self.diff, changes=self.changes)
18 changes: 7 additions & 11 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import sys
from pathlib import Path
from textwrap import indent

import libcst as cst
from libcst.codemod import CodemodContext
Expand All @@ -13,9 +12,9 @@
from codemodder import registry, __VERSION__
from codemodder.logging import configure_logger, logger, log_section, log_list
from codemodder.cli import parse_args
from codemodder.change import ChangeSet
from codemodder.code_directory import file_line_patterns, match_files
from codemodder.context import CodemodExecutionContext, ChangeSet
from codemodder.dependency_manager import write_dependencies
from codemodder.context import CodemodExecutionContext
from codemodder.executor import CodemodExecutorWrapper
from codemodder.report.codetf_reporter import report_default

Expand Down Expand Up @@ -76,6 +75,7 @@ def analyze_files(
sarif,
cli_args,
):
# TODO: parallelize this loop
for idx, file_path in enumerate(files_to_analyze):
logger.debug("scanning file %s", file_path)
if idx and idx % 100 == 0:
Expand All @@ -93,6 +93,7 @@ def analyze_files(
line_include = file_line_patterns(file_path, cli_args.path_include)
sarif_for_file = sarif.get(str(file_path)) or {}

# NOTE: file context will become more important if/when we parallelize this loop
file_context = FileContext(
file_path,
line_exclude,
Expand All @@ -107,13 +108,7 @@ def analyze_files(
source_tree,
)

if failures := execution_context.get_failures(codemod.id):
log_list(logging.INFO, "failed", failures)
if changes := execution_context.get_results(codemod.id):
logger.info("changed:")
for change in changes:
logger.info(" - %s", change.path)
logger.debug(" diff:\n%s", indent(change.diff, " " * 6))
execution_context.add_dependencies(codemod.id, file_context.dependencies)


def run(original_args) -> int:
Expand Down Expand Up @@ -179,10 +174,11 @@ def run(original_args) -> int:
results,
argv,
)
context.process_dependencies(codemod.id)
context.log_changes(codemod.id)

results = context.compile_results(codemods_to_run)

write_dependencies(context)
elapsed = datetime.datetime.now() - start
elapsed_ms = int(elapsed.total_seconds() * 1000)
report_default(elapsed_ms, argv, original_args, results)
Expand Down
3 changes: 3 additions & 0 deletions src/codemodder/codemods/base_codemod.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ def line_exclude(self):
def line_include(self):
return self.file_context.line_include

def add_dependency(self, dependency: str):
    """Forward `dependency` to this codemod's file context."""
    file_context = self.file_context
    file_context.add_dependency(dependency)


class SemgrepCodemod(BaseCodemod):
YAML_FILES: ClassVar[List[str]] = NotImplemented
Expand Down
53 changes: 35 additions & 18 deletions src/codemodder/context.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,19 @@
import logging
from pathlib import Path
from dataclasses import dataclass
import itertools
from textwrap import indent

from codemodder.change import Change
from codemodder.change import ChangeSet
from codemodder.dependency_manager import DependencyManager
from codemodder.executor import CodemodExecutorWrapper
from codemodder.logging import logger, log_list
from codemodder.registry import CodemodRegistry


@dataclass
class ChangeSet:
"""A set of changes made to a file at `path`"""

path: str
diff: str
changes: list[Change]

def to_json(self):
return {"path": self.path, "diff": self.diff, "changes": self.changes}


class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes
_results_by_codemod: dict[str, list[ChangeSet]] = {}
_failures_by_codemod: dict[str, list[Path]] = {}
dependencies: set[str]
dependencies: dict[str, set[str]] = {}
directory: Path
dry_run: bool = False
verbose: bool = False
Expand All @@ -38,9 +29,9 @@ def __init__(
self.directory = directory
self.dry_run = dry_run
self.verbose = verbose
self.dependencies = set()
self._results_by_codemod = {}
self._failures_by_codemod = {}
self.dependencies = {}
self.registry = registry

def add_result(self, codemod_name, change_set):
Expand All @@ -49,6 +40,9 @@ def add_result(self, codemod_name, change_set):
def add_failure(self, codemod_name, file_path):
self._failures_by_codemod.setdefault(codemod_name, []).append(file_path)

def add_dependencies(self, codemod_id: str, dependencies: set[str]):
    """Merge `dependencies` into the set stored under `codemod_id`.

    An empty set is created for `codemod_id` the first time it is seen.
    """
    if codemod_id not in self.dependencies:
        self.dependencies[codemod_id] = set()
    self.dependencies[codemod_id].update(dependencies)

def get_results(self, codemod_name):
return self._results_by_codemod.get(codemod_name, [])

Expand All @@ -69,8 +63,22 @@ def get_failed_files(self):
)
)

def add_dependency(self, dependency: str):
self.dependencies.add(dependency)
def process_dependencies(self, codemod_id: str):
    """Hand the dependencies collected for `codemod_id` to a DependencyManager.

    Does nothing when no dependencies were collected for the codemod. When
    the manager reports no dependency file, this is logged and nothing is
    written. Otherwise the dependencies are added and written (honouring
    `self.dry_run`), and any resulting changeset is stored as a result of
    the codemod.
    """
    requested = self.dependencies.get(codemod_id)
    if requested:
        manager = DependencyManager(self.directory)
        if manager.found_dependency_file:
            manager.add(list(requested))
            changeset = manager.write(self.dry_run)
            if changeset is not None:
                self.add_result(codemod_id, changeset)
        else:
            logger.info(
                "unable to write dependencies for %s: no dependency file found",
                codemod_id,
            )

def compile_results(self, codemods: list[CodemodExecutorWrapper]):
results = []
Expand All @@ -90,3 +98,12 @@ def compile_results(self, codemods: list[CodemodExecutorWrapper]):
results.append(data)

return results

def log_changes(self, codemod_id: str):
    """Log the failures and the changed files recorded for `codemod_id`.

    Failures are logged as a list at INFO level. Each changed path is
    logged at INFO level and its diff at DEBUG level.
    """
    failed = self.get_failures(codemod_id)
    if failed:
        log_list(logging.INFO, "failed", failed)

    changed = self.get_results(codemod_id)
    if not changed:
        return

    logger.info("changed:")
    diff_prefix = " " * 6
    for changeset in changed:
        logger.info(" - %s", changeset.path)
        logger.debug(" diff:\n%s", indent(changeset.diff, diff_prefix))
93 changes: 75 additions & 18 deletions src/codemodder/dependency_manager.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,82 @@
import sys
import io

from dependency_manager import DependencyManagerAbstract
from functools import cached_property
from pathlib import Path
from typing import Optional

import difflib
from packaging.requirements import Requirement

from codemodder.change import ChangeSet


class DependencyManager:
parent_directory: Path
_lines: list[str]
_new_requirements: list[str]

def __init__(self, parent_directory: Path):
"""One-time class initialization."""
self.parent_directory = parent_directory
self.dependency_file_changed = False
self._lines = []
self._new_requirements = []

def add(self, dependencies: list[str]):
    """Queue every given requirement that is not already a known dependency.

    Each string is parsed as a `Requirement`. New ones are recorded in
    `self.dependencies` and their string form is appended to the list of
    requirements pending a write.
    """
    for raw in dependencies:
        requirement = Requirement(raw)
        if requirement in self.dependencies:
            continue
        self.dependencies[requirement] = None
        self._new_requirements.append(str(requirement))

def write(self, dry_run: bool = False) -> Optional[ChangeSet]:
    """
    Write the updated dependency file if any new requirements were added.

    Each queued requirement is appended on its own line after the existing
    contents. The diff is built from those same lines whether or not
    `dry_run` is set; the file is only rewritten when `dry_run` is false.

    Returns a `ChangeSet` for the dependency file, or None when there is no
    dependency file or there are no new requirements.
    """
    if not (self.dependency_file and self._new_requirements):
        return None

    # `readlines` keeps line endings, but the last line of a file may lack
    # one; terminate it so the first new requirement is not glued onto it.
    existing = [
        line if line.endswith("\n") else f"{line}\n" for line in self._lines
    ]
    # Neither `writelines` nor `unified_diff` inserts separators, so every
    # requirement needs its own terminator to land on its own line.
    added = [f"{requirement}\n" for requirement in self._new_requirements]

    diff = "".join(difflib.unified_diff(existing, existing + added))
    # TODO: add a change entry for each new requirement
    # TODO: make sure to set the contextual_description=True in the properties bag

    if not dry_run:
        with open(self.dependency_file, "w", encoding="utf-8") as f:
            f.writelines(existing)
            # The final requirement is written without a trailing newline,
            # so the file contents for a single new requirement are the
            # same as this method produced before.
            f.write("\n".join(self._new_requirements))

    self.dependency_file_changed = True
    return ChangeSet(str(self.dependency_file), diff, changes=[])

@property
def found_dependency_file(self) -> bool:
    """True when `dependency_file` is anything other than None."""
    located = self.dependency_file
    return located is not None

def write_dependencies(execution_context: CodemodExecutionContext):
class DependencyManager(DependencyManagerAbstract):
def get_parent_dir(self):
return Path(execution_context.directory)
@cached_property
def dependency_file(self) -> Optional[Path]:
    """Find a `requirements.txt` by searching `parent_directory` recursively.

    Returns the first match produced by the search, or None if there is none.
    """
    # For now for simplicity only the first file is used
    candidates = Path(self.parent_directory).rglob("requirements.txt")
    return next(candidates, None)

dm = DependencyManager()
dm.add(list(execution_context.dependencies))
@cached_property
def dependencies(self) -> dict[Requirement, None]:
"""
Extract list of dependencies from requirements.txt file.
Same order of requirements is maintained, no alphabetical sorting is done.
"""
if not self.dependency_file:
return {}

try:
# Hacky solution to prevent the dependency manager from writing to stdout
sys.stdout = io.StringIO()
dm.write(dry_run=execution_context.dry_run)
finally:
sys.stdout = sys.__stdout__
with open(self.dependency_file, "r", encoding="utf-8") as f:
self._lines = f.readlines()

return dm
return {
Requirement(line): None
for x in self._lines
# Skip empty lines and comments
if (line := x.strip()) and not line.startswith("#")
}
6 changes: 5 additions & 1 deletion src/codemodder/file_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List

Expand All @@ -13,10 +13,14 @@ class FileContext:
line_exclude: List[int]
line_include: List[int]
results_by_id: Dict
dependencies: set[str] = field(default_factory=set)

def __post_init__(self):
if self.line_include is None:
self.line_include = []
if self.line_exclude is None:
self.line_exclude = []
self.codemod_changes = []

def add_dependency(self, dependency: str):
    """Add `dependency` to the set of dependencies held by this context."""
    recorded = self.dependencies
    recorded.add(dependency)
2 changes: 1 addition & 1 deletion src/core_codemods/process_creation_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def rule(cls):

def on_result_found(self, original_node, updated_node):
self.add_needed_import("security", "safe_command")
self.execution_context.add_dependency("security==1.0.1")
self.add_dependency("security==1.0.1")
return self.update_call_target(
updated_node,
"safe_command",
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/url_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module:
find_requests_visitor.changes_in_file
)
new_tree = tree.visit(ReplaceNodes(find_requests_visitor.nodes_to_change))
self.execution_context.add_dependency("security==1.0.1")
self.add_dependency("security==1.0.1")
# if it finds any request.get(...), try to remove the imports
if any(
(
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/use_defused_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,6 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module:
result_tree = visitor.transform_module(tree)
self.file_context.codemod_changes.extend(visitor.changes_in_file)
if visitor.changes_in_file:
self.execution_context.add_dependency("defusedxml")
self.add_dependency("defusedxml") # TODO: which version?

return result_tree
3 changes: 3 additions & 0 deletions tests/codemods/base_codemod_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):

assert output_tree.code == dedent(expected)

def assert_dependency(self, dependency: str):
    """Assert that the file context exists and holds exactly `dependency`."""
    file_context = self.file_context
    assert file_context and file_context.dependencies == {dependency}


class BaseSemgrepCodemodTest(BaseCodemodTest):
@classmethod
Expand Down
Loading

0 comments on commit 09b0795

Please sign in to comment.