diff --git a/integration_tests/base_test.py b/integration_tests/base_test.py index 4a2ddd53d..21ed2e0fb 100644 --- a/integration_tests/base_test.py +++ b/integration_tests/base_test.py @@ -154,6 +154,7 @@ def test_file_rewritten(self): completed_process = subprocess.run( command, check=False, + shell=False, ) assert completed_process.returncode == 0 diff --git a/integration_tests/test_multiple_codemods.py b/integration_tests/test_multiple_codemods.py index edf308801..b85de67dc 100644 --- a/integration_tests/test_multiple_codemods.py +++ b/integration_tests/test_multiple_codemods.py @@ -27,12 +27,14 @@ def test_two_codemods(self, codemods, tmpdir): "--output", str(codetf_path), f"--codemod-include={codemods}", - f"--path-include={source_file_name}", + "--path-include", + f"**/{source_file_name}", ] completed_process = subprocess.run( command, check=False, + shell=False, ) assert completed_process.returncode == 0 diff --git a/src/codemodder/cli.py b/src/codemodder/cli.py index 825a8d281..efc89fbb3 100644 --- a/src/codemodder/cli.py +++ b/src/codemodder/cli.py @@ -2,6 +2,7 @@ import sys from codemodder import __VERSION__ +from codemodder.code_directory import DEFAULT_INCLUDED_PATHS, DEFAULT_EXCLUDED_PATHS from codemodder.logging import OutputFormat, logger @@ -144,13 +145,13 @@ def parse_args(argv, codemod_registry): parser.add_argument( "--path-exclude", action=CsvListAction, - default="", + default=DEFAULT_EXCLUDED_PATHS, help="Comma-separated set of UNIX glob patterns to exclude", ) parser.add_argument( "--path-include", action=CsvListAction, - default="**/*.py", + default=DEFAULT_INCLUDED_PATHS, help="Comma-separated set of UNIX glob patterns to include", ) diff --git a/src/codemodder/code_directory.py b/src/codemodder/code_directory.py index 6015b1c2d..7e1323407 100644 --- a/src/codemodder/code_directory.py +++ b/src/codemodder/code_directory.py @@ -1,35 +1,56 @@ -from pathlib import Path - - -DEFAULT_INCLUDE = "*.py" - - -def py_file_matches(file_path, patterns): - """ - file_path is suffixed with `.py` and matches at least one pattern. - """ - is_py_file = file_path.match(DEFAULT_INCLUDE) - if not patterns: - return is_py_file - return is_py_file and any( - file_path.match(pattern.rsplit(":")[0]) for pattern in patterns - ) +from typing import Optional, Sequence +import fnmatch +import itertools +from pathlib import Path -def file_line_patterns(file_path, patterns): +DEFAULT_INCLUDED_PATHS = ["**.py", "**/*.py"] +DEFAULT_EXCLUDED_PATHS = [ + "**/test/**", + "**/tests/**", + "**/conftest.py", + "**/build/**", + "**/dist/**", + "**/venv/**", + "**/.venv/**", + "**/.tox/**", + "**/.nox/**", + "**/.eggs/**", + "**/.git/**", + "**/.mypy_cache/**", + "**/.pytest_cache/**", + "**/.hypothesis/**", + "**/.coverage*", +] + + +def file_line_patterns(file_path: str | Path, patterns: Sequence[str]): """ Find the lines included or excluded for a given file_path among the patterns """ - lines = [] - for pattern in patterns or []: - split = pattern.rsplit(":") - if len(split) == 2: - if file_path.match(split[0]): - lines.append(int(split[1])) - return lines + return [ + int(result[1]) + for pat in patterns + if len(result := pat.split(":")) == 2 + and fnmatch.fnmatch(str(file_path), result[0]) + ] + + +def filter_files(names: Sequence[str], patterns: Sequence[str], exclude: bool = False): + patterns = ( + [x.split(":")[0] for x in (patterns or [])] + if not exclude + # An excluded line should not cause the entire file to be excluded + else [x for x in (patterns or []) if ":" not in x] + ) + return itertools.chain(*[fnmatch.filter(names, pattern) for pattern in patterns]) -def match_files(parent_path, exclude_paths=None, include_paths=None): +def match_files( + parent_path: str | Path, + exclude_paths: Optional[Sequence[str]] = None, + include_paths: Optional[Sequence[str]] = None, +): """ Find pattern-matching files starting at the parent_path, recursively. @@ -37,29 +58,29 @@ def match_files(parent_path, exclude_paths=None, include_paths=None): patterns are passed in, a file must match `*.py` and at least one include patterns. :param parent_path: str name for starting directory - :param exclude_paths: comma-separated set of UNIX glob patterns to exclude - :param include_paths: comma-separated set of UNIX glob patterns to exclude + :param exclude_paths: list of UNIX glob patterns to exclude + :param include_paths: list of UNIX glob patterns to exclude :return: list of files found within (including recursively) the parent directory that match the criteria of both exclude and include patterns. """ - exclude_paths = list(exclude_paths) if exclude_paths is not None else [] - include_paths = list(include_paths) if include_paths is not None else [] - - matched_files = [] - - for file_path in Path(parent_path).rglob("*"): - if file_path.is_file(): - # Exclude patterns take precedence over include patterns. - if exclude_file(file_path, exclude_paths): - continue - - if py_file_matches(file_path, include_paths): - matched_files.append(file_path) - - return matched_files - + all_files = [str(path) for path in Path(parent_path).rglob("*")] + included_files = set( + filter_files( + all_files, + include_paths if include_paths is not None else DEFAULT_INCLUDED_PATHS, + ) + ) + excluded_files = set( + filter_files( + all_files, + exclude_paths if exclude_paths is not None else DEFAULT_EXCLUDED_PATHS, + exclude=True, + ) + ) -def exclude_file(file_path, exclude_patterns): - # if a file matches any excluded pattern, return True so it is not included for analysis - return any(file_path.match(exclude) for exclude in exclude_patterns) + return [ + path + for p in sorted(list(included_files - excluded_files)) + if (path := Path(p)).is_file() and path.suffix == ".py" + ] diff --git a/tests/test_code_directory.py b/tests/test_code_directory.py index c0870f7b0..17c34b2da 100644 --- a/tests/test_code_directory.py +++ b/tests/test_code_directory.py @@ -7,8 +7,8 @@ @pytest.fixture(scope="module") def dir_structure(tmp_path_factory): - tests_dir = tmp_path_factory.mktemp("tests") - samples_dir = tests_dir / "samples" + base_dir = tmp_path_factory.mktemp("foo") + samples_dir = base_dir / "samples" samples_dir.mkdir() (samples_dir / "make_request.py").touch() (samples_dir / "insecure_random.py").touch() @@ -18,9 +18,14 @@ def dir_structure(tmp_path_factory): (more_samples_dir / "empty_for_testing.py").touch() (more_samples_dir / "empty_for_testing.txt").touch() - assert len(list(tests_dir.rglob("*"))) == 6 + tests_dir = base_dir / "tests" + tests_dir.mkdir() + (tests_dir / "test_make_request.py").touch() + (tests_dir / "test_insecure_random.py").touch() - return tests_dir + assert len(list(base_dir.rglob("*"))) == 9 + + return base_dir class TestMatchFiles: @@ -37,45 +42,49 @@ def test_all_py_files_match(self, dir_structure): def test_match_excluded(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py"] - files = match_files(dir_structure, ["*request.py"]) + files = match_files(dir_structure, ["**/tests/**", "*request.py"]) self._assert_expected(files, expected) def test_match_included_file_with_line(self, dir_structure): expected = ["insecure_random.py"] - files = match_files(dir_structure, include_paths=["insecure_random.py:2"]) + files = match_files(dir_structure, include_paths=["**/insecure_random.py:2"]) self._assert_expected(files, expected) def test_match_excluded_line(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] - files = match_files(dir_structure, exclude_paths=["insecure_random.py:2"]) + files = match_files( + dir_structure, exclude_paths=["**/tests/**", "**/insecure_random.py:2"] + ) self._assert_expected(files, expected) def test_match_included_line_and_glob(self, dir_structure): expected = ["insecure_random.py"] - files = match_files(dir_structure, include_paths=["insecure*.py:3"]) + files = match_files(dir_structure, include_paths=["**/insecure*.py:3"]) self._assert_expected(files, expected) def test_match_excluded_line_and_glob(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] - files = match_files(dir_structure, exclude_paths=["insecure*.py:3"]) + files = match_files( + dir_structure, exclude_paths=["**/tests/**", "**/insecure*.py:3"] + ) self._assert_expected(files, expected) def test_match_excluded_dir_incorrect_glob(self, dir_structure): incorrect_glob = "more_samples" expected = ["empty_for_testing.py", "insecure_random.py", "make_request.py"] - files = match_files(dir_structure, [incorrect_glob]) + files = match_files(dir_structure, ["**/tests/**", incorrect_glob]) self._assert_expected(files, expected) def test_match_excluded_dir_correct_glob(self, dir_structure): correct_globs = ["**/more_samples/**", "*/more_samples/*"] for correct_glob in correct_globs: expected = ["insecure_random.py", "make_request.py"] - files = match_files(dir_structure, [correct_glob]) + files = match_files(dir_structure, ["**/tests/**", correct_glob]) self._assert_expected(files, expected) def test_match_excluded_multiple(self, dir_structure): expected = ["insecure_random.py"] - files = match_files(dir_structure, ["*request.py", "*empty*"]) + files = match_files(dir_structure, ["**/tests/**", "*request.py", "*empty*"]) self._assert_expected(files, expected) def test_match_included(self, dir_structure): @@ -87,11 +96,46 @@ def test_match_excluded_precedence_over_included(self, dir_structure): expected = ["empty_for_testing.py", "insecure_random.py"] files = match_files( dir_structure, - exclude_paths=["*request.py"], + exclude_paths=["**/tests/**", "*request.py"], include_paths=["*request.py", "*empty*.py", "*random.py"], ) self._assert_expected(files, expected) + def test_test_directory_not_excluded(self, dir_structure): + expected = ["test_insecure_random.py", "test_make_request.py"] + files = match_files( + dir_structure, exclude_paths=["**/samples/**", "**/more_samples/**"] + ) + self._assert_expected(files, expected) + + def test_include_test_overridden_by_default_excludes(self, mocker): + mocker.patch( + "codemodder.code_directory.Path.rglob", + return_value=[ + "foo/tests/test_insecure_random.py", + "foo/tests/test_make_request.py", + ], + ) + mocker.patch( + "codemodder.code_directory.Path.is_file", + return_value=True, + ) + files = match_files(Path("."), include_paths=["**/tests/**"]) + self._assert_expected(files, []) + + def test_include_test_without_default_includes(self, mocker): + files = ["foo/tests/test_insecure_random.py", "foo/tests/test_make_request.py"] + mocker.patch( + "codemodder.code_directory.Path.rglob", + return_value=files, + ) + mocker.patch( + "codemodder.code_directory.Path.is_file", + return_value=True, + ) + result = match_files(Path("."), exclude_paths=[]) + assert result == [Path(x) for x in files] + def test_extract_line_from_pattern(self): lines = file_line_patterns(Path("insecure_random.py"), ["insecure_*.py:3"]) assert lines == [3]