Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Python Repo Manager #97

Merged
merged 8 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ repos:
[
"types-mock==5.0.*",
"types-PyYAML==6.0",
"types-toml~=0.10",
]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies = [
"python-json-logger~=2.0.0",
"PyYAML~=6.0.0",
"semgrep~=1.46.0",
"toml~=0.10.2",
"wrapt~=1.15.0",
]

Expand Down
5 changes: 5 additions & 0 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from codemodder.code_directory import file_line_patterns, match_files
from codemodder.context import CodemodExecutionContext
from codemodder.executor import CodemodExecutorWrapper
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
from codemodder.report.codetf_reporter import report_default


Expand Down Expand Up @@ -130,12 +131,16 @@ def run(original_args) -> int:
log_section("startup")
logger.info("codemodder: python/%s", __VERSION__)

repo_manager = PythonRepoManager(Path(argv.directory))
context = CodemodExecutionContext(
Path(argv.directory),
argv.dry_run,
argv.verbose,
codemod_registry,
repo_manager,
)
# todo: enable when ready
# repo_manager.package_stores

# TODO: this should be a method of CodemodExecutionContext
codemods_to_run = codemod_registry.match_codemods(
Expand Down
6 changes: 5 additions & 1 deletion src/codemodder/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from codemodder.executor import CodemodExecutorWrapper
from codemodder.logging import logger, log_list
from codemodder.registry import CodemodRegistry
from codemodder.project_analysis.python_repo_manager import PythonRepoManager


DEPENDENCY_NOTIFICATION = """```
Expand All @@ -32,21 +33,24 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes
dry_run: bool = False
verbose: bool = False
registry: CodemodRegistry
repo_manager: PythonRepoManager

def __init__(
self,
directory: Path,
dry_run: bool,
verbose: bool,
registry: CodemodRegistry,
):
repo_manager: PythonRepoManager,
): # pylint: disable=too-many-arguments
self.directory = directory
self.dry_run = dry_run
self.verbose = verbose
self._results_by_codemod = {}
self._failures_by_codemod = {}
self.dependencies = {}
self.registry = registry
self.repo_manager = repo_manager
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very minor thing but we should add the type declaration at the class definition above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you referring to something else that's not already line 43, repo_manager: PythonRepoManager,?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be added on line 35 above too.


def add_result(self, codemod_name, change_set):
self._results_by_codemod.setdefault(codemod_name, []).append(change_set)
Expand Down
1 change: 0 additions & 1 deletion src/codemodder/dependency_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class DependencyManager:
_new_requirements: list[Dependency]

def __init__(self, parent_directory: Path):
"""One-time class initialization."""
self.parent_directory = parent_directory
self.dependency_file_changed = False
self._lines = []
Expand Down
Empty file.
4 changes: 4 additions & 0 deletions src/codemodder/project_analysis/file_parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .requirements_txt_file_parser import RequirementsTxtParser
from .pyproject_toml_file_parser import PyprojectTomlParser
from .setup_cfg_file_parser import SetupCfgParser
from .setup_py_file_parser import SetupPyParser
42 changes: 42 additions & 0 deletions src/codemodder/project_analysis/file_parsers/base_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from abc import ABC, abstractmethod

from pathlib import Path
from typing import List
from .package_store import PackageStore
from packaging.requirements import Requirement


class BaseParser(ABC):
def __init__(self, parent_directory: Path) -> None:
    """Store the directory under which ``file_name`` files are searched for."""
    self.parent_directory = parent_directory

@property
@abstractmethod
def file_name(self) -> str:
    """Name of the file this parser looks for (e.g. ``"requirements.txt"``)."""
    ...  # pragma: no cover
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a tiny thing but I think you can have the ... on the same line as the def which probably makes the # pragma unnecessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh I hope so!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

black doesn't like inline ... so I left it as is


def _parse_dependencies(self, dependencies: List[str]):
    """Convert raw dependency lines into ``Requirement`` objects.

    Skipped entirely:
      * blank lines,
      * comment lines (starting with ``#``),
      * pip option lines (starting with ``-``, such as ``-r base.txt``,
        ``-e .`` or ``--index-url ...``), which are not requirement
        specifiers.

    A trailing comment (whitespace followed by ``#``) is removed before the
    line is parsed, following pip's requirements-file comment rule. A ``#``
    that is not preceded by whitespace (e.g. a URL fragment) is kept.

    :param dependencies: raw lines, one dependency specifier per entry.
    :return: list of parsed ``Requirement`` objects, in input order.
    :raises packaging.requirements.InvalidRequirement: if a remaining line
        is not a valid requirement specifier.
    """
    import re

    requirements = []
    for raw_line in dependencies:
        # Drop an inline comment first, so "pkg==1.0  # why" parses cleanly.
        line = re.split(r"\s+#", raw_line.strip(), maxsplit=1)[0]
        if not line or line.startswith(("#", "-")):
            continue
        requirements.append(Requirement(line))
    return requirements

@abstractmethod
def _parse_file(self, file: Path) -> PackageStore:
    """Parse a single located *file* and return its ``PackageStore``."""
    ...  # pragma: no cover

def find_file_locations(self) -> List[Path]:
    """Return every path named ``file_name`` found recursively under ``parent_directory``."""
    search_root = Path(self.parent_directory)
    return [*search_root.rglob(self.file_name)]

def parse(self) -> list[PackageStore]:
    """
    Locate zero or more matching project config or dependency files within
    the repo and return one parsed store per file found.
    """
    return [self._parse_file(location) for location in self.find_file_locations()]
10 changes: 10 additions & 0 deletions src/codemodder/project_analysis/file_parsers/package_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from dataclasses import dataclass
from packaging.requirements import Requirement


@dataclass
class PackageStore:
    """Dependency metadata extracted from one project configuration file."""

    # Kind of file the data came from; parsers pass their ``file_name`` here.
    type: str
    # Path of the parsed file, as a string.
    file: str
    # Requirements declared by the file.
    dependencies: list[Requirement]
    # Python version specifier strings declared by the file; may be empty.
    py_versions: list[str]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import toml

from .base_parser import BaseParser


class PyprojectTomlParser(BaseParser):
    """Parse dependencies and the Python version constraint from ``pyproject.toml``.

    A ``pyproject.toml`` may carry only tool configuration, so a missing
    ``[project]`` table, ``dependencies`` list, or ``requires-python`` value
    yields empty results instead of raising ``KeyError``.
    """

    @property
    def file_name(self):
        return "pyproject.toml"

    def _parse_dependencies_from_toml(self, toml_data: dict):
        """Return the parsed ``[project].dependencies`` entries, or ``[]`` if absent."""
        project = toml_data.get("project") or {}
        return self._parse_dependencies(project.get("dependencies") or [])

    def _parse_py_versions(self, toml_data: dict):
        """Return ``[requires-python]`` as a one-item list, or ``[]`` if absent."""
        # todo: handle multiple requires-python such as ">3.5.2", ">=3.11.1,<3.11.2"
        project = toml_data.get("project") or {}
        requires_python = project.get("requires-python")
        return [requires_python] if requires_python else []

    def _parse_file(self, file: Path):
        """Load *file* as TOML and wrap its project metadata in a ``PackageStore``."""
        data = toml.load(file)

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_toml(data),
            py_versions=self._parse_py_versions(data),
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
from .base_parser import BaseParser


class RequirementsTxtParser(BaseParser):
    """Parser for ``requirements.txt`` files."""

    @property
    def file_name(self):
        return "requirements.txt"

    def _parse_file(self, file: Path):
        """Read *file* line by line and wrap its requirements in a ``PackageStore``."""
        with open(file, "r", encoding="utf-8") as handle:
            raw_lines = list(handle)

        requirements = self._parse_dependencies(raw_lines)
        # A requirements.txt file does not declare Python versions itself;
        # a heuristic could derive them from each dependency, but for now
        # the list stays empty.
        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=requirements,
            py_versions=[],
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import configparser

from .base_parser import BaseParser


class SetupCfgParser(BaseParser):
    """Parse dependencies and the Python version constraint from ``setup.cfg``.

    A ``setup.cfg`` without an ``[options]`` section, or without
    ``install_requires`` / ``python_requires`` in it, yields empty results
    instead of raising ``KeyError``.
    """

    @property
    def file_name(self):
        return "setup.cfg"

    def _parse_dependencies_from_cfg(self, config: configparser.ConfigParser):
        """Return the parsed ``[options] install_requires`` entries, or ``[]`` if absent."""
        # todo: setup_requires, tests_require, extras_require
        install_requires = config.get("options", "install_requires", fallback="")
        return self._parse_dependencies(install_requires.split("\n"))

    def _parse_py_versions(self, config: configparser.ConfigParser):
        """Return ``[python_requires]`` as a one-item list, or ``[]`` if absent."""
        # todo: handle various requires-python such as ">3.5.2", ">=3.11.1,<3.11.2"
        python_requires = config.get("options", "python_requires", fallback="")
        return [python_requires] if python_requires else []

    def _parse_file(self, file: Path):
        """Read *file* with ``configparser`` and wrap its options in a ``PackageStore``."""
        config = configparser.ConfigParser()
        # Decode explicitly as UTF-8 rather than relying on the locale default.
        config.read(file, encoding="utf-8")

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_cfg(config),
            py_versions=self._parse_py_versions(config),
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import libcst as cst
from libcst import matchers
from packaging.requirements import Requirement

from .base_parser import BaseParser
from .utils import clean_simplestring


class SetupPyParser(BaseParser):
    """Statically parse ``install_requires`` and ``python_requires`` from ``setup.py``.

    The file is parsed with libcst and never executed. Values the visitor
    did not find (left as ``None``) and list entries that are not plain
    string literals are treated as absent instead of raising.
    """

    @property
    def file_name(self):
        return "setup.py"

    def _parse_dependencies(self, dependencies):
        """Convert libcst sequence elements into ``Requirement`` objects.

        :param dependencies: elements of the ``install_requires`` sequence,
            or ``None`` when the argument was not found.
        :return: list of parsed requirements; empty for ``None`` input.
        """
        requirements = []
        for element in dependencies or ():
            value = element.value
            if not isinstance(value, (cst.SimpleString, str)):
                # Non-literal entries (names, calls, f-strings, ...) cannot
                # be resolved without executing setup.py, so skip them.
                continue
            line = clean_simplestring(value)
            # Skip empty lines and comments
            if line and not line.startswith("#"):
                requirements.append(Requirement(line))
        return requirements

    def _parse_dependencies_from_cst(self, cst_dependencies):
        """Return parsed requirements for the collected ``install_requires`` elements."""
        return self._parse_dependencies(cst_dependencies)

    def _parse_py_versions(self, version_str):
        """Return the cleaned ``python_requires`` value as a one-item list, or ``[]``."""
        # todo: handle for multiple versions
        if version_str is None:
            return []
        return [clean_simplestring(version_str)]

    def _parse_file(self, file: Path):
        """Parse *file* with libcst and wrap the collected values in a ``PackageStore``."""
        visitor = SetupCallVisitor()
        with open(str(file), "r", encoding="utf-8") as f:
            # todo: handle failure in parsing
            module = cst.parse_module(f.read())
        module.visit(visitor)

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_cst(visitor.install_requires),
            py_versions=self._parse_py_versions(visitor.python_requires),
        )


class SetupCallVisitor(cst.CSTVisitor):
    """Collect ``python_requires`` / ``install_requires`` from calls to ``setup``.

    Each matching call overwrites both attributes, so the values from the
    last ``setup(...)`` call visited are the ones kept.
    """

    def __init__(self):
        self.install_requires = None
        self.python_requires = None
        # todo setup_requires, tests_require, extras_require

    def visit_Call(self, node: cst.Call) -> None:
        # todo: only handle setup from setuptools, not others tho unlikely
        is_setup_call = matchers.matches(node.func, cst.Name(value="setup"))
        if not is_setup_call:
            return

        # Delegate argument inspection to a dedicated visitor for this call.
        arg_visitor = SetupArgVisitor()
        node.visit(arg_visitor)
        self.python_requires = arg_visitor.python_requires
        self.install_requires = arg_visitor.install_requires


class SetupArgVisitor(cst.CSTVisitor):
    """Record the ``python_requires`` and ``install_requires`` keyword arguments.

    Only literal values are recorded: a plain string literal for
    ``python_requires`` and a list or tuple literal for ``install_requires``.
    Anything else (a variable, a call, a concatenated string, ...) cannot be
    resolved statically, so the attribute is left as ``None`` instead of
    raising ``AttributeError`` or storing a variable name.
    """

    def __init__(self):
        # Source text of the python_requires string literal, including quotes.
        self.python_requires = None
        # Elements of the install_requires list/tuple literal.
        self.install_requires = None

    def visit_Arg(self, node: cst.Arg) -> None:
        # Positional arguments have no keyword.
        keyword = node.keyword.value if node.keyword is not None else None
        value = node.value

        if keyword == "python_requires":
            # todo: this works for `python_requires=">=3.7",` but what about
            # a list of versions?
            if isinstance(value, cst.SimpleString):
                self.python_requires = value.value
        elif keyword == "install_requires":
            if isinstance(value, (cst.List, cst.Tuple)):
                self.install_requires = value.elements
7 changes: 7 additions & 0 deletions src/codemodder/project_analysis/file_parsers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import libcst as cst


def clean_simplestring(node: "cst.SimpleString | str") -> str:
    """Return the text of a string literal without its surrounding quotes.

    :param node: either a libcst ``SimpleString`` node, or the source text of
        a string literal (quotes included) as a plain ``str``.
    :return: for a ``str``, the text with the surrounding quote characters
        stripped (single quotes when the text starts with one, double quotes
        otherwise); for a node, its ``raw_value``.

    The annotation is quoted so it is not evaluated when the function is
    defined; ``X | Y`` between types needs Python 3.10+ at runtime.
    """
    if isinstance(node, str):
        # Previously only '"' was stripped, so a single-quoted literal such
        # as '>=3.7' kept its quotes.
        quote = "'" if node.startswith("'") else '"'
        return node.strip(quote)
    return node.raw_value
30 changes: 30 additions & 0 deletions src/codemodder/project_analysis/python_repo_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from functools import cached_property
from pathlib import Path
from codemodder.project_analysis.file_parsers import (
RequirementsTxtParser,
PyprojectTomlParser,
SetupCfgParser,
SetupPyParser,
)
from codemodder.project_analysis.file_parsers.package_store import PackageStore


class PythonRepoManager:
    """Discover the package stores (dependency/config files) of a Python repo."""

    def __init__(self, parent_directory: Path):
        self.parent_directory = parent_directory
        # Parser classes tried, in this order, when collecting stores.
        self._potential_stores = [
            RequirementsTxtParser,
            PyprojectTomlParser,
            SetupCfgParser,
            SetupPyParser,
        ]

    @cached_property
    def package_stores(self) -> list[PackageStore]:
        """All discovered stores; computed on first access and then cached."""
        return self._parse_all_stores()

    def _parse_all_stores(self) -> list[PackageStore]:
        """Run every known parser over ``parent_directory`` and flatten the results."""
        return [
            pkg_store
            for parser_cls in self._potential_stores
            for pkg_store in parser_cls(self.parent_directory).parse()
        ]
2 changes: 2 additions & 0 deletions tests/codemods/base_codemod_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):
dry_run=True,
verbose=False,
registry=mock.MagicMock(),
repo_manager=mock.MagicMock(),
)
self.file_context = FileContext(
file_path,
Expand Down Expand Up @@ -80,6 +81,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):
dry_run=True,
verbose=False,
registry=mock.MagicMock(),
repo_manager=mock.MagicMock(),
)
input_tree = cst.parse_module(input_code)
all_results = self.results_by_id_filepath(input_code, file_path)
Expand Down
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,12 @@ def disable_write_dependencies():
dm_write.start()
yield
dm_write.stop()


@pytest.fixture(scope="module")
def pkg_with_reqs_txt(tmp_path_factory):
    """Create a temp project directory holding a small ``requirements.txt``; return the directory."""
    project_dir = tmp_path_factory.mktemp("foo")
    contents = (
        "# comment\n"
        "requests==2.31.0\n"
        "black==23.7.*\n"
        "mypy~=1.4\n"
        "pylint>1\n"
    )
    (project_dir / "requirements.txt").write_text(contents)
    return project_dir
Empty file.
Empty file.
Loading