Skip to content

Commit

Permalink
Initial Python Repo Manager (#97)
Browse files Browse the repository at this point in the history
* add repo manager w reqs txt file parser

* add pyproject.toml parser

* add parser for setup.cfg file

* add setup.py parser

* testing

* use abstract

* ignore cov in base class

* review feedback
  • Loading branch information
clavedeluna authored Oct 25, 2023
1 parent e09942c commit 4663b59
Show file tree
Hide file tree
Showing 24 changed files with 465 additions and 2 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ repos:
[
"types-mock==5.0.*",
"types-PyYAML==6.0",
"types-toml~=0.10",
]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies = [
"python-json-logger~=2.0.0",
"PyYAML~=6.0.0",
"semgrep~=1.46.0",
"toml~=0.10.2",
"wrapt~=1.15.0",
]

Expand Down
5 changes: 5 additions & 0 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from codemodder.code_directory import file_line_patterns, match_files
from codemodder.context import CodemodExecutionContext
from codemodder.executor import CodemodExecutorWrapper
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
from codemodder.report.codetf_reporter import report_default


Expand Down Expand Up @@ -130,12 +131,16 @@ def run(original_args) -> int:
log_section("startup")
logger.info("codemodder: python/%s", __VERSION__)

repo_manager = PythonRepoManager(Path(argv.directory))
context = CodemodExecutionContext(
Path(argv.directory),
argv.dry_run,
argv.verbose,
codemod_registry,
repo_manager,
)
# todo: enable when ready
# repo_manager.package_stores

# TODO: this should be a method of CodemodExecutionContext
codemods_to_run = codemod_registry.match_codemods(
Expand Down
6 changes: 5 additions & 1 deletion src/codemodder/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from codemodder.executor import CodemodExecutorWrapper
from codemodder.logging import logger, log_list
from codemodder.registry import CodemodRegistry
from codemodder.project_analysis.python_repo_manager import PythonRepoManager


DEPENDENCY_NOTIFICATION = """```
Expand All @@ -32,21 +33,24 @@ class CodemodExecutionContext: # pylint: disable=too-many-instance-attributes
dry_run: bool = False
verbose: bool = False
registry: CodemodRegistry
repo_manager: PythonRepoManager

def __init__(
self,
directory: Path,
dry_run: bool,
verbose: bool,
registry: CodemodRegistry,
):
repo_manager: PythonRepoManager,
): # pylint: disable=too-many-arguments
self.directory = directory
self.dry_run = dry_run
self.verbose = verbose
self._results_by_codemod = {}
self._failures_by_codemod = {}
self.dependencies = {}
self.registry = registry
self.repo_manager = repo_manager

def add_result(self, codemod_name, change_set):
self._results_by_codemod.setdefault(codemod_name, []).append(change_set)
Expand Down
1 change: 0 additions & 1 deletion src/codemodder/dependency_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class DependencyManager:
_new_requirements: list[Dependency]

def __init__(self, parent_directory: Path):
"""One-time class initialization."""
self.parent_directory = parent_directory
self.dependency_file_changed = False
self._lines = []
Expand Down
Empty file.
4 changes: 4 additions & 0 deletions src/codemodder/project_analysis/file_parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .requirements_txt_file_parser import RequirementsTxtParser
from .pyproject_toml_file_parser import PyprojectTomlParser
from .setup_cfg_file_parser import SetupCfgParser
from .setup_py_file_parser import SetupPyParser
42 changes: 42 additions & 0 deletions src/codemodder/project_analysis/file_parsers/base_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from abc import ABC, abstractmethod

from pathlib import Path
from typing import List
from .package_store import PackageStore
from packaging.requirements import Requirement


class BaseParser(ABC):
    """Shared logic for locating and parsing one kind of dependency file.

    Subclasses supply the file name they handle (``file_name``) and how a
    single located file is parsed (``_parse_file``).
    """

    def __init__(self, parent_directory: Path):
        self.parent_directory = parent_directory

    @property
    @abstractmethod
    def file_name(self):
        """Name of the file this parser looks for."""
        ...  # pragma: no cover

    def _parse_dependencies(self, dependencies: List[str]):
        """Turn raw requirement strings into ``Requirement`` objects.

        Each entry is stripped of surrounding whitespace; entries that are
        then empty or that start with ``#`` are dropped.
        """
        requirements = []
        for raw in dependencies:
            entry = raw.strip()
            # Skip empty lines and comments
            if not entry or entry.startswith("#"):
                continue
            requirements.append(Requirement(entry))
        return requirements

    @abstractmethod
    def _parse_file(self, file: Path):
        """Parse a single located file; implemented by each subclass."""
        ...  # pragma: no cover

    def find_file_locations(self) -> List[Path]:
        """Return every path under the parent directory named ``file_name``."""
        root = Path(self.parent_directory)
        return list(root.rglob(self.file_name))

    def parse(self) -> list[PackageStore]:
        """
        Find 0 or more project config or dependency files within a project repo.

        Returns one parsed result per file found, in discovery order.
        """
        return [self._parse_file(location) for location in self.find_file_locations()]
10 changes: 10 additions & 0 deletions src/codemodder/project_analysis/file_parsers/package_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from dataclasses import dataclass
from packaging.requirements import Requirement


@dataclass
class PackageStore:
    """Dependency information extracted from a single project file."""

    # Kind of file the data came from (parsers pass their ``file_name``).
    type: str
    # Path of the parsed file, as a string.
    file: str
    # Requirements declared in the file.
    dependencies: list[Requirement]
    # Python version specifier strings; may be empty
    # (the requirements.txt parser always passes an empty list).
    py_versions: list[str]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import toml

from .base_parser import BaseParser


class PyprojectTomlParser(BaseParser):
    """Parser for ``pyproject.toml`` files that reads the ``[project]`` table."""

    @property
    def file_name(self):
        return "pyproject.toml"

    def _parse_dependencies_from_toml(self, toml_data: dict):
        """Return the requirements listed under ``project.dependencies``.

        Many ``pyproject.toml`` files only configure tools and have no
        ``[project]`` table or no ``dependencies`` key; those yield an empty
        list instead of raising ``KeyError``.
        """
        project = toml_data.get("project", {})
        return self._parse_dependencies(project.get("dependencies", []))

    def _parse_py_versions(self, toml_data: dict):
        """Return ``[requires-python]`` or ``[]`` when it is not declared."""
        # todo: split compound specifiers such as ">=3.11.1,<3.11.2"
        requires_python = toml_data.get("project", {}).get("requires-python")
        if requires_python is None:
            return []
        return [requires_python]

    def _parse_file(self, file: Path):
        """Load ``file`` as TOML and build a ``PackageStore`` from it."""
        data = toml.load(file)

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_toml(data),
            py_versions=self._parse_py_versions(data),
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
from .base_parser import BaseParser


class RequirementsTxtParser(BaseParser):
    """Parser for ``requirements.txt`` files."""

    @property
    def file_name(self):
        return "requirements.txt"

    def _parse_file(self, file: Path):
        """Read ``file`` line by line and build a ``PackageStore`` from it."""
        with open(file, "r", encoding="utf-8") as handle:
            raw_lines = list(handle)

        # requirements.txt files do not declare py versions explicitly
        # though we could create a heuristic by analyzing each dependency
        # and extracting py versions from them.
        declared_py_versions = []

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies(raw_lines),
            py_versions=declared_py_versions,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import configparser

from .base_parser import BaseParser


class SetupCfgParser(BaseParser):
    """Parser for ``setup.cfg`` files that reads the ``[options]`` section."""

    @property
    def file_name(self):
        return "setup.cfg"

    def _parse_dependencies_from_cfg(self, config: configparser.ConfigParser):
        """Return the requirements listed under ``options.install_requires``.

        A missing ``[options]`` section or ``install_requires`` key yields an
        empty list instead of raising ``KeyError``.
        """
        # todo: setup_requires, tests_require, extras_require
        install_requires = config.get("options", "install_requires", fallback="")
        return self._parse_dependencies(install_requires.split("\n"))

    def _parse_py_versions(self, config: configparser.ConfigParser):
        """Return ``[python_requires]`` or ``[]`` when it is not declared."""
        # todo: split compound specifiers such as ">=3.11.1,<3.11.2"
        python_requires = config.get("options", "python_requires", fallback=None)
        if python_requires is None:
            return []
        return [python_requires]

    def _parse_file(self, file: Path):
        """Read ``file`` with ``configparser`` and build a ``PackageStore``."""
        config = configparser.ConfigParser()
        config.read(file)

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_cfg(config),
            py_versions=self._parse_py_versions(config),
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from pathlib import Path
import libcst as cst
from libcst import matchers
from packaging.requirements import Requirement

from .base_parser import BaseParser
from .utils import clean_simplestring


class SetupPyParser(BaseParser):
    """Parser for ``setup.py`` files that statically inspects the ``setup()`` call."""

    @property
    def file_name(self):
        return "setup.py"

    def _parse_dependencies(self, dependencies):
        """Convert libcst sequence elements into ``Requirement`` objects.

        ``dependencies`` is the ``elements`` of the ``install_requires``
        literal, or ``None`` when no such argument was found. Elements that
        are not plain string literals cannot be read statically and are
        skipped, as are empty strings and entries starting with ``#``.
        """
        return [
            Requirement(line)
            for x in dependencies or ()
            # Only plain string literals carry a statically known value
            if isinstance(x.value, (cst.SimpleString, str))
            # Skip empty lines and comments
            and (line := clean_simplestring(x.value))
            and not line.startswith("#")
        ]

    def _parse_dependencies_from_cst(self, cst_dependencies):
        """Return requirements for the collected ``install_requires`` elements."""
        return self._parse_dependencies(cst_dependencies)

    def _parse_py_versions(self, version_str):
        """Return ``[python_requires]`` or ``[]`` when none was found."""
        # todo: handle for multiple versions
        if version_str is None:
            return []
        return [clean_simplestring(version_str)]

    def _parse_file(self, file: Path):
        """Parse ``file`` with libcst and build a ``PackageStore`` from it."""
        visitor = SetupCallVisitor()
        with open(str(file), "r", encoding="utf-8") as f:
            # todo: handle failure in parsing
            module = cst.parse_module(f.read())
        module.visit(visitor)

        return PackageStore(
            type=self.file_name,
            file=str(file),
            dependencies=self._parse_dependencies_from_cst(visitor.install_requires),
            py_versions=self._parse_py_versions(visitor.python_requires),
        )


class SetupCallVisitor(cst.CSTVisitor):
    """Collects ``python_requires`` / ``install_requires`` from ``setup(...)`` calls."""

    def __init__(self):
        self.python_requires = None
        self.install_requires = None
        # todo setup_requires, tests_require, extras_require

    def visit_Call(self, node: cst.Call) -> None:
        # todo: only handle setup from setuptools, not others tho unlikely
        is_setup_call = matchers.matches(node.func, cst.Name(value="setup"))
        if not is_setup_call:
            return

        # Delegate to a dedicated visitor that inspects the call's arguments.
        arg_visitor = SetupArgVisitor()
        node.visit(arg_visitor)
        self.python_requires = arg_visitor.python_requires
        self.install_requires = arg_visitor.install_requires


class SetupArgVisitor(cst.CSTVisitor):
    """Records the literal ``python_requires`` / ``install_requires`` arguments.

    Only literal values are recorded. When an argument is given as anything
    else (a variable, a function call, ...) its value cannot be determined
    statically, so the attribute is left as ``None`` instead of raising
    ``AttributeError`` or storing a variable name as if it were the value.
    """

    def __init__(self):
        self.python_requires = None
        self.install_requires = None

    def visit_Arg(self, node: cst.Arg) -> None:
        if matchers.matches(node.keyword, cst.Name(value="python_requires")):
            # todo: this works for `python_requires=">=3.7",` but what about
            # a list of versions?
            if isinstance(node.value, cst.SimpleString):
                # Quoted source text of the literal, e.g. '">=3.7"'
                self.python_requires = node.value.value
        elif matchers.matches(node.keyword, cst.Name(value="install_requires")):
            # Literal collections all expose their items as `.elements`
            if isinstance(node.value, (cst.List, cst.Tuple, cst.Set)):
                self.install_requires = node.value.elements
7 changes: 7 additions & 0 deletions src/codemodder/project_analysis/file_parsers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import libcst as cst


def clean_simplestring(node: cst.SimpleString | str) -> str:
    """Return the text of a string literal without its surrounding quotes.

    Args:
        node: Either a libcst ``SimpleString`` node or the quoted source text
            of a string literal (e.g. ``'">=3.7"'`` or ``"'>=3.7'"``).

    Returns:
        The unquoted string contents.
    """
    if isinstance(node, str):
        # Python source may quote with either " or ', so strip both kinds
        return node.strip("\"'")
    return node.raw_value
30 changes: 30 additions & 0 deletions src/codemodder/project_analysis/python_repo_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from functools import cached_property
from pathlib import Path
from codemodder.project_analysis.file_parsers import (
RequirementsTxtParser,
PyprojectTomlParser,
SetupCfgParser,
SetupPyParser,
)
from codemodder.project_analysis.file_parsers.package_store import PackageStore


class PythonRepoManager:
    """Collects dependency information for the project under a directory."""

    def __init__(self, parent_directory: Path):
        self.parent_directory = parent_directory
        # Parser classes to run, in this order.
        self._potential_stores = [
            RequirementsTxtParser,
            PyprojectTomlParser,
            SetupCfgParser,
            SetupPyParser,
        ]

    @cached_property
    def package_stores(self) -> list[PackageStore]:
        """All package stores found in the repo; computed once on first access."""
        return self._parse_all_stores()

    def _parse_all_stores(self) -> list[PackageStore]:
        """Run every parser on the parent directory and flatten the results."""
        return [
            pkg_store
            for parser_cls in self._potential_stores
            for pkg_store in parser_cls(self.parent_directory).parse()
        ]
2 changes: 2 additions & 0 deletions tests/codemods/base_codemod_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):
dry_run=True,
verbose=False,
registry=mock.MagicMock(),
repo_manager=mock.MagicMock(),
)
self.file_context = FileContext(
file_path,
Expand Down Expand Up @@ -80,6 +81,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected):
dry_run=True,
verbose=False,
registry=mock.MagicMock(),
repo_manager=mock.MagicMock(),
)
input_tree = cst.parse_module(input_code)
all_results = self.results_by_id_filepath(input_code, file_path)
Expand Down
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,12 @@ def disable_write_dependencies():
dm_write.start()
yield
dm_write.stop()


@pytest.fixture(scope="module")
def pkg_with_reqs_txt(tmp_path_factory):
    """Create a temporary directory holding a small ``requirements.txt``.

    Returns the directory path, not the file path.
    """
    project_dir = tmp_path_factory.mktemp("foo")
    reqs = "# comment\nrequests==2.31.0\nblack==23.7.*\nmypy~=1.4\npylint>1\n"
    (project_dir / "requirements.txt").write_text(reqs)
    return project_dir
Empty file.
Empty file.
Loading

0 comments on commit 4663b59

Please sign in to comment.