From 2771a7113cb2ba2736e8daec9e70b2bc00a6e275 Mon Sep 17 00:00:00 2001 From: Martin Lehmann Date: Mon, 6 Nov 2023 15:34:28 +0100 Subject: [PATCH] feat!: Rewrite the link fixer script This commit greatly refactors the link fixer script. The previous solution was based on detecting links using regular expressions and applying some pattern-based fixes. However, in practice, this approach proved to be a bad one. Luckily it at least didn't make things worse (the model was already broken before anyways), but often enough it didn't produce the desired outcome of a no-longer-broken model. The new implementation introduced in this commit loads the model using capellambse's MelodyLoader facility, and applies fixes using the semantics-aware methods that the loader offers. Specifically, it tries to parse all XML attributes as a list of element links, discards the (potentially broken) file path from each link, and finally regenerates them using `MelodyLoader.create_link`. BREAKING CHANGE: Changed the hook's ID to `fix-capella-fragment-links`. Experimentation has unfortunately shown the new implementation to be noticably slower than the old one - to remedy this, the hook now skips itself during the follow-up commit, so as to not run twice. The ID change was made to more reliably identify the hook, and reduce the chance for name clashes with a different hook that might fix some other type of link in some other type of file. --- .pre-commit-config.yaml | 2 + .pre-commit-hooks.yaml | 4 +- capella_git_hooks/fix_links.py | 321 +++++++++++++++++++++++---------- pyproject.toml | 10 +- 4 files changed, 233 insertions(+), 104 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c3042ed..fd62ddb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,6 +50,8 @@ repos: rev: v1.13.0 hooks: - id: mypy + additional_dependencies: + - capellambse==0.6.10 - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.5 hooks: diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 8f2a79a..fa0aaa4 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -1,7 +1,7 @@ -- id: fix-links +- id: fix-capella-fragment-links name: fix links after breaking merge description: After a breaking merge by the merge tool, this script fixes links starting with index:/. - entry: fix-links + entry: fix-capella-fragment-links language: python always_run: true stages: [post-commit] diff --git a/capella_git_hooks/fix_links.py b/capella_git_hooks/fix_links.py index b2c42a3..6618cb9 100644 --- a/capella_git_hooks/fix_links.py +++ b/capella_git_hooks/fix_links.py @@ -1,113 +1,240 @@ # Copyright DB Netz AG and contributors # SPDX-License-Identifier: Apache-2.0 -"""A script which fixes wrong links with an index:/ prefix. +"""A script which fixes broken links within Capella models. -See https://github.com/eclipse/capella/issues/2725 for more information. +This is intended to fix the issue where the merge tool adds an +``index:/`` or similar prefix to links within Capella models. See +`eclipse/capella#2725`__ for more information. + +__ https://github.com/eclipse/capella/issues/2725 """ -import os -import re -import subprocess -import sys -from capellambse.loader import exs -from lxml import etree as ET +from __future__ import annotations -# pylint: disable=line-too-long -root_pattern = re.compile( - r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}" -) -fragment_pattern = re.compile( - r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}" +import collections.abc as cabc +import logging +import os +import pathlib +import subprocess +import typing as t +import urllib.parse + +import click +from capellambse import filehandler, helpers, loader + +LOGGER = logging.getLogger(__name__) +# FIXME This isn't conventional-commits compliant, and might fail checks +COMMIT_MSG = "fix[by-script]: merge tool index-prefix" + + +@click.command() +@click.option( + "-m", + "--model", + "models", + help=( + "Path to a Capella model to fix." + " If not given, run on all tracked models." + " Can be specified multiple times." + ), + multiple=True, ) -file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?") - - -def get_unmodified_tracked_files() -> list[str]: - """Return a list of git tracked files not having uncommitted changes.""" - repo_path = os.getcwd() - try: - modified_files = subprocess.check_output( - ["git", "diff", "--name-only"], cwd=repo_path, text=True - ).splitlines() - all_tracked_files = subprocess.check_output( - ["git", "ls-files"], cwd=repo_path, text=True - ).splitlines() - unmodified_tracked_files = [ - file for file in all_tracked_files if file not in modified_files - ] - - return unmodified_tracked_files - except subprocess.CalledProcessError as e: - # Handle any Git command errors - print("Git command error:", e) - return [] - - -def fix_xml(path: str) -> bool: - """Fix the XML located in the provided path and write changes to disk.""" - is_fragment = path.startswith("fragments/") - assert is_fragment or "/" not in path - if path.endswith(".capella") or path.endswith(".capellafragment"): - line_length = exs.LINE_LENGTH +@click.option("--no-commit", is_flag=True, help="Do not commit changes.") +def main( + models: cabc.Sequence[str], + no_commit: bool, +): + """Fix links in all tracked Capella models.""" + logging.basicConfig(level="INFO") + + dirty_files = ( + subprocess.check_output( + ["git", "diff", "--name-only", "-z"], text=True + ) + .rstrip("\0") + .split("\0") + ) + if dirty_files != [""]: + LOGGER.error( + "Worktree is dirty, commit or stash changes and try again: %s", + dirty_files, + ) + raise SystemExit(1) + + changes = False + for modelfile in find_tracked_models(models): + LOGGER.info("Loading model %s", modelfile) + model = loader.MelodyLoader(modelfile) + dirty = fix_model(model) + if dirty: + LOGGER.info("Saving changes to %s", modelfile) + model.resources["\0"] = _IndexWriter() + model.save() + changes = True + else: + LOGGER.info("Model is clean, not saving: %s", modelfile) + + if changes: + if no_commit: + LOGGER.info("Not committing changes (--no-commit)") + else: + LOGGER.info("Committing changes") + subprocess.call( + ["git", "commit", "-m", COMMIT_MSG], + env=os.environ | {"SKIP": "fix-capella-fragment-links"}, + ) + + +def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]: + """Find all tracked models in the current Git repository. + + If a list of models is provided, filter the result to only include + those models. + + This function always returns paths relative to the current working + directory. Any provided models that live outside of the CWD are + ignored. + """ + if not models: + filter: cabc.Container = EverythingContainer() else: - line_length = sys.maxsize - - changed = False - - tree = ET.parse(path) - root = tree.getroot() - - root_replacement = "../" if is_fragment else "" - fragment_replacement = "" if is_fragment else "fragments/" - - for elem in root.iter(): - for key, value in list(elem.attrib.items()): - # Use the regex pattern to find and replace attribute values - while match := root_pattern.search(value): - changed = True - index_match = match.span(1) - value = ( - value[: index_match[0]] - + root_replacement - + value[index_match[1] :] - ) + filter = [os.path.relpath(m) for m in models] - while match := fragment_pattern.search(value): - changed = True - index_match = match.span(1) - value = ( - value[: index_match[0]] - + fragment_replacement - + value[index_match[1] :] - ) - - elem.attrib[key] = value - - if changed: - exs.write(tree, path, line_length=line_length, siblings=True) - - return changed + for file in subprocess.check_output( + ["git", "ls-files", "-cz"], text=True + ).split("\0"): + if file.endswith(".aird") and file in filter: + yield file -def commit_changes(changed_files_to_be_committed: list[str]): - """Add and commit the provided list of files using a predefined message.""" - subprocess.call(["git", "add"] + changed_files_to_be_committed) - subprocess.call( - ["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"] +def is_file_dirty(path: pathlib.PurePosixPath) -> bool: + """Check if the given file is dirty (worktree != index).""" + p = subprocess.run( + ["git", "diff-index", "--quiet", "--", path], + check=False, ) + return p.returncode != 0 + + +def fix_model(model: loader.MelodyLoader) -> bool: + """Fix the links in the provided model. + + Returns + ------- + bool + True if the model was modified, False otherwise. + """ + dirty = False + for element in model.iterall(): + if element.tag == "semanticResources": + if element.text == "": + element.getparent().remove(element) + continue + + targetfrag = urllib.parse.unquote(element.text.rsplit("/", 1)[-1]) + frags = [i for i in model.trees if i.name == targetfrag] + if len(frags) != 1: + raise RuntimeError( + f"Ambiguous fragment name: {element.text}" + " - please report this as bug at" + " https://github.com/DSD-DBS/capella-git-hooks" + ) - -def main(): - """Fix links in all tracked, unchanged files and commit the changes.""" - tracked_unmodified_files = get_unmodified_tracked_files() - changed_files = [] - for file in tracked_unmodified_files: - if file_name_pattern.match(file): - if fix_xml(file): - changed_files.append(file) - - if changed_files: - commit_changes(changed_files) + sourcefrag = model.find_fragment(element) + if sourcefrag == frags[0]: + element.getparent().remove(element) + continue + + link = os.path.relpath(frags[0], sourcefrag.parent) + link = urllib.parse.quote(link) + if link != element.text: + element.text = link + dirty = True + + for attr, value in element.attrib.items(): + if attr == "href": + include_target_type = False + else: + include_target_type = None + + try: + links = tuple(helpers.split_links(value)) + except ValueError: + continue + + new_links: list[str] = [] + for link in links: + _, _, target_id = link.partition("#") + try: + target = model.follow_link(None, target_id) + except KeyError: + LOGGER.error("Cannot repair dangling link: #%s", target_id) + else: + link = model.create_link( + element, + target, + include_target_type=include_target_type, + ) + new_links.append(link) + + new_value = " ".join(new_links) + if new_value != value.strip(): + element.attrib[attr] = new_value + dirty = True + + return dirty + + +class EverythingContainer(cabc.Container): + """A container that returns True for any key.""" + + def __contains__(self, key: object) -> bool: + """Return True.""" + return True + + +class _IndexWriter(filehandler.abc.FileHandler): + """A file handler that writes to the worktree and the git index.""" + + def __init__(self) -> None: + super().__init__("index:") + + def open( + self, + filename: str | pathlib.PurePosixPath, + mode: t.Literal["r", "rb", "w", "wb"] = "rb", + ) -> t.BinaryIO: + if "w" not in mode: + raise ValueError("IndexWriter can only be used for writing") + + path = helpers.normalize_pure_path(filename, base=self.subdir) + return _IndexFile(path) # type: ignore[abstract] + + +class _IndexFile(t.BinaryIO): + def __init__(self, path: pathlib.PurePosixPath) -> None: + # pylint: disable=consider-using-with + self.__path = path + self.__file = open(path, "wb") + + def __enter__(self) -> _IndexFile: + return self + + def __exit__(self, *args: object) -> None: + self.close() + + def __del__(self) -> None: + import warnings + + warnings.warn("IndexFile should be closed explicitly", ResourceWarning) + self.close() + + def close(self) -> None: + self.__file.close() + subprocess.check_call(["git", "add", self.__path]) + + def write(self, s: bytes) -> int: # type: ignore[override] + return self.__file.write(s) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 3e8f456..fe13100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dynamic = ["version"] name = "capella-git-hooks" description = "A collection of useful Capella related Git hooks" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = { text = "Apache-2.0" } authors = [ { name = "DB Netz AG" }, @@ -23,17 +23,17 @@ classifiers = [ "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] dependencies = [ - "capellambse>=0.5,<0.6", + "capellambse>=0.6.10,<0.7", "click", ] [project.scripts] -fix-links = "capella_git_hooks.fix_links:main" +fix-capella-fragment-links = "capella_git_hooks.fix_links:main" [project.urls] Homepage = "https://github.com/DSD-DBS/capella-git-hooks" @@ -84,7 +84,7 @@ no_implicit_optional = true show_error_codes = true warn_redundant_casts = true warn_unreachable = true -python_version = "3.9" +python_version = "3.10" [[tool.mypy.overrides]] module = ["tests.*"]