DSD-DBS · Wuestengecko · May 28, 2024 · Nov 6, 2023 · May 28, 2024 · micha91
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
@@ -1,7 +1,7 @@
--   id: fix-links
+-   id: fix-capella-fragment-links
     name: fix links after breaking merge
     description: After a breaking merge by the merge tool, this script fixes links starting with index:/.
-    entry: fix-links
+    entry: fix-capella-fragment-links
     language: python
     always_run: true
     stages: [post-commit]
diff --git a/capella_git_hooks/fix_links.py b/capella_git_hooks/fix_links.py
@@ -1,113 +1,242 @@
 # Copyright DB Netz AG and contributors
 # SPDX-License-Identifier: Apache-2.0
-"""A script which fixes wrong links with an index:/ prefix.
+"""A script which fixes broken links within Capella models.
 
-See https://github.com/eclipse/capella/issues/2725 for more information.
+This is intended to fix the issue where the merge tool adds an
+``index:/`` or similar prefix to links within Capella models. See
+`eclipse/capella#2725`__ for more information.
+
+__ https://github.com/eclipse/capella/issues/2725
 """
+from __future__ import annotations
+
+import collections.abc as cabc
+import logging
 import os
-import re
+import pathlib
 import subprocess
-import sys
-
-from capellambse.loader import exs
-from lxml import etree as ET
-
-# pylint: disable=line-too-long
-root_pattern = re.compile(
-    r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
+import typing as t
+
+import capellambse
+import click
+from capellambse import helpers, loader
+from capellambse.loader import modelinfo
+
+LOGGER = logging.getLogger(__name__)
+# FIXME This isn't conventional-commits compliant, and might fail checks
+COMMIT_MSG = "fix[by-script]: merge tool index-prefix"
+
+
+@click.command()
+@click.option(
+    "-m",
+    "--model",
+    "models",
+    help=(
+        "Path to a Capella model to fix."
+        " If not given, run on all tracked models."
+        " Can be specified multiple times."
+    ),
+    multiple=True,
 )
-fragment_pattern = re.compile(
-    r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
-)
-file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?")
-
-
-def get_unmodified_tracked_files() -> list[str]:
-    """Return a list of git tracked files not having uncommitted changes."""
-    repo_path = os.getcwd()
-    try:
-        modified_files = subprocess.check_output(
-            ["git", "diff", "--name-only"], cwd=repo_path, text=True
-        ).splitlines()
-        all_tracked_files = subprocess.check_output(
-            ["git", "ls-files"], cwd=repo_path, text=True
-        ).splitlines()
-        unmodified_tracked_files = [
-            file for file in all_tracked_files if file not in modified_files
-        ]
-
-        return unmodified_tracked_files
-    except subprocess.CalledProcessError as e:
-        # Handle any Git command errors
-        print("Git command error:", e)
-        return []
-
-
-def fix_xml(path: str) -> bool:
-    """Fix the XML located in the provided path and write changes to disk."""
-    is_fragment = path.startswith("fragments/")
-    assert is_fragment or "/" not in path
-    if path.endswith(".capella") or path.endswith(".capellafragment"):
-        line_length = exs.LINE_LENGTH
+@click.option("--no-commit", is_flag=True, help="Do not commit changes.")
+def main(
+    models: cabc.Sequence[str],
+    no_commit: bool,
+):
+    """Fix links in all tracked Capella models."""
+    logging.basicConfig(level="INFO")
+
+    dirty_files = (
+        subprocess.check_output(
+            ["git", "diff", "--name-only", "-z"], text=True
+        )
+        .rstrip("\0")
+        .split("\0")
+    )
+    if dirty_files != [""]:
+        LOGGER.error(
+            "Worktree is dirty, commit or stash changes and try again: %s",
+            dirty_files,
+        )
+        raise SystemExit(1)
+
+    changes = False
+    for modelfile in find_tracked_models(models):
+        LOGGER.info("Loading model %s", modelfile)
+        model = loader.MelodyLoader(modelfile)
+        dirty = fix_model(model)
+        if dirty:
+            LOGGER.info("Saving changes to %s", modelfile)
+            model.resources["\0"] = _IndexWriter()
+            model.save()
+            changes = True
+        else:
+            LOGGER.info("Model is clean, not saving: %s", modelfile)
+
+    if changes:
+        if no_commit:
+            LOGGER.info("Not committing changes (--no-commit)")
+        else:
+            LOGGER.info("Committing changes")
+            subprocess.call(
+                ["git", "commit", "-m", COMMIT_MSG],
+                env=os.environ | {"SKIP": "fix-capella-fragment-links"},
+            )
+
+
+def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]:
+    """Find all tracked models in the current Git repository.
+
+    If a list of models is provided, filter the result to only include
+    those models.
+
+    This function always returns paths relative to the current working
+    directory. Any provided models that live outside of the CWD are
+    ignored.
+    """
+    if not models:
+        filter: cabc.Container = EverythingContainer()
     else:
-        line_length = sys.maxsize
-
-    changed = False
-
-    tree = ET.parse(path)
-    root = tree.getroot()
-
-    root_replacement = "../" if is_fragment else ""
-    fragment_replacement = "" if is_fragment else "fragments/"
-
-    for elem in root.iter():
-        for key, value in list(elem.attrib.items()):
-            # Use the regex pattern to find and replace attribute values
-            while match := root_pattern.search(value):
-                changed = True
-                index_match = match.span(1)
-                value = (
-                    value[: index_match[0]]
-                    + root_replacement
-                    + value[index_match[1] :]
-                )
+        filter = [os.path.relpath(m) for m in models]
 
-            while match := fragment_pattern.search(value):
-                changed = True
-                index_match = match.span(1)
-                value = (
-                    value[: index_match[0]]
-                    + fragment_replacement
-                    + value[index_match[1] :]
+    for file in subprocess.check_output(
+        ["git", "ls-files", "-cz"], text=True
+    ).split("\0"):
+        if file.endswith(".aird") and file in filter:
+            yield file
+
+
+def is_file_dirty(path: pathlib.PurePosixPath) -> bool:
+    """Check if the given file is dirty (worktree != index)."""
+    p = subprocess.run(
+        ["git", "diff-index", "--quiet", "--", path],
+        check=False,
+    )
+    return p.returncode != 0
+
+
+def fix_model(model: loader.MelodyLoader) -> bool:
+    """Fix the links in the provided model.
+
+    Returns
+    -------
+    bool
+        True if the model was modified, False otherwise.
+    """
+    dirty = False
+    for element in model.iterall():
+        if element.tag == "semanticResources":
+            if element.text == "":
+                element.getparent().remove(element)
+                continue
+
+            targetfrag = element.text.rsplit("/", 1)[-1]
+            frags = [i for i in model.trees if i.name == targetfrag]
+            if len(frags) != 1:
+                raise RuntimeError(
+                    f"Ambiguous fragment name: {element.text}"
+                    " - please report this as bug at"
+                    " https://github.com/DSD-DBS/capella-git-hooks"
                 )
 
-            elem.attrib[key] = value
+            sourcefrag = model.find_fragment(element)
+            if sourcefrag == frags[0]:
+                element.getparent().remove(element)
+                continue
 
-    if changed:
-        exs.write(tree, path, line_length=line_length, siblings=True)
+            link = os.path.relpath(frags[0], sourcefrag.parent)
+            if link != element.text:
+                element.text = link
+                dirty = True
 
-    return changed
+        for attr, value in element.attrib.items():
+            if attr == "href":
+                include_target_type = False
+            else:
+                include_target_type = None
 
+            try:
+                links = tuple(helpers.split_links(value))
+            except ValueError:
+                continue
 
-def commit_changes(changed_files_to_be_committed: list[str]):
-    """Add and commit the provided list of files using a predefined message."""
-    subprocess.call(["git", "add"] + changed_files_to_be_committed)
-    subprocess.call(
-        ["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"]
-    )
+            new_links: list[str] = []
+            for link in links:
+                _, _, target_id = link.partition("#")
+                try:
+                    target = model.follow_link(None, target_id)
+                except KeyError:
+                    LOGGER.error("Cannot repair dangling link: #%s", target_id)
+                else:
+                    link = model.create_link(
+                        element,
+                        target,
+                        include_target_type=include_target_type,
+                    )
+                new_links.append(link)
+
+            new_value = " ".join(new_links)
+            if new_value != value.strip():
+                element.attrib[attr] = new_value
+                dirty = True
+
+    return dirty
+
+
+class EverythingContainer(cabc.Container):
+    """A container that returns True for any key."""
+
+    def __contains__(self, key: object) -> bool:
+        """Return True."""
+        return True
+
+
+class _IndexWriter(capellambse.FileHandler):
+    """A file handler that writes to the worktree and the git index."""
+
+    def __init__(self) -> None:
+        super().__init__(".")
+
+    def get_model_info(self) -> modelinfo.ModelInfo:
+        return modelinfo.ModelInfo(url="index:")
+
+    def open(
+        self,
+        filename: str | pathlib.PurePosixPath,
+        mode: t.Literal["r", "rb", "w", "wb"] = "rb",
+    ) -> t.BinaryIO:
+        if "w" not in mode:
+            raise ValueError("IndexWriter can only be used for writing")
+
+        path = helpers.normalize_pure_path(filename, base=self.subdir)
+        return _IndexFile(path)  # type: ignore[abstract]
+
+
+class _IndexFile(t.BinaryIO):
+    def __init__(self, path: pathlib.PurePosixPath) -> None:
+        # pylint: disable=consider-using-with
+        self.__path = path
+        self.__file = open(path, "wb")
+
+    def __enter__(self) -> _IndexFile:
+        return self
+
+    def __exit__(self, *args: object) -> None:
+        self.close()
+
+    def __del__(self) -> None:
+        import warnings
 
+        warnings.warn("IndexFile should be closed explicitly", ResourceWarning)
+        self.close()
 
-def main():
-    """Fix links in all tracked, unchanged files and commit the changes."""
-    tracked_unmodified_files = get_unmodified_tracked_files()
-    changed_files = []
-    for file in tracked_unmodified_files:
-        if file_name_pattern.match(file):
-            if fix_xml(file):
-                changed_files.append(file)
+    def close(self) -> None:
+        self.__file.close()
+        subprocess.check_call(["git", "add", self.__path])
 
-    if changed_files:
-        commit_changes(changed_files)
+    def write(self, s: bytes) -> int:  # type: ignore[override]
+        return self.__file.write(s)
 
 
 if __name__ == "__main__":

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
 ]
 
 [project.scripts]
-fix-links = "capella_git_hooks.fix_links:main"
+fix-capella-fragment-links = "capella_git_hooks.fix_links:main"
 
 [project.urls]
 Homepage = "https://github.com/DSD-DBS/capella-git-hooks"