Skip to content

Commit

Permalink
feat!: Rewrite the link fixer script
Browse files Browse the repository at this point in the history
This commit greatly refactors the link fixer script. The previous
solution was based on detecting links using regular expressions and
applying some pattern-based fixes. However, in practice, this approach
proved to be a bad one. Luckily it at least didn't make things worse
(the model was already broken before anyways), but often enough it
didn't produce the desired outcome of a no-longer-broken model.

The new implementation introduced in this commit loads the model using
capellambse's MelodyLoader facility, and applies fixes using the
semantics-aware methods that the loader offers. Specifically, it tries
to parse all XML attributes as a list of element links, discards the
(potentially broken) file path from each link, and finally regenerates
them using `MelodyLoader.create_link`.

BREAKING CHANGE: Changed the hook's ID to `fix-capella-fragment-links`.
Experimentation has unfortunately shown the new implementation to be
noticably slower than the old one - to remedy this, the hook now skips
itself during the follow-up commit, so as to not run twice. The ID
change was made to more reliably identify the hook, and reduce the
chance for name clashes with a different hook that might fix some other
type of link in some other type of file.
  • Loading branch information
Wuestengecko committed Nov 18, 2024
1 parent 6e192f2 commit 634fe2e
Show file tree
Hide file tree
Showing 4 changed files with 230 additions and 108 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ repos:
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies:
- capellambse==0.6.10
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.4.2
hooks:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
- id: fix-links
- id: fix-capella-fragment-links
name: fix links after breaking merge
description: After a breaking merge by the merge tool, this script fixes links starting with index:/.
entry: fix-links
entry: fix-capella-fragment-links
language: python
always_run: true
stages: [post-commit]
311 changes: 219 additions & 92 deletions capella_git_hooks/fix_links.py
Original file line number Diff line number Diff line change
@@ -1,113 +1,240 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""A script which fixes wrong links with an index:/ prefix.
"""A script which fixes broken links within Capella models.
See https://github.com/eclipse/capella/issues/2725 for more information.
This is intended to fix the issue where the merge tool adds an
``index:/`` or similar prefix to links within Capella models. See
`eclipse/capella#2725`__ for more information.
__ https://github.com/eclipse/capella/issues/2725
"""
import os
import re
import subprocess
import sys

from capellambse.loader import exs
from lxml import etree as ET
from __future__ import annotations

# pylint: disable=line-too-long
root_pattern = re.compile(
r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
)
fragment_pattern = re.compile(
r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
import collections.abc as cabc
import logging
import os
import pathlib
import subprocess
import typing as t

import capellambse
import click
from capellambse import filehandler, helpers, loader
from capellambse.loader import modelinfo

LOGGER = logging.getLogger(__name__)
# FIXME This isn't conventional-commits compliant, and might fail checks
COMMIT_MSG = "fix[by-script]: merge tool index-prefix"


@click.command()
@click.option(
"-m",
"--model",
"models",
help=(
"Path to a Capella model to fix."
" If not given, run on all tracked models."
" Can be specified multiple times."
),
multiple=True,
)
file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?")


def get_unmodified_tracked_files() -> list[str]:
"""Return a list of git tracked files not having uncommitted changes."""
repo_path = os.getcwd()
try:
modified_files = subprocess.check_output(
["git", "diff", "--name-only"], cwd=repo_path, text=True
).splitlines()
all_tracked_files = subprocess.check_output(
["git", "ls-files"], cwd=repo_path, text=True
).splitlines()
unmodified_tracked_files = [
file for file in all_tracked_files if file not in modified_files
]

return unmodified_tracked_files
except subprocess.CalledProcessError as e:
# Handle any Git command errors
print("Git command error:", e)
return []


def fix_xml(path: str) -> bool:
"""Fix the XML located in the provided path and write changes to disk."""
is_fragment = path.startswith("fragments/")
assert is_fragment or "/" not in path
if path.endswith(".capella") or path.endswith(".capellafragment"):
line_length = exs.LINE_LENGTH
@click.option("--no-commit", is_flag=True, help="Do not commit changes.")
def main(
models: cabc.Sequence[str],
no_commit: bool,
):
"""Fix links in all tracked Capella models."""
logging.basicConfig(level="INFO")

dirty_files = (
subprocess.check_output(
["git", "diff", "--name-only", "-z"], text=True
)
.rstrip("\0")
.split("\0")
)
if dirty_files != [""]:
LOGGER.error(
"Worktree is dirty, commit or stash changes and try again: %s",
dirty_files,
)
raise SystemExit(1)

changes = False
for modelfile in find_tracked_models(models):
LOGGER.info("Loading model %s", modelfile)
model = loader.MelodyLoader(modelfile)
dirty = fix_model(model)
if dirty:
LOGGER.info("Saving changes to %s", modelfile)
model.resources["\0"] = _IndexWriter()
model.save()
changes = True
else:
LOGGER.info("Model is clean, not saving: %s", modelfile)

if changes:
if no_commit:
LOGGER.info("Not committing changes (--no-commit)")
else:
LOGGER.info("Committing changes")
subprocess.call(
["git", "commit", "-m", COMMIT_MSG],
env=os.environ | {"SKIP": "fix-capella-fragment-links"},
)


def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]:
"""Find all tracked models in the current Git repository.
If a list of models is provided, filter the result to only include
those models.
This function always returns paths relative to the current working
directory. Any provided models that live outside of the CWD are
ignored.
"""
if not models:
filter: cabc.Container = EverythingContainer()
else:
line_length = sys.maxsize

changed = False

tree = ET.parse(path)
root = tree.getroot()

root_replacement = "../" if is_fragment else ""
fragment_replacement = "" if is_fragment else "fragments/"

for elem in root.iter():
for key, value in list(elem.attrib.items()):
# Use the regex pattern to find and replace attribute values
while match := root_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ root_replacement
+ value[index_match[1] :]
)
filter = [os.path.relpath(m) for m in models]

for file in subprocess.check_output(
["git", "ls-files", "-cz"], text=True
).split("\0"):
if file.endswith(".aird") and file in filter:
yield file

while match := fragment_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ fragment_replacement
+ value[index_match[1] :]

def is_file_dirty(path: pathlib.PurePosixPath) -> bool:
"""Check if the given file is dirty (worktree != index)."""
p = subprocess.run(
["git", "diff-index", "--quiet", "--", path],
check=False,
)
return p.returncode != 0


def fix_model(model: loader.MelodyLoader) -> bool:
"""Fix the links in the provided model.
Returns
-------
bool
True if the model was modified, False otherwise.
"""
dirty = False
for element in model.iterall():
if element.tag == "semanticResources":
if element.text == "":
element.getparent().remove(element)
continue

targetfrag = element.text.rsplit("/", 1)[-1]
frags = [i for i in model.trees if i.name == targetfrag]
if len(frags) != 1:
raise RuntimeError(
f"Ambiguous fragment name: {element.text}"
" - please report this as bug at"
" https://github.com/DSD-DBS/capella-git-hooks"
)

elem.attrib[key] = value
sourcefrag = model.find_fragment(element)
if sourcefrag == frags[0]:
element.getparent().remove(element)
continue

if changed:
exs.write(tree, path, line_length=line_length, siblings=True)
link = os.path.relpath(frags[0], sourcefrag.parent)
if link != element.text:
element.text = link
dirty = True

return changed
for attr, value in element.attrib.items():
if attr == "href":
include_target_type = False
else:
include_target_type = None

try:
links = tuple(helpers.split_links(value))
except ValueError:
continue

def commit_changes(changed_files_to_be_committed: list[str]):
"""Add and commit the provided list of files using a predefined message."""
subprocess.call(["git", "add"] + changed_files_to_be_committed)
subprocess.call(
["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"]
)
new_links: list[str] = []
for link in links:
_, _, target_id = link.partition("#")
try:
target = model.follow_link(None, target_id)
except KeyError:
LOGGER.error("Cannot repair dangling link: #%s", target_id)
else:
link = model.create_link(
element,
target,
include_target_type=include_target_type,
)
new_links.append(link)

new_value = " ".join(new_links)
if new_value != value.strip():
element.attrib[attr] = new_value
dirty = True

return dirty


class EverythingContainer(cabc.Container):
"""A container that returns True for any key."""

def __contains__(self, key: object) -> bool:
"""Return True."""
return True


class _IndexWriter(filehandler.abc.FileHandler):
"""A file handler that writes to the worktree and the git index."""

def __init__(self) -> None:
super().__init__("index:")

def open(
self,
filename: str | pathlib.PurePosixPath,
mode: t.Literal["r", "rb", "w", "wb"] = "rb",
) -> t.BinaryIO:
if "w" not in mode:
raise ValueError("IndexWriter can only be used for writing")

path = helpers.normalize_pure_path(filename, base=self.subdir)
return _IndexFile(path) # type: ignore[abstract]


class _IndexFile(t.BinaryIO):
def __init__(self, path: pathlib.PurePosixPath) -> None:
# pylint: disable=consider-using-with
self.__path = path
self.__file = open(path, "wb")

def __enter__(self) -> _IndexFile:
return self

def __exit__(self, *args: object) -> None:
self.close()

def __del__(self) -> None:
import warnings

warnings.warn("IndexFile should be closed explicitly", ResourceWarning)
self.close()

def main():
"""Fix links in all tracked, unchanged files and commit the changes."""
tracked_unmodified_files = get_unmodified_tracked_files()
changed_files = []
for file in tracked_unmodified_files:
if file_name_pattern.match(file):
if fix_xml(file):
changed_files.append(file)
def close(self) -> None:
self.__file.close()
subprocess.check_call(["git", "add", self.__path])

if changed_files:
commit_changes(changed_files)
def write(self, s: bytes) -> int: # type: ignore[override]
return self.__file.write(s)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 634fe2e

Please sign in to comment.