Skip to content

Commit

Permalink
feat!: Rewrite the link fixer script
Browse files Browse the repository at this point in the history
This commit greatly refactors the link fixer script. The previous
solution was based on detecting links using regular expressions and
applying some pattern-based fixes. However, in practice, this approach
proved to be a bad one. Luckily it at least didn't make things worse
(the model was already broken before anyways), but often enough it
didn't produce the desired outcome of a no-longer-broken model.

The new implementation introduced in this commit loads the model using
capellambse's MelodyLoader facility, and applies fixes using the
semantics-aware methods that the loader offers. Specifically, it tries
to parse all XML attributes as a list of element links, discards the
(potentially broken) file path from each link, and finally regenerates
them using `MelodyLoader.create_link`.

BREAKING CHANGE: Changed the hook's ID to `fix-capella-fragment-links`.
Experimentation has unfortunately shown the new implementation to be
noticably slower than the old one - to remedy this, the hook now skips
itself during the follow-up commit. The ID change was made to more
reliably identify the hook, and reduce the change for name clashes with
a different hook that might fix some other type of link in some other
type of file.
  • Loading branch information
Wuestengecko committed Nov 6, 2023
1 parent 583ce52 commit 580356d
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 102 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
- id: fix-links
- id: fix-capella-fragment-links
name: fix links after breaking merge
description: After a breaking merge by the merge tool, this script fixes links starting with index:/.
entry: fix-links
entry: fix-capella-fragment-links
language: python
always_run: true
stages: [post-commit]
303 changes: 204 additions & 99 deletions capella_git_hooks/fix_links.py
Original file line number Diff line number Diff line change
@@ -1,113 +1,218 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""A script which fixes wrong links with an index:/ prefix.
"""A script which fixes broken links within Capella models.
See https://github.com/eclipse/capella/issues/2725 for more information.
This is intended to fix the issue where the merge tool adds an
``index:/`` or similar prefix to links within Capella models. See
`eclipse/capella#2725`__ for more information.
__ https://github.com/eclipse/capella/issues/2725
"""
from __future__ import annotations

import collections.abc as cabc
import logging
import os
import re
import pathlib
import subprocess
import sys

from capellambse.loader import exs
from lxml import etree as ET

# pylint: disable=line-too-long
root_pattern = re.compile(
r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
import typing as t

import capellambse
import click
from capellambse import helpers, loader
from capellambse.loader import modelinfo

LOGGER = logging.getLogger(__name__)
# FIXME This isn't conventional-commits compliant, and might fail checks
COMMIT_MSG = "fix[by-script]: merge tool index-prefix"


@click.command()
@click.option(
"-m",
"--model",
"models",
help=(
"Path to a Capella model to fix."
" If not given, run on all tracked models."
" Can be specified multiple times."
),
multiple=True,
)
fragment_pattern = re.compile(
r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
)
file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?")


def get_unmodified_tracked_files() -> list[str]:
"""Return a list of git tracked files not having uncommitted changes."""
repo_path = os.getcwd()
try:
modified_files = subprocess.check_output(
["git", "diff", "--name-only"], cwd=repo_path, text=True
).splitlines()
all_tracked_files = subprocess.check_output(
["git", "ls-files"], cwd=repo_path, text=True
).splitlines()
unmodified_tracked_files = [
file for file in all_tracked_files if file not in modified_files
]

return unmodified_tracked_files
except subprocess.CalledProcessError as e:
# Handle any Git command errors
print("Git command error:", e)
return []


def fix_xml(path: str) -> bool:
"""Fix the XML located in the provided path and write changes to disk."""
is_fragment = path.startswith("fragments/")
assert is_fragment or "/" not in path
if path.endswith(".capella") or path.endswith(".capellafragment"):
line_length = exs.LINE_LENGTH
else:
line_length = sys.maxsize

changed = False

tree = ET.parse(path)
root = tree.getroot()

root_replacement = "../" if is_fragment else ""
fragment_replacement = "" if is_fragment else "fragments/"

for elem in root.iter():
for key, value in list(elem.attrib.items()):
# Use the regex pattern to find and replace attribute values
while match := root_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ root_replacement
+ value[index_match[1] :]
)

while match := fragment_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ fragment_replacement
+ value[index_match[1] :]
)

elem.attrib[key] = value

if changed:
exs.write(tree, path, line_length=line_length, siblings=True)

return changed


def commit_changes(changed_files_to_be_committed: list[str]):
"""Add and commit the provided list of files using a predefined message."""
subprocess.call(["git", "add"] + changed_files_to_be_committed)
subprocess.call(
["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"]
@click.option("--no-commit", is_flag=True, help="Do not commit changes.")
def main(
models: cabc.Sequence[str],
no_commit: bool,
):
"""Fix links in all tracked Capella models."""
logging.basicConfig(level="INFO")

dirty_files = (
subprocess.check_output(
["git", "diff", "--name-only", "-z"], text=True
)
.rstrip("\0")
.split("\0")
)
if dirty_files != [""]:
LOGGER.error(
"Worktree is dirty, commit or stash changes and try again: %s",
dirty_files,
)
raise SystemExit(1)

changes = False
for modelfile in find_tracked_models(models):
LOGGER.info("Loading model %s", modelfile)
model = loader.MelodyLoader(modelfile)
dirty = fix_model(model)
if dirty:
LOGGER.info("Saving changes to %s", modelfile)
model.resources["\0"] = _IndexWriter()
model.save()
changes = True
else:
LOGGER.info("Model is clean, not overwriting: %s", modelfile)

if changes:
if no_commit:
LOGGER.info("Not committing changes (--no-commit)")
else:
LOGGER.info("Committing changes")
subprocess.call(
["git", "commit", "-m", COMMIT_MSG],
env=os.environ | {"SKIP": "fix-capella-fragment-links"},
)


def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]:
"""Find all tracked models in the current Git repository.
If a list of models is provided, filter the result to only include
those models.
This function always returns paths relative to the current working
directory. Any provided models that live outside of the CWD are
ignored.
"""
if not models:
filter: cabc.Container = EverythingContainer()
else:
filter = [os.path.relpath(m) for m in models]

for file in subprocess.check_output(
["git", "ls-files", "-cz"], text=True
).split("\0"):
if file.endswith(".aird") and file in filter:
yield file

def main():
"""Fix links in all tracked, unchanged files and commit the changes."""
tracked_unmodified_files = get_unmodified_tracked_files()
changed_files = []
for file in tracked_unmodified_files:
if file_name_pattern.match(file):
if fix_xml(file):
changed_files.append(file)

if changed_files:
commit_changes(changed_files)
def is_file_dirty(path: pathlib.PurePosixPath) -> bool:
"""Check if the given file is dirty (worktree != index)."""
p = subprocess.run(
["git", "diff-index", "--quiet", "--", path],
check=False,
)
return p.returncode != 0


def fix_model(model: loader.MelodyLoader) -> bool:
"""Fix the links in the provided model.
Returns
-------
bool
True if the model was modified, False otherwise.
"""
dirty = False
for element in model.iterall():
for attr, value in element.attrib.items():
if attr == "href":
include_target_type = False
else:
include_target_type = None

try:
links = tuple(helpers.split_links(value))
except ValueError:
continue

new_links: list[str] = []
for link in links:
_, _, target_id = link.partition("#")
try:
target = model.follow_link(None, target_id)
except KeyError:
LOGGER.error("Cannot repair dangling link: #%s", target_id)
else:
link = model.create_link(
element,
target,
include_target_type=include_target_type,
)
new_links.append(link)

new_value = " ".join(new_links)
if new_value != value.strip():
element.attrib[attr] = new_value
dirty = True

return dirty


class EverythingContainer(cabc.Container):
"""A container that returns True for any key."""

def __contains__(self, key: object) -> bool:
"""Return True."""
return True


class _IndexWriter(capellambse.FileHandler):
"""A file handler that writes to the worktree and the git index."""

def __init__(self) -> None:
super().__init__(".")

def get_model_info(self) -> modelinfo.ModelInfo:
return modelinfo.ModelInfo(url="index:")

def open(
self,
filename: str | pathlib.PurePosixPath,
mode: t.Literal["r", "rb", "w", "wb"] = "rb",
) -> t.BinaryIO:
if "w" not in mode:
raise ValueError("IndexWriter can only be used for writing")

path = helpers.normalize_pure_path(filename, base=self.subdir)
return _IndexFile(path) # type: ignore[abstract]


class _IndexFile(t.BinaryIO):
def __init__(self, path: pathlib.PurePosixPath) -> None:
# pylint: disable=consider-using-with
self.__path = path
self.__file = open(path, "wb")

def __enter__(self) -> _IndexFile:
return self

def __exit__(self, *args: object) -> None:
self.close()

def __del__(self) -> None:
import warnings

warnings.warn("IndexFile should be closed explicitly", ResourceWarning)
self.close()

def close(self) -> None:
self.__file.close()
subprocess.check_call(["git", "add", self.__path])

def write(self, s: bytes) -> int: # type: ignore[override]
return self.__file.write(s)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
]

[project.scripts]
fix-links = "capella_git_hooks.fix_links:main"
fix-capella-fragment-links = "capella_git_hooks.fix_links:main"

[project.urls]
Homepage = "https://github.com/DSD-DBS/capella-git-hooks"
Expand Down

0 comments on commit 580356d

Please sign in to comment.