Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite the link fixer script #2

Merged
merged 2 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
- id: fix-links
- id: fix-capella-fragment-links
name: fix links after breaking merge
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this repository is for capella-git-hooks, I don't think it is necessary to include capella in the hook ID. I get the point of adding fragment to the ID, but I am not sure whether this justifies the additional work it takes to change the hook in the pre-commit config of the projects...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this scope, yes, but the hook ID is also used in the (pre-commit wide) SKIP environment variable. That's why I included "capella" there. See the commit command that this script executes (translated to bash):

SKIP=fix-capella-fragment-links git commit -m "Fix links yada yada"

description: After a breaking merge by the merge tool, this script fixes links starting with index:/.
entry: fix-links
entry: fix-capella-fragment-links
language: python
always_run: true
stages: [post-commit]
313 changes: 221 additions & 92 deletions capella_git_hooks/fix_links.py
Original file line number Diff line number Diff line change
@@ -1,113 +1,242 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""A script which fixes wrong links with an index:/ prefix.
"""A script which fixes broken links within Capella models.

See https://github.com/eclipse/capella/issues/2725 for more information.
This is intended to fix the issue where the merge tool adds an
``index:/`` or similar prefix to links within Capella models. See
`eclipse/capella#2725`__ for more information.

__ https://github.com/eclipse/capella/issues/2725
"""
from __future__ import annotations

import collections.abc as cabc
import logging
import os
import re
import pathlib
import subprocess
import sys

from capellambse.loader import exs
from lxml import etree as ET

# pylint: disable=line-too-long
root_pattern = re.compile(
r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
import typing as t

import capellambse
import click
from capellambse import helpers, loader
from capellambse.loader import modelinfo

LOGGER = logging.getLogger(__name__)
# FIXME This isn't conventional-commits compliant, and might fail checks
COMMIT_MSG = "fix[by-script]: merge tool index-prefix"


@click.command()
@click.option(
"-m",
"--model",
"models",
help=(
"Path to a Capella model to fix."
" If not given, run on all tracked models."
" Can be specified multiple times."
),
multiple=True,
)
fragment_pattern = re.compile(
r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}"
)
file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?")


def get_unmodified_tracked_files() -> list[str]:
"""Return a list of git tracked files not having uncommitted changes."""
repo_path = os.getcwd()
try:
modified_files = subprocess.check_output(
["git", "diff", "--name-only"], cwd=repo_path, text=True
).splitlines()
all_tracked_files = subprocess.check_output(
["git", "ls-files"], cwd=repo_path, text=True
).splitlines()
unmodified_tracked_files = [
file for file in all_tracked_files if file not in modified_files
]

return unmodified_tracked_files
except subprocess.CalledProcessError as e:
# Handle any Git command errors
print("Git command error:", e)
return []


def fix_xml(path: str) -> bool:
"""Fix the XML located in the provided path and write changes to disk."""
is_fragment = path.startswith("fragments/")
assert is_fragment or "/" not in path
if path.endswith(".capella") or path.endswith(".capellafragment"):
line_length = exs.LINE_LENGTH
@click.option("--no-commit", is_flag=True, help="Do not commit changes.")
def main(
models: cabc.Sequence[str],
no_commit: bool,
):
"""Fix links in all tracked Capella models."""
logging.basicConfig(level="INFO")

dirty_files = (
subprocess.check_output(
["git", "diff", "--name-only", "-z"], text=True
)
.rstrip("\0")
.split("\0")
)
if dirty_files != [""]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we won't be able to split changes in different commits anymore? E.g. I changed something in the SA layer and in the OA layer and want to commit it separately - in this case the script will fail. I know the struggle here and in a perfect world the user would have had committed his changes from the one layer before making changes in another one, but we should at least make people aware of that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pre-commit should stash away all not-to-be-committed changes before it runs any hooks, so it should actually work the same in that regard. Although I'm not 100% sure if it also does that for "post" hooks - maybe something to check before merging this.

LOGGER.error(
"Worktree is dirty, commit or stash changes and try again: %s",
dirty_files,
)
raise SystemExit(1)

changes = False
for modelfile in find_tracked_models(models):
LOGGER.info("Loading model %s", modelfile)
model = loader.MelodyLoader(modelfile)
dirty = fix_model(model)
if dirty:
LOGGER.info("Saving changes to %s", modelfile)
model.resources["\0"] = _IndexWriter()
model.save()
changes = True
else:
LOGGER.info("Model is clean, not saving: %s", modelfile)

if changes:
if no_commit:
LOGGER.info("Not committing changes (--no-commit)")
else:
LOGGER.info("Committing changes")
subprocess.call(
["git", "commit", "-m", COMMIT_MSG],
env=os.environ | {"SKIP": "fix-capella-fragment-links"},
)


def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]:
"""Find all tracked models in the current Git repository.

If a list of models is provided, filter the result to only include
those models.

This function always returns paths relative to the current working
directory. Any provided models that live outside of the CWD are
ignored.
"""
if not models:
filter: cabc.Container = EverythingContainer()
else:
line_length = sys.maxsize

changed = False

tree = ET.parse(path)
root = tree.getroot()

root_replacement = "../" if is_fragment else ""
fragment_replacement = "" if is_fragment else "fragments/"

for elem in root.iter():
for key, value in list(elem.attrib.items()):
# Use the regex pattern to find and replace attribute values
while match := root_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ root_replacement
+ value[index_match[1] :]
)
filter = [os.path.relpath(m) for m in models]

while match := fragment_pattern.search(value):
changed = True
index_match = match.span(1)
value = (
value[: index_match[0]]
+ fragment_replacement
+ value[index_match[1] :]
for file in subprocess.check_output(
["git", "ls-files", "-cz"], text=True
).split("\0"):
if file.endswith(".aird") and file in filter:
yield file


def is_file_dirty(path: pathlib.PurePosixPath) -> bool:
"""Check if the given file is dirty (worktree != index)."""
p = subprocess.run(
["git", "diff-index", "--quiet", "--", path],
check=False,
)
return p.returncode != 0


def fix_model(model: loader.MelodyLoader) -> bool:
"""Fix the links in the provided model.

Returns
-------
bool
True if the model was modified, False otherwise.
"""
dirty = False
for element in model.iterall():
if element.tag == "semanticResources":
if element.text == "":
element.getparent().remove(element)
continue

targetfrag = element.text.rsplit("/", 1)[-1]
frags = [i for i in model.trees if i.name == targetfrag]
if len(frags) != 1:
raise RuntimeError(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we execute this as a post-commit-hook, the hook would in this case fail, but the users commit (which may contains several broken links) would still be present and could be pushed. Imo it would be a better to fix the rest of the model on best effort and report errors at the end of the hook

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So do you propose to just not touch links with ambiguous target at all (but still fix all others)? I suppose that would at least fix some of the issues, but the model would still be in a broken state anyways.

In any case, definitely something the user should be aware of. I'm just not sure how we could make the user aware, as Capella generally hides hook output completely (even if it fails). Maybe we could try to show a message box with zenity or something similar if that's installed?

f"Ambiguous fragment name: {element.text}"
" - please report this as bug at"
" https://github.com/DSD-DBS/capella-git-hooks"
)

elem.attrib[key] = value
sourcefrag = model.find_fragment(element)
if sourcefrag == frags[0]:
element.getparent().remove(element)
continue

if changed:
exs.write(tree, path, line_length=line_length, siblings=True)
link = os.path.relpath(frags[0], sourcefrag.parent)
if link != element.text:
element.text = link
dirty = True

return changed
for attr, value in element.attrib.items():
if attr == "href":
include_target_type = False
else:
include_target_type = None

try:
links = tuple(helpers.split_links(value))
except ValueError:
continue

def commit_changes(changed_files_to_be_committed: list[str]):
"""Add and commit the provided list of files using a predefined message."""
subprocess.call(["git", "add"] + changed_files_to_be_committed)
subprocess.call(
["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"]
)
new_links: list[str] = []
for link in links:
_, _, target_id = link.partition("#")
try:
target = model.follow_link(None, target_id)
except KeyError:
LOGGER.error("Cannot repair dangling link: #%s", target_id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The user will never find this error message. Maybe we should think about collecting error messages and let the hook fail after repairing the model on best effort basis?

else:
link = model.create_link(
element,
target,
include_target_type=include_target_type,
)
new_links.append(link)

new_value = " ".join(new_links)
if new_value != value.strip():
element.attrib[attr] = new_value
dirty = True

return dirty


class EverythingContainer(cabc.Container):
"""A container that returns True for any key."""

def __contains__(self, key: object) -> bool:
"""Return True."""
return True


class _IndexWriter(capellambse.FileHandler):
"""A file handler that writes to the worktree and the git index."""

def __init__(self) -> None:
super().__init__(".")

def get_model_info(self) -> modelinfo.ModelInfo:
return modelinfo.ModelInfo(url="index:")

def open(
self,
filename: str | pathlib.PurePosixPath,
mode: t.Literal["r", "rb", "w", "wb"] = "rb",
) -> t.BinaryIO:
if "w" not in mode:
raise ValueError("IndexWriter can only be used for writing")

path = helpers.normalize_pure_path(filename, base=self.subdir)
return _IndexFile(path) # type: ignore[abstract]


class _IndexFile(t.BinaryIO):
def __init__(self, path: pathlib.PurePosixPath) -> None:
# pylint: disable=consider-using-with
self.__path = path
self.__file = open(path, "wb")

def __enter__(self) -> _IndexFile:
return self

def __exit__(self, *args: object) -> None:
self.close()

def __del__(self) -> None:
import warnings

warnings.warn("IndexFile should be closed explicitly", ResourceWarning)
self.close()

def main():
"""Fix links in all tracked, unchanged files and commit the changes."""
tracked_unmodified_files = get_unmodified_tracked_files()
changed_files = []
for file in tracked_unmodified_files:
if file_name_pattern.match(file):
if fix_xml(file):
changed_files.append(file)
def close(self) -> None:
self.__file.close()
subprocess.check_call(["git", "add", self.__path])

if changed_files:
commit_changes(changed_files)
def write(self, s: bytes) -> int: # type: ignore[override]
return self.__file.write(s)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
]

[project.scripts]
fix-links = "capella_git_hooks.fix_links:main"
fix-capella-fragment-links = "capella_git_hooks.fix_links:main"

[project.urls]
Homepage = "https://github.com/DSD-DBS/capella-git-hooks"
Expand Down
Loading