-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite the link fixer script #2
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
- id: fix-links | ||
- id: fix-capella-fragment-links | ||
name: fix links after breaking merge | ||
description: After a breaking merge by the merge tool, this script fixes links starting with index:/. | ||
entry: fix-links | ||
entry: fix-capella-fragment-links | ||
language: python | ||
always_run: true | ||
stages: [post-commit] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,113 +1,242 @@ | ||
# Copyright DB Netz AG and contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
"""A script which fixes wrong links with an index:/ prefix. | ||
"""A script which fixes broken links within Capella models. | ||
|
||
See https://github.com/eclipse/capella/issues/2725 for more information. | ||
This is intended to fix the issue where the merge tool adds an | ||
``index:/`` or similar prefix to links within Capella models. See | ||
`eclipse/capella#2725`__ for more information. | ||
|
||
__ https://github.com/eclipse/capella/issues/2725 | ||
""" | ||
from __future__ import annotations | ||
|
||
import collections.abc as cabc | ||
import logging | ||
import os | ||
import re | ||
import pathlib | ||
import subprocess | ||
import sys | ||
|
||
from capellambse.loader import exs | ||
from lxml import etree as ET | ||
|
||
# pylint: disable=line-too-long | ||
root_pattern = re.compile( | ||
r"(index:/).*\.(capella|aird)#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}" | ||
import typing as t | ||
|
||
import capellambse | ||
import click | ||
from capellambse import helpers, loader | ||
from capellambse.loader import modelinfo | ||
|
||
LOGGER = logging.getLogger(__name__) | ||
# FIXME This isn't conventional-commits compliant, and might fail checks | ||
COMMIT_MSG = "fix[by-script]: merge tool index-prefix" | ||
|
||
|
||
@click.command() | ||
@click.option( | ||
"-m", | ||
"--model", | ||
"models", | ||
help=( | ||
"Path to a Capella model to fix." | ||
" If not given, run on all tracked models." | ||
" Can be specified multiple times." | ||
), | ||
multiple=True, | ||
) | ||
fragment_pattern = re.compile( | ||
r"(index:/fragments/).*\.(capella|aird)fragment#[0-9a-f]{8}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{4}\b-[0-9a-f]{12}" | ||
) | ||
file_name_pattern = re.compile(r".*\.(capella|aird)(fragment)?") | ||
|
||
|
||
def get_unmodified_tracked_files() -> list[str]: | ||
"""Return a list of git tracked files not having uncommitted changes.""" | ||
repo_path = os.getcwd() | ||
try: | ||
modified_files = subprocess.check_output( | ||
["git", "diff", "--name-only"], cwd=repo_path, text=True | ||
).splitlines() | ||
all_tracked_files = subprocess.check_output( | ||
["git", "ls-files"], cwd=repo_path, text=True | ||
).splitlines() | ||
unmodified_tracked_files = [ | ||
file for file in all_tracked_files if file not in modified_files | ||
] | ||
|
||
return unmodified_tracked_files | ||
except subprocess.CalledProcessError as e: | ||
# Handle any Git command errors | ||
print("Git command error:", e) | ||
return [] | ||
|
||
|
||
def fix_xml(path: str) -> bool: | ||
"""Fix the XML located in the provided path and write changes to disk.""" | ||
is_fragment = path.startswith("fragments/") | ||
assert is_fragment or "/" not in path | ||
if path.endswith(".capella") or path.endswith(".capellafragment"): | ||
line_length = exs.LINE_LENGTH | ||
@click.option("--no-commit", is_flag=True, help="Do not commit changes.") | ||
def main( | ||
models: cabc.Sequence[str], | ||
no_commit: bool, | ||
): | ||
"""Fix links in all tracked Capella models.""" | ||
logging.basicConfig(level="INFO") | ||
|
||
dirty_files = ( | ||
subprocess.check_output( | ||
["git", "diff", "--name-only", "-z"], text=True | ||
) | ||
.rstrip("\0") | ||
.split("\0") | ||
) | ||
if dirty_files != [""]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So we won't be able to split changes in different commits anymore? E.g. I changed something in the SA layer and in the OA layer and want to commit it separately - in this case the script will fail. I know the struggle here and in a perfect world the user would have had committed his changes from the one layer before making changes in another one, but we should at least make people aware of that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pre-commit should stash away all not-to-be-committed changes before it runs any hooks, so it should actually work the same in that regard. Although I'm not 100% sure if it also does that for "post" hooks - maybe something to check before merging this. |
||
LOGGER.error( | ||
"Worktree is dirty, commit or stash changes and try again: %s", | ||
dirty_files, | ||
) | ||
raise SystemExit(1) | ||
|
||
changes = False | ||
for modelfile in find_tracked_models(models): | ||
LOGGER.info("Loading model %s", modelfile) | ||
model = loader.MelodyLoader(modelfile) | ||
dirty = fix_model(model) | ||
if dirty: | ||
LOGGER.info("Saving changes to %s", modelfile) | ||
model.resources["\0"] = _IndexWriter() | ||
model.save() | ||
changes = True | ||
else: | ||
LOGGER.info("Model is clean, not saving: %s", modelfile) | ||
|
||
if changes: | ||
if no_commit: | ||
LOGGER.info("Not committing changes (--no-commit)") | ||
else: | ||
LOGGER.info("Committing changes") | ||
subprocess.call( | ||
["git", "commit", "-m", COMMIT_MSG], | ||
env=os.environ | {"SKIP": "fix-capella-fragment-links"}, | ||
) | ||
|
||
|
||
def find_tracked_models(models: cabc.Sequence[str]) -> cabc.Iterable[str]: | ||
"""Find all tracked models in the current Git repository. | ||
|
||
If a list of models is provided, filter the result to only include | ||
those models. | ||
|
||
This function always returns paths relative to the current working | ||
directory. Any provided models that live outside of the CWD are | ||
ignored. | ||
""" | ||
if not models: | ||
filter: cabc.Container = EverythingContainer() | ||
else: | ||
line_length = sys.maxsize | ||
|
||
changed = False | ||
|
||
tree = ET.parse(path) | ||
root = tree.getroot() | ||
|
||
root_replacement = "../" if is_fragment else "" | ||
fragment_replacement = "" if is_fragment else "fragments/" | ||
|
||
for elem in root.iter(): | ||
for key, value in list(elem.attrib.items()): | ||
# Use the regex pattern to find and replace attribute values | ||
while match := root_pattern.search(value): | ||
changed = True | ||
index_match = match.span(1) | ||
value = ( | ||
value[: index_match[0]] | ||
+ root_replacement | ||
+ value[index_match[1] :] | ||
) | ||
filter = [os.path.relpath(m) for m in models] | ||
|
||
while match := fragment_pattern.search(value): | ||
changed = True | ||
index_match = match.span(1) | ||
value = ( | ||
value[: index_match[0]] | ||
+ fragment_replacement | ||
+ value[index_match[1] :] | ||
for file in subprocess.check_output( | ||
["git", "ls-files", "-cz"], text=True | ||
).split("\0"): | ||
if file.endswith(".aird") and file in filter: | ||
yield file | ||
|
||
|
||
def is_file_dirty(path: pathlib.PurePosixPath) -> bool: | ||
"""Check if the given file is dirty (worktree != index).""" | ||
p = subprocess.run( | ||
["git", "diff-index", "--quiet", "--", path], | ||
check=False, | ||
) | ||
return p.returncode != 0 | ||
|
||
|
||
def fix_model(model: loader.MelodyLoader) -> bool: | ||
"""Fix the links in the provided model. | ||
|
||
Returns | ||
------- | ||
bool | ||
True if the model was modified, False otherwise. | ||
""" | ||
dirty = False | ||
for element in model.iterall(): | ||
if element.tag == "semanticResources": | ||
if element.text == "": | ||
element.getparent().remove(element) | ||
continue | ||
|
||
targetfrag = element.text.rsplit("/", 1)[-1] | ||
frags = [i for i in model.trees if i.name == targetfrag] | ||
if len(frags) != 1: | ||
raise RuntimeError( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we execute this as a post-commit-hook, the hook would in this case fail, but the users commit (which may contains several broken links) would still be present and could be pushed. Imo it would be a better to fix the rest of the model on best effort and report errors at the end of the hook There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So do you propose to just not touch links with ambiguous target at all (but still fix all others)? I suppose that would at least fix some of the issues, but the model would still be in a broken state anyways. In any case, definitely something the user should be aware of. I'm just not sure how we could make the user aware, as Capella generally hides hook output completely (even if it fails). Maybe we could try to show a message box with zenity or something similar if that's installed? |
||
f"Ambiguous fragment name: {element.text}" | ||
" - please report this as bug at" | ||
" https://github.com/DSD-DBS/capella-git-hooks" | ||
) | ||
|
||
elem.attrib[key] = value | ||
sourcefrag = model.find_fragment(element) | ||
if sourcefrag == frags[0]: | ||
element.getparent().remove(element) | ||
continue | ||
|
||
if changed: | ||
exs.write(tree, path, line_length=line_length, siblings=True) | ||
link = os.path.relpath(frags[0], sourcefrag.parent) | ||
if link != element.text: | ||
element.text = link | ||
dirty = True | ||
|
||
return changed | ||
for attr, value in element.attrib.items(): | ||
if attr == "href": | ||
include_target_type = False | ||
else: | ||
include_target_type = None | ||
|
||
try: | ||
links = tuple(helpers.split_links(value)) | ||
except ValueError: | ||
continue | ||
|
||
def commit_changes(changed_files_to_be_committed: list[str]): | ||
"""Add and commit the provided list of files using a predefined message.""" | ||
subprocess.call(["git", "add"] + changed_files_to_be_committed) | ||
subprocess.call( | ||
["git", "commit", "-m", "fix[by-script]: merge tool index-prefix"] | ||
) | ||
new_links: list[str] = [] | ||
for link in links: | ||
_, _, target_id = link.partition("#") | ||
try: | ||
target = model.follow_link(None, target_id) | ||
except KeyError: | ||
LOGGER.error("Cannot repair dangling link: #%s", target_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The user will never find this error message. Maybe we should think about collecting error messages and let the hook fail after repairing the model on best effort basis? |
||
else: | ||
link = model.create_link( | ||
element, | ||
target, | ||
include_target_type=include_target_type, | ||
) | ||
new_links.append(link) | ||
|
||
new_value = " ".join(new_links) | ||
if new_value != value.strip(): | ||
element.attrib[attr] = new_value | ||
dirty = True | ||
|
||
return dirty | ||
|
||
|
||
class EverythingContainer(cabc.Container): | ||
"""A container that returns True for any key.""" | ||
|
||
def __contains__(self, key: object) -> bool: | ||
"""Return True.""" | ||
return True | ||
|
||
|
||
class _IndexWriter(capellambse.FileHandler): | ||
"""A file handler that writes to the worktree and the git index.""" | ||
|
||
def __init__(self) -> None: | ||
super().__init__(".") | ||
|
||
def get_model_info(self) -> modelinfo.ModelInfo: | ||
return modelinfo.ModelInfo(url="index:") | ||
|
||
def open( | ||
self, | ||
filename: str | pathlib.PurePosixPath, | ||
mode: t.Literal["r", "rb", "w", "wb"] = "rb", | ||
) -> t.BinaryIO: | ||
if "w" not in mode: | ||
raise ValueError("IndexWriter can only be used for writing") | ||
|
||
path = helpers.normalize_pure_path(filename, base=self.subdir) | ||
return _IndexFile(path) # type: ignore[abstract] | ||
|
||
|
||
class _IndexFile(t.BinaryIO): | ||
def __init__(self, path: pathlib.PurePosixPath) -> None: | ||
# pylint: disable=consider-using-with | ||
self.__path = path | ||
self.__file = open(path, "wb") | ||
|
||
def __enter__(self) -> _IndexFile: | ||
return self | ||
|
||
def __exit__(self, *args: object) -> None: | ||
self.close() | ||
|
||
def __del__(self) -> None: | ||
import warnings | ||
|
||
warnings.warn("IndexFile should be closed explicitly", ResourceWarning) | ||
self.close() | ||
|
||
def main(): | ||
"""Fix links in all tracked, unchanged files and commit the changes.""" | ||
tracked_unmodified_files = get_unmodified_tracked_files() | ||
changed_files = [] | ||
for file in tracked_unmodified_files: | ||
if file_name_pattern.match(file): | ||
if fix_xml(file): | ||
changed_files.append(file) | ||
def close(self) -> None: | ||
self.__file.close() | ||
subprocess.check_call(["git", "add", self.__path]) | ||
|
||
if changed_files: | ||
commit_changes(changed_files) | ||
def write(self, s: bytes) -> int: # type: ignore[override] | ||
return self.__file.write(s) | ||
|
||
|
||
if __name__ == "__main__": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As this repository is for capella-git-hooks, I don't think it is necessary to include capella in the hook ID. I get the point of adding
fragment
to the ID, but I am not sure whether this justifies the additional work it takes to change the hook in the pre-commit config of the projects...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at this scope, yes, but the hook ID is also used in the (pre-commit wide)
SKIP
environment variable. That's why I included "capella" there. See the commit command that this script executes (translated to bash):