#!/usr/bin/env python3
# This code is part of Qiskit.
#
# (C) Copyright IBM 2024
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

# NOTE: This module is the content of `tools/verify_images.py` as introduced by
# commit efcffa3c ("Add a linter to check the images' alt text (#138)"). The same
# commit wires it into CI by adding `python tools/verify_images.py` to the
# "Run lint" step of `.github/workflows/ci.yml`, right after
# `python -m black --check .`.

"""Utility script to verify that all images have alt text"""

from pathlib import Path
import multiprocessing
import sys
import glob

# List of allowlist files that the checker will not verify
ALLOWLIST_MISSING_ALT_TEXT = []


def is_image(line: str) -> bool:
    """Return True if ``line`` starts an ``.. image:`` or ``.. plot:`` directive.

    Leading and trailing whitespace is ignored, so indented directives (for
    example inside a docstring) are detected as well.
    """
    return line.strip().startswith((".. image:", ".. plot:"))


def is_option(line: str) -> bool:
    """Return True if ``line``, once stripped, starts with ``:``.

    Lines of this shape that directly follow a directive are treated as its
    options (e.g. ``:alt: Some text``).
    """
    return line.strip().startswith(":")


def is_valid_image(options: list[str]) -> bool:
    """Return True if the directive options contain ``:alt:`` or ``:nofigs:``.

    Args:
        options: The raw option lines collected right after a directive.
    """
    stripped_options = [option.strip() for option in options]
    alt_exists = any(option.startswith(":alt:") for option in stripped_options)
    nofigs_exists = any(option.startswith(":nofigs:") for option in stripped_options)

    # Only `.. plot::` directives without the `:nofigs:` option are required to have alt text.
    # Meanwhile, all `.. image::` directives need alt text and they don't have a `:nofigs:` option.
    return alt_exists or nofigs_exists


def _find_invalid_images(lines: list[str]) -> list[str]:
    """Return one error message per directive in ``lines`` that lacks alt text."""
    invalid_images: list[str] = []

    image_found = False
    options: list[str] = []

    # The trailing empty sentinel line is neither an option nor a directive. It
    # guarantees that a directive sitting at the very end of the file (possibly
    # followed only by options) is still flushed and validated.
    for line_index, line in enumerate([*lines, ""]):
        if image_found:
            if is_option(line):
                options.append(line)
                continue

            # Else, the prior image_found has no more options so we should determine if it was valid.
            #
            # Note that, either way, we do not early exit out of the loop iteration because this `line`
            # might be the start of a new image.
            if not is_valid_image(options):
                # `line_index` is the 0-based index of the first line after the
                # options, so subtracting the number of options yields the
                # 1-based line number of the directive itself.
                image_line = line_index - len(options)
                invalid_images.append(
                    f"- Error in line {image_line}: {lines[image_line - 1].strip()}"
                )

        image_found = is_image(line)
        options = []

    return invalid_images


def validate_image(file_path: str) -> tuple[str, list[str]]:
    """Validate all the images of a single file

    Args:
        file_path: Path of the file to scan.

    Returns:
        A ``(file_path, errors)`` tuple where ``errors`` holds one message per
        directive without alt text. Files listed in
        ``ALLOWLIST_MISSING_ALT_TEXT`` are not read and always yield no errors.
    """

    if file_path in ALLOWLIST_MISSING_ALT_TEXT:
        return (file_path, [])

    # Decode explicitly as UTF-8 so the result does not depend on the locale of
    # the machine running the linter.
    lines = Path(file_path).read_text(encoding="utf-8").splitlines()

    return (file_path, _find_invalid_images(lines))


def main() -> None:
    """Check every Python file of the package and exit non-zero on missing alt text."""
    # Sorted so that the report order is stable across filesystems and runs.
    files = sorted(glob.glob("qiskit_ibm_transpiler/**/*.py", recursive=True))

    with multiprocessing.Pool() as pool:
        results = pool.map(validate_image, files)

    failed_files = {
        file: image_errors for file, image_errors in results if image_errors
    }

    if not failed_files:
        print("✅ All images have alt text")
        sys.exit(0)

    print("💔 Some images are missing the alt text", file=sys.stderr)

    for file, image_errors in failed_files.items():
        print(f"\nErrors found in {file}:", file=sys.stderr)

        for image_error in image_errors:
            print(image_error, file=sys.stderr)

    print(
        "\nAlt text is crucial for making documentation accessible to all users. It should serve the same purpose as the images on the page, conveying the same meaning rather than describing visual characteristics. When an image contains words that are important to understanding the content, the alt text should include those words as well.",
        file=sys.stderr,
    )

    sys.exit(1)


if __name__ == "__main__":
    main()