Add the sphinx-alt-text-validator tool to VCS install it in each API …

…repo (#2445) This PR adds a tool that all the APIs should use to avoid having images without alt text. By having the script in a central repository, we can use the VCS method to pip install it everywhere instead of duplicating the same script 9 times. The tool is currently being used in qiskit, runtime, and transpiler. Code -> https://github.com/Qiskit/qiskit/blob/main/tools/verify_images.py The code added in this PR is the same but split into two files (`__init__.py` and `verify_images.py`) and with two new arguments: one to specify the folder whose files the linter will check, and one to enumerate all the files the user wants to skip, replacing the `ALLOWLIST_MISSING_ALT_TEXT` list that was defined at the beginning of the script. To install and test the tool we use ```bash pip install -e "git+https://github.com/arnaucasau/documentation.git@AC/sphinx-alt-text-validator#egg=sphinx-alt-text-validator&subdirectory=scripts/image-tester" ``` and run `sphinx-alt-text-validator -f [FolderToCheck]` > [!NOTE] > This PR only copies the script to the repo and adds the necessary files to make it installable from the API repos. In a follow-up, I'm planning to add some tests and maybe do a little refactoring of verify_images.py --------- Co-authored-by: Frank Harkins <frankharkins@hotmail.co.uk>
Qiskit · Dec 9, 2024 · bc34108 · bc34108
1 parent 21c31bc
commit bc34108
Show file tree

Hide file tree

Showing 3 changed files with 148 additions and 0 deletions.
diff --git a/scripts/image-tester/pyproject.toml b/scripts/image-tester/pyproject.toml
@@ -0,0 +1,21 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "sphinx-alt-text-validator"
+version = "0.0.1"
+authors = [
+  { name="Qiskit docs team" },
+]
+description = "This is a tool that helps improving the accessibility of a project that uses Sphinx to build their documentation by detecting images without alt text defined"
+# The package annotates its functions with builtin generics (`list[str]`,
+# `tuple[str, list[str]]`). Those annotations are evaluated when the module is
+# imported and are only subscriptable from Python 3.9 onwards, so 3.8 cannot
+# import the package.
+requires-python = ">=3.9"
+license = "Apache-2.0"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+]
+
+# Console entry point: `sphinx-alt-text-validator` runs `main()` from the package's `__init__.py`.
+[project.scripts]
+sphinx-alt-text-validator = "sphinx_alt_text_validator:main"
diff --git a/scripts/image-tester/sphinx_alt_text_validator/__init__.py b/scripts/image-tester/sphinx_alt_text_validator/__init__.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2024
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+from .verify_images import validate_image
+import multiprocessing
+import glob
+import sys
+import argparse
+
+
+def main() -> None:
+    """Command-line entry point: report images without alt text.
+
+    Recursively collects every ``*.py`` file under ``--folder``, drops the
+    files listed in ``--skip``, and validates the remaining files in parallel
+    with ``validate_image``.
+
+    The process exits with status 0 when no file reports an error. Otherwise
+    the errors are printed to stderr, grouped by file, and the process exits
+    with status 1.
+    """
+    # Function-scope import so the module-level import block stays untouched.
+    import os
+
+    # `prog` matches the console script name declared in pyproject.toml, so the
+    # usage and error messages name the command the user actually typed.
+    parser = argparse.ArgumentParser(prog="sphinx-alt-text-validator")
+    parser.add_argument(
+        "-f",
+        "--folder",
+        required=True,
+        help="folder that is searched recursively for *.py files to check",
+    )
+    parser.add_argument(
+        "-s",
+        "--skip",
+        nargs="+",
+        help="paths of files that should not be checked",
+    )
+    args = parser.parse_args()
+
+    # Compare normalized paths so that equivalent spellings of the same file
+    # (e.g. `./pkg/a.py` and `pkg/a.py`) are recognised as the same entry.
+    # A set keeps each membership test O(1).
+    skipped = {os.path.normpath(path) for path in args.skip or []}
+    files = glob.glob(f"{args.folder}/**/*.py", recursive=True)
+    filtered_files = [
+        file for file in files if os.path.normpath(file) not in skipped
+    ]
+
+    with multiprocessing.Pool() as pool:
+        results = pool.map(validate_image, filtered_files)
+
+    # Keep only the files that reported at least one image error.
+    failed_files = {
+        file: image_errors for file, image_errors in results if image_errors
+    }
+
+    if not failed_files:
+        print("✅ All images have alt text")
+        sys.exit(0)
+
+    print("💔 Some images are missing the alt text", file=sys.stderr)
+
+    for file, image_errors in failed_files.items():
+        print(f"\nErrors found in {file}:", file=sys.stderr)
+
+        for image_error in image_errors:
+            print(image_error, file=sys.stderr)
+
+    print(
+        "\nAlt text is crucial for making documentation accessible to all users.",
+        "It should serve the same purpose as the images on the page,",
+        "conveying the same meaning rather than describing visual characteristics.",
+        "When an image contains words that are important to understanding the content,",
+        "the alt text should include those words as well.",
+        file=sys.stderr,
+    )
+
+    sys.exit(1)
diff --git a/scripts/image-tester/sphinx_alt_text_validator/verify_images.py b/scripts/image-tester/sphinx_alt_text_validator/verify_images.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2024
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Utility script to verify that all images have alt text"""
+
+from pathlib import Path
+
+
+def is_image(line: str) -> bool:
+    """Tell whether a line opens an ``image`` or ``plot`` directive.
+
+    Leading and trailing whitespace is ignored before the prefix is tested.
+    """
+    stripped = line.strip()
+    return stripped.startswith(".. image:") or stripped.startswith(".. plot:")
+
+
+def is_option(line: str) -> bool:
+    """Tell whether a line is a directive option.
+
+    A line counts as an option when its first non-whitespace character is ``:``.
+    """
+    stripped = line.strip()
+    # Slicing instead of indexing keeps an empty line from raising.
+    return stripped[:1] == ":"
+
+
+def is_valid_image(options: list[str]) -> bool:
+    """Decide whether the options of one directive satisfy the alt text rule.
+
+    The directive passes when any option line starts with ``:alt:`` or
+    ``:nofigs:``, ignoring surrounding whitespace.
+    """
+    # `.. plot::` directives with `:nofigs:` render no figure, so they need no alt text.
+    # `.. image::` directives have no `:nofigs:` option and therefore always need `:alt:`.
+    accepted_prefixes = (":alt:", ":nofigs:")
+
+    for option in options:
+        if option.strip().startswith(accepted_prefixes):
+            return True
+
+    return False
+
+
+def validate_image(file_path: str) -> tuple[str, list[str]]:
+    """Validate all the images of a single file.
+
+    The file is scanned line by line. Each line recognised by ``is_image``
+    starts a directive, and the option lines that immediately follow it are
+    collected and handed to ``is_valid_image``.
+
+    Args:
+        file_path: Path of the file to check. It is read as UTF-8 text.
+
+    Returns:
+        A tuple ``(file_path, invalid_images)`` where ``invalid_images`` holds
+        one message per failing directive, with its 1-based line number and
+        its stripped text.
+    """
+
+    invalid_images: list[str] = []
+
+    # Read as UTF-8 explicitly, the default encoding of Python source files,
+    # so the result does not depend on the locale of the machine.
+    lines = Path(file_path).read_text(encoding="utf-8").splitlines()
+
+    image_found = False
+    options: list[str] = []
+
+    # A directive is only checked once a line that is not an option is reached.
+    # The trailing empty sentinel is neither an option nor an image, so it
+    # forces the check for a directive whose options run to the last line.
+    for line_index, line in enumerate([*lines, ""]):
+        if image_found:
+            if is_option(line):
+                options.append(line)
+                continue
+
+            # Else, the prior image has no more options, so determine if it was valid.
+            #
+            # Either way, do not skip the rest of the iteration, because this
+            # `line` might be the start of a new image.
+            if not is_valid_image(options):
+                # `line_index` is the 0-based index of the first line after the
+                # options, so this is the 1-based line number of the directive.
+                image_line = line_index - len(options)
+                invalid_images.append(
+                    f"- Error in line {image_line}: {lines[image_line-1].strip()}"
+                )
+
+        image_found = is_image(line)
+        options = []
+
+    return (file_path, invalid_images)