Skip to content

Commit

Permalink
Add the sphinx-alt-text-validator tool to VCS install it in each API …
Browse files Browse the repository at this point in the history
…repo (#2445)

This PR adds a tool that all the APIs should use to avoid having images
without alt text. By having the script in a central repository, we could
use the VCS method to pip install it everywhere instead of duplicating
the same script 9 times.

The tool is currently being used in qiskit, runtime, and transpiler.
Code ->
https://github.com/Qiskit/qiskit/blob/main/tools/verify_images.py

The code added in this PR is the same but split into two files
(`__init__.py` and `verify_images.py`), and it adds two new arguments:
one to specify the folder whose files the linter will check, and one to
list the files the user wants to skip. The latter replaces the
`ALLOWLIST_MISSING_ALT_TEXT` list that was defined at the beginning of
the script.

To install and test the tool we use

```bash
pip install -e "git+https://github.com/arnaucasau/documentation.git@AC/sphinx-alt-text-validator#egg=sphinx-alt-text-validator&subdirectory=scripts/image-tester"
```
and run `sphinx-alt-text-validator -f [FolderToCheck]`

> [!NOTE]  
> This PR only copies the script to the repo and adds the necessary
files to make it installable from the API repos. In a follow-up, I'm
planning to add some tests and maybe do a little refactoring of
`verify_images.py`.

---------

Co-authored-by: Frank Harkins <frankharkins@hotmail.co.uk>
  • Loading branch information
arnaucasau and frankharkins authored Dec 9, 2024
1 parent 21c31bc commit bc34108
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 0 deletions.
21 changes: 21 additions & 0 deletions scripts/image-tester/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "sphinx-alt-text-validator"
version = "0.0.1"
authors = [
    { name="Qiskit docs team" },
]
description = "This is a tool that helps improving the accessibility of a project that uses Sphinx to build their documentation by detecting images without alt text defined"
# The package's function signatures use built-in generics (`list[str]`,
# `tuple[str, list[str]]`), which are evaluated when the module is imported
# and are only subscriptable from Python 3.9 onwards.
requires-python = ">=3.9"
license = "Apache-2.0"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
]

# Console script: running `sphinx-alt-text-validator` calls `main()` from the package.
[project.scripts]
sphinx-alt-text-validator = "sphinx_alt_text_validator:main"
59 changes: 59 additions & 0 deletions scripts/image-tester/sphinx_alt_text_validator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# This code is part of Qiskit.
#
# (C) Copyright IBM 2024
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

import argparse
import glob
import multiprocessing
import os
import sys

from .verify_images import validate_image


def main() -> None:
    """Command-line entry point: report images without alt text.

    Parses ``-f/--folder`` (required) and ``-s/--skip`` (optional list of
    files to ignore), runs ``validate_image`` in a process pool on every
    ``*.py`` file found recursively under the folder, and prints the result.

    Exits with status 0 when no file has errors. Otherwise the errors are
    printed to stderr and the process exits with status 1.
    """
    # The program name matches the console script declared in pyproject.toml,
    # so usage and error messages show the command the user actually typed.
    parser = argparse.ArgumentParser(prog="sphinx-alt-text-validator")
    parser.add_argument(
        "-f",
        "--folder",
        required=True,
        help="folder searched recursively for Python files to check",
    )
    parser.add_argument(
        "-s",
        "--skip",
        nargs="+",
        help="files to exclude from the check",
    )
    args = parser.parse_args()

    # Compare normalized paths so that equivalent spellings of the same file
    # (e.g. `./docs/a.py` vs `docs/a.py`, or a doubled separator) still match.
    skipped = {os.path.normpath(path) for path in args.skip or []}
    # Sorting keeps the order of the report stable between runs.
    files = sorted(glob.glob(f"{args.folder}/**/*.py", recursive=True))
    filtered_files = [
        file for file in files if os.path.normpath(file) not in skipped
    ]

    with multiprocessing.Pool() as pool:
        results = pool.map(validate_image, filtered_files)

    # Keep only the files that reported at least one error.
    failed_files = {
        file: image_errors for file, image_errors in results if image_errors
    }

    if not failed_files:
        print("✅ All images have alt text")
        sys.exit(0)

    print("💔 Some images are missing the alt text", file=sys.stderr)

    for file, image_errors in failed_files.items():
        print(f"\nErrors found in {file}:", file=sys.stderr)

        for image_error in image_errors:
            print(image_error, file=sys.stderr)

    print(
        "\nAlt text is crucial for making documentation accessible to all users.",
        "It should serve the same purpose as the images on the page,",
        "conveying the same meaning rather than describing visual characteristics.",
        "When an image contains words that are important to understanding the content,",
        "the alt text should include those words as well.",
        file=sys.stderr,
    )

    sys.exit(1)
68 changes: 68 additions & 0 deletions scripts/image-tester/sphinx_alt_text_validator/verify_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# This code is part of Qiskit.
#
# (C) Copyright IBM 2024
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Utility script to verify that all images have alt text"""

from pathlib import Path


def is_image(line: str) -> bool:
    """Return True when the line starts an ``image`` or ``plot`` directive."""
    directive_prefixes = (".. image:", ".. plot:")
    stripped = line.strip()
    return any(stripped.startswith(prefix) for prefix in directive_prefixes)


def is_option(line: str) -> bool:
    """Return True when the first non-blank character of the line is a colon."""
    without_indent = line.lstrip()
    return without_indent[:1] == ":"


def is_valid_image(options: list[str]) -> bool:
    """Check the options of one image directive.

    The image is considered valid when at least one option starts with
    ``:alt:`` or with ``:nofigs:`` (ignoring surrounding whitespace).
    """
    # Only `.. plot::` directives without the `:nofigs:` option are required to have alt text.
    # Meanwhile, all `.. image::` directives need alt text and they don't have a `:nofigs:` option.
    accepted_prefixes = (":alt:", ":nofigs:")
    for option in options:
        if option.strip().startswith(accepted_prefixes):
            return True
    return False


def validate_image(file_path: str) -> tuple[str, list[str]]:
    """Validate all the images of a single file.

    The file is scanned line by line. Every line recognized by ``is_image``
    starts an image; the consecutive lines after it that ``is_option``
    accepts are its options, and ``is_valid_image`` decides from those
    options whether the image is acceptable.

    Args:
        file_path: Path of the file to scan. It is read as UTF-8 text.

    Returns:
        A tuple with ``file_path`` and a list of error messages, one per
        invalid image, each containing the 1-based line number and the
        stripped text of the image directive.
    """

    invalid_images: list[str] = []

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    lines = Path(file_path).read_text(encoding="utf-8").splitlines()

    image_found = False
    options: list[str] = []

    # A trailing empty sentinel line (neither an option nor an image) makes
    # sure that an image located at the very end of the file is validated too.
    # Without it, an image is only checked once a later non-option line
    # appears.
    for line_index, line in enumerate([*lines, ""]):
        if image_found:
            if is_option(line):
                options.append(line)
                continue

            # Else, the prior image_found has no more options so we should determine if it was valid.
            #
            # Note that, either way, we do not early exit out of the loop iteration because this `line`
            # might be the start of a new image.
            if not is_valid_image(options):
                # `line_index` is the 0-based index of the first line after
                # the options, so subtracting the option count yields the
                # 1-based line number of the image directive.
                image_line = line_index - len(options)
                invalid_images.append(
                    f"- Error in line {image_line}: {lines[image_line-1].strip()}"
                )

        image_found = is_image(line)
        options = []

    return (file_path, invalid_images)

0 comments on commit bc34108

Please sign in to comment.