Skip to content

Commit c76f694

Browse files
committed
fixup! Metadata length validation: count graphemes instead of unicode code points
1 parent ef2aceb commit c76f694

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ dependencies = [
1818
"beautifulsoup4>=4.9.3,<5.0",
1919
"lxml>=4.6.3,<6.0",
2020
"optimize-images>=1.3.6,<2.0",
21+
# regex has no upper-bound due to "date-based" release numbers, no semver, so their
22+
# promise is that they will never (or always) break the API, and the API is very
23+
# limited and we use only a very small subset of it.
2124
"regex>=2020.7.14",
2225
# youtube-dl should be updated as frequently as possible
2326
"yt-dlp"

src/zimscraperlib/zim/metadata.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
from zimscraperlib.image.probing import is_valid_image
1919

2020

21-
def grapheme_count(value: str) -> int:
22-
"""count number of graphemes (visually perceived characters) in a given string"""
21+
def nb_grapheme_for(value: str) -> int:
22+
"""Number of graphemes (visually perceived characters) in a given string"""
2323
return len(regex.findall(r"\X", value))
2424

2525

@@ -50,7 +50,7 @@ def validate_standard_str_types(name: str, value: str):
5050

5151
def validate_title(name: str, value: str):
5252
"""ensures Title metadata is within recommended length"""
53-
if name == "Title" and grapheme_count(value) > RECOMMENDED_MAX_TITLE_LENGTH:
53+
if name == "Title" and nb_grapheme_for(value) > RECOMMENDED_MAX_TITLE_LENGTH:
5454
raise ValueError(f"{name} is too long.")
5555

5656

@@ -92,7 +92,7 @@ def validate_description(name: str, value: str):
9292
"""ensures Description metadata is within required length"""
9393
if (
9494
name == "Description"
95-
and grapheme_count(value) > MAXIMUM_DESCRIPTION_METADATA_LENGTH
95+
and nb_grapheme_for(value) > MAXIMUM_DESCRIPTION_METADATA_LENGTH
9696
):
9797
raise ValueError(f"{name} is too long.")
9898

@@ -101,7 +101,7 @@ def validate_longdescription(name: str, value: str):
101101
"""ensures LongDescription metadata is within required length"""
102102
if (
103103
name == "LongDescription"
104-
and grapheme_count(value) > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
104+
and nb_grapheme_for(value) > MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH
105105
):
106106
raise ValueError(f"{name} is too long.")
107107

0 commit comments

Comments
 (0)