Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Process / return a new typed Lang class in i18n methods #183

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New `creator.Creator.convert_and_check_metadata` to convert metadata to bytes or str for known use cases and check proper type is passed to libzim
- Add svg2png image conversion function #113
- Add `conversion.convert_svg2png` image conversion function + support for SVG in `probing.format_for` #113
- Add `i18n.Lang` class used as typed result of i18n operations #151

### Changed

Expand All @@ -31,6 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **BREAKING** `creator.Creator.add_metadata` and `creator.Creator.validate_metadata` now only accepts `bytes | str` as value (it must have been converted before call)
- **BREAKING** second argument of `creator.Creator.add_metadata` has been renamed to `value` instead of `content` to align with other methods
- When a type issue arises in metadata checks, wrong value type is displayed in exception
- **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro()` now process / return a new typed `Lang` class #151
- **BREAKING** Rename `i18n.NotFound` to `i18n.NotFoundError`
benoit74 marked this conversation as resolved.
Show resolved Hide resolved

### Fixed

Expand Down
106 changes: 77 additions & 29 deletions src/zimscraperlib/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,63 @@ def setlocale(root_dir: pathlib.Path, locale_name: str):
) from exc


def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
"""ISO-639-x languages details for lang. Raises NotFound
class Lang(dict):

Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
english, iso_types
@property
def iso_639_1(self) -> str | None:
    """ISO-639-1 language code

    Typed accessor for the "iso-639-1" key of this dict; the stored value
    may be None. Raises KeyError if the key has not been set."""
    return self["iso-639-1"]

See get_language_details() for details"""
@property
def iso_639_2b(self) -> str | None:
    """ISO-639-2b language code

    Typed accessor for the "iso-639-2b" key of this dict; the stored value
    may be None. Raises KeyError if the key has not been set."""
    return self["iso-639-2b"]

@property
def iso_639_2t(self) -> str | None:
    """ISO-639-2t language code

    Typed accessor for the "iso-639-2t" key of this dict; the stored value
    may be None. Raises KeyError if the key has not been set."""
    return self["iso-639-2t"]

@property
def iso_639_3(self) -> str | None:
    """ISO-639-3 language code

    Typed accessor for the "iso-639-3" key of this dict; the stored value
    may be None. Raises KeyError if the key has not been set."""
    return self["iso-639-3"]

@property
def iso_639_5(self) -> str | None:
    """ISO-639-5 language code

    Typed accessor for the "iso-639-5" key of this dict; the stored value
    may be None. Raises KeyError if the key has not been set."""
    return self["iso-639-5"]

@property
def english(self) -> str:
    """language name in English

    Typed accessor for the "english" key of this dict.
    Raises KeyError if the key has not been set."""
    return self["english"]

@property
def native(self) -> str:
    """language name in native language

    Typed accessor for the "native" key of this dict.
    Raises KeyError if the key has not been set."""
    return self["native"]

@property
def iso_types(self) -> list[str]:
    """list of supported iso types

    Typed accessor for the "iso_types" key of this dict.
    Raises KeyError if the key has not been set."""
    return self["iso_types"]

@property
def query(self) -> str:
    """Query issued for these language details

    Typed accessor for the "query" key of this dict.
    Raises KeyError if the key has not been set."""
    # NOTE(review): annotated as a single str on the assumption that this is
    # the `query: str` argument given to get_language_details() -- confirm
    # against the code that stores the "query" key.
    return self["query"]

@property
def querytype(self) -> str:
    """Type of query issued to retrieve language details

    Typed accessor for the "querytype" key of this dict.
    Raises KeyError if the key has not been set."""
    # NOTE(review): annotated as a single str on the assumption that one query
    # has exactly one type label -- confirm against the code that stores the
    # "querytype" key.
    return self["querytype"]


def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
"""ISO-639-x languages details for lang. Raises NotFoundError

Returns a tuple (main_language, macro_language | None)"""

iso_types = []

Expand Down Expand Up @@ -105,9 +155,9 @@ def replace_types(new_type: str) -> str:
if str(getattr(isolang, code_type)).lower() == lang.lower():
iso_types.append(replace_types(code_type))

lang_data = {
f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS
}
lang_data = Lang(
**{f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS}
)
lang_data.update({"english": isolang.name, "iso_types": iso_types})

if isolang.macro():
Expand All @@ -118,53 +168,51 @@ def replace_types(new_type: str) -> str:
return lang_data, None


def find_language_names(
query: str, lang_data: dict | None = None
) -> tuple[str | None, str | None]:
"""(native, english) language names for lang with help from language_details dict
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
"""(native, english) language names for lang with help from lang_data

Falls back to English name if available or query if not"""
if lang_data is None:
lang_data = get_language_details(query, failsafe=True) or {}
lang_data = get_language_details(query, failsafe=True)
if not lang_data:
return query, query

try:
query_locale = babel.Locale.parse(query)
return query_locale.get_display_name(), query_locale.get_display_name("en")
if native_display_name := query_locale.get_display_name():
if english_display_name := query_locale.get_display_name("en"):
return native_display_name, english_display_name
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
pass

# ISO code lookup order matters (most qualified first)!
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
try:
query_locale = babel.Locale.parse(lang_data.get(iso_level))
return query_locale.get_display_name(), query_locale.get_display_name("en")
if native_display_name := query_locale.get_display_name():
if english_display_name := query_locale.get_display_name("en"):
return native_display_name, english_display_name
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
pass
default = lang_data.get("english", query)
default = lang_data.get("english") or query
return default, default


def update_with_macro(lang_data: dict, macro_data: dict | None):
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
"""update empty keys from lang_data with ones of macro_data"""
if macro_data:
for key, value in macro_data.items():
if key in lang_data and not lang_data[key]:
if key in lang_data and not lang_data.get(key):
lang_data[key] = value
return lang_data


def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
def get_language_details(
query: str, failsafe: bool | None = False # noqa: FBT002
) -> Lang | None:
"""language details dict from query.

Raises NotFound or return `und` language details if failsafe

iso-639-1: str ISO-639-1 language code
iso-639-2b: str ISO-639-2b language code
iso-639-2t: str ISO-639-2t language code
iso-639-3: str ISO-639-3 language code
iso-639-5: str ISO-639-5 language code
english: str language name in English
native: str language name in is native language
iso_types: [str] list of supported iso types
When query fails, either raises NotFoundError or return None, based on failsafe

"""

Expand Down
60 changes: 59 additions & 1 deletion tests/i18n/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

import locale
import pathlib
from unittest.mock import Mock

import pytest

from zimscraperlib.i18n import (
Lang,
NotFoundError,
_,
find_language_names,
Expand Down Expand Up @@ -190,7 +192,19 @@ def test_lang_details(query, expected):
with pytest.raises(NotFoundError):
get_language_details(query)
else:
assert get_language_details(query) == expected
result = get_language_details(query)
assert result == expected
if result:
assert result.iso_639_1 == expected.get("iso-639-1")
assert result.iso_639_2b == expected.get("iso-639-2b")
assert result.iso_639_2t == expected.get("iso-639-2t")
assert result.iso_639_3 == expected.get("iso-639-3")
assert result.iso_639_5 == expected.get("iso-639-5")
assert result.english == expected.get("english")
assert result.native == expected.get("native")
assert result.iso_types == expected.get("iso_types")
assert result.query == expected.get("query")
assert result.querytype == expected.get("querytype")


@pytest.mark.parametrize(
Expand All @@ -201,6 +215,7 @@ def test_lang_details(query, expected):
("bm", ("bamanakan", "Bambara")),
("zh", ("中文", "Chinese")),
("ar", ("العربية", "Arabic")),
("qq", ("qq", "qq")),
],
)
def test_lang_name(query, expected):
Expand All @@ -214,3 +229,46 @@ def test_lang_name(query, expected):
def test_translation(lang, expected):
setlocale(pathlib.Path(__file__).parent, lang)
assert _("Hello World!") == expected


@pytest.mark.parametrize("dict_data", [{}, {"iso-639-1": "ar"}])
def test_lang_equals(dict_data):
    """Lang objects built from equal mapping content compare equal."""
    reference = Lang(dict_data)
    # built from the very same mapping object
    assert reference == Lang(dict_data)
    # built from a shallow copy of the mapping
    assert reference == Lang({**dict_data})


@pytest.mark.parametrize(
    "dict_data_left, dict_data_right",
    [
        ({}, {"iso-639-1": "ar"}),
        ({"iso-639-1": "ar"}, {"iso-639-1": "ab"}),
        ({"iso-639-1": "ar"}, {"iso-639-2": "ar"}),
    ],
)
def test_lang_not_equals(dict_data_left, dict_data_right):
    """Lang objects differ when keys or values differ, and never equal a str."""
    left = Lang(dict_data_left)
    right = Lang(dict_data_right)
    assert left != right
    # comparison with an unrelated type must not report equality either
    assert left != "foo"


@pytest.mark.parametrize(
    "babel_native_return, babel_english_return, expected_native, expected_english",
    [
        ("Native value", "English value", "Native value", "English value"),
        (None, "English value", "German", "German"),
        ("Native value", None, "German", "German"),
    ],
)
def test_find_language_names(
    mocker, babel_native_return, babel_english_return, expected_native, expected_english
):
    """find_language_names() only trusts babel when both names are returned.

    babel.Locale.parse is patched so that the display-name lookups yield the
    parametrized values; when either one is missing, the expected result is
    the English name ("German") for both members of the tuple."""

    def fake_display_name(lang=None):
        # no argument -> native display name, any argument -> English one
        if lang is None:
            return babel_native_return
        return babel_english_return

    fake_locale = Mock()
    fake_locale.get_display_name.side_effect = fake_display_name
    mocker.patch("babel.Locale.parse", return_value=fake_locale)

    names = find_language_names("de")
    assert names == (expected_native, expected_english)
Loading