Skip to content

Commit 6e700d8

Browse files
committed
Process / return a new typed Lang class in i18n methods
1 parent 6eb77a5 commit 6e700d8

File tree

3 files changed

+160
-34
lines changed

3 files changed

+160
-34
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3131
- **BREAKING** `creator.Creator.add_metadata` and `creator.Creator.validate_metadata` now only accept `bytes | str` as value (it must be converted before the call)
3232
- **BREAKING** second argument of `creator.Creator.add_metadata` has been renamed to `value` instead of `content` to align with other methods
3333
- When a type issue arises in metadata checks, wrong value type is displayed in exception
34+
- **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro()` now process / return a new typed `Lang` class #151
35+
- **BREAKING** Rename `i18n.NotFound` to `i18n.NotFoundError`
3436

3537
### Fixed
3638

src/zimscraperlib/i18n.py

Lines changed: 99 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,81 @@ def setlocale(root_dir: pathlib.Path, locale_name: str):
6565
) from exc
6666

6767

68-
def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
69-
"""ISO-639-x languages details for lang. Raises NotFound
70-
71-
Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
72-
english, iso_types
73-
74-
See get_language_details() for details"""
68+
class Lang:
69+
def __init__(self, lang_data: dict):
70+
self._lang_data = lang_data
71+
72+
@property
73+
def iso_639_1(self) -> str | None:
74+
"""ISO-639-1 language code"""
75+
return self._lang_data["iso-639-1"]
76+
77+
@property
78+
def iso_639_2b(self) -> str | None:
79+
"""ISO-639-2b language code"""
80+
return self._lang_data["iso-639-2b"]
81+
82+
@property
83+
def iso_639_2t(self) -> str | None:
84+
"""ISO-639-2t language code"""
85+
return self._lang_data["iso-639-2t"]
86+
87+
@property
88+
def iso_639_3(self) -> str | None:
89+
"""ISO-639-3 language code"""
90+
return self._lang_data["iso-639-3"]
91+
92+
@property
93+
def iso_639_5(self) -> str | None:
94+
"""ISO-639-5 language code"""
95+
return self._lang_data["iso-639-5"]
96+
97+
@property
98+
def english(self) -> str:
99+
"""language name in English"""
100+
return self._lang_data["english"]
101+
102+
@property
103+
def native(self) -> str:
104+
"""language name in native language"""
105+
return self._lang_data["native"]
106+
107+
@property
108+
def iso_types(self) -> list[str]:
109+
"""list of supported iso types"""
110+
return self._lang_data["iso_types"]
111+
112+
@property
113+
def query(self) -> list[str]:
114+
"""Query issued for these language details"""
115+
return self._lang_data["query"]
116+
117+
@property
118+
def querytype(self) -> list[str]:
119+
"""Type of query issued to retrieve language details"""
120+
return self._lang_data["querytype"]
121+
122+
def get(self, attribute: str) -> str | None:
123+
"""Return language code for given ISO code"""
124+
return self._lang_data.get(attribute)
125+
126+
def __eq__(self, value: object) -> bool:
127+
"""Compare with another value, either Lang or dict supported"""
128+
if isinstance(value, Lang):
129+
return self._lang_data == value._lang_data
130+
if isinstance(value, dict):
131+
return self._lang_data == value
132+
return False
133+
134+
def __iter__(self):
135+
"""Iterate over dict properties"""
136+
return self._lang_data.__iter__()
137+
138+
139+
def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
140+
"""ISO-639-x languages details for lang. Raises NotFoundError
141+
142+
Returns a tuple (main_language, macro_language | None)"""
75143

76144
iso_types = []
77145

@@ -112,59 +180,57 @@ def replace_types(new_type: str) -> str:
112180

113181
if isolang.macro():
114182
return (
115-
lang_data,
183+
Lang(lang_data),
116184
get_iso_lang_data(isolang.macro().name)[0],
117185
) # first item in the returned tuple
118-
return lang_data, None
186+
return Lang(lang_data), None
119187

120188

121-
def find_language_names(
122-
query: str, lang_data: dict | None = None
123-
) -> tuple[str | None, str | None]:
124-
"""(native, english) language names for lang with help from language_details dict
189+
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
190+
"""(native, english) language names for lang with help from lang_data
125191
126192
Falls back to English name if available or query if not"""
127193
if lang_data is None:
128-
lang_data = get_language_details(query, failsafe=True) or {}
194+
lang_data = get_language_details(query, failsafe=True)
195+
if not lang_data:
196+
return query, query
197+
129198
try:
130199
query_locale = babel.Locale.parse(query)
131-
return query_locale.get_display_name(), query_locale.get_display_name("en")
200+
if native_display_name := query_locale.get_display_name():
201+
if english_display_name := query_locale.get_display_name("en"):
202+
return native_display_name, english_display_name
132203
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
133204
pass
134205

135206
# ISO code lookup order matters (most qualified first)!
136207
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
137208
try:
138209
query_locale = babel.Locale.parse(lang_data.get(iso_level))
139-
return query_locale.get_display_name(), query_locale.get_display_name("en")
210+
if native_display_name := query_locale.get_display_name():
211+
if english_display_name := query_locale.get_display_name("en"):
212+
return native_display_name, english_display_name
140213
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
141214
pass
142-
default = lang_data.get("english", query)
215+
default = lang_data.get("english") or query
143216
return default, default
144217

145218

146-
def update_with_macro(lang_data: dict, macro_data: dict | None):
219+
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
147220
"""update empty keys from lang_data with ones of macro_data"""
148221
if macro_data:
149-
for key, value in macro_data.items():
150-
if key in lang_data and not lang_data[key]:
151-
lang_data[key] = value
222+
for key, value in macro_data._lang_data.items():
223+
if key in lang_data and not lang_data.get(key):
224+
lang_data._lang_data[key] = value
152225
return lang_data
153226

154227

155-
def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
228+
def get_language_details(
229+
query: str, failsafe: bool | None = False # noqa: FBT002
230+
) -> Lang | None:
156231
"""language details (as a `Lang` object) from query.
157232
158-
Raises NotFound or return `und` language details if failsafe
159-
160-
iso-639-1: str ISO-639-1 language code
161-
iso-639-2b: str ISO-639-2b language code
162-
iso-639-2t: str ISO-639-2t language code
163-
iso-639-3: str ISO-639-3 language code
164-
iso-639-5: str ISO-639-5 language code
165-
english: str language name in English
166-
native: str language name in is native language
167-
iso_types: [str] list of supported iso types
233+
When the query fails, either raises NotFoundError or returns None, depending on failsafe
168234
169235
"""
170236

@@ -195,7 +261,7 @@ def get_language_details(query: str, *, failsafe: bool | None = False) -> dict |
195261

196262
iso_data = update_with_macro(lang_data, macro_data)
197263
native_name, english_name = find_language_names(native_query, iso_data)
198-
iso_data.update(
264+
iso_data._lang_data.update(
199265
{
200266
"english": english_name,
201267
"native": native_name,

tests/i18n/test_i18n.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
import locale
55
import pathlib
6+
from unittest.mock import Mock
67

78
import pytest
89

910
from zimscraperlib.i18n import (
11+
Lang,
1012
NotFoundError,
1113
_,
1214
find_language_names,
@@ -190,7 +192,19 @@ def test_lang_details(query, expected):
190192
with pytest.raises(NotFoundError):
191193
get_language_details(query)
192194
else:
193-
assert get_language_details(query) == expected
195+
result = get_language_details(query)
196+
assert result == expected
197+
if result:
198+
assert result.iso_639_1 == expected.get("iso-639-1")
199+
assert result.iso_639_2b == expected.get("iso-639-2b")
200+
assert result.iso_639_2t == expected.get("iso-639-2t")
201+
assert result.iso_639_3 == expected.get("iso-639-3")
202+
assert result.iso_639_5 == expected.get("iso-639-5")
203+
assert result.english == expected.get("english")
204+
assert result.native == expected.get("native")
205+
assert result.iso_types == expected.get("iso_types")
206+
assert result.query == expected.get("query")
207+
assert result.querytype == expected.get("querytype")
194208

195209

196210
@pytest.mark.parametrize(
@@ -201,6 +215,7 @@ def test_lang_details(query, expected):
201215
("bm", ("bamanakan", "Bambara")),
202216
("zh", ("中文", "Chinese")),
203217
("ar", ("العربية", "Arabic")),
218+
("qq", ("qq", "qq")),
204219
],
205220
)
206221
def test_lang_name(query, expected):
@@ -214,3 +229,46 @@ def test_lang_name(query, expected):
214229
def test_translation(lang, expected):
215230
setlocale(pathlib.Path(__file__).parent, lang)
216231
assert _("Hello World!") == expected
232+
233+
234+
@pytest.mark.parametrize(
235+
"dict_data",
236+
[{}, {"iso-639-1": "ar"}],
237+
)
238+
def test_lang_equals(dict_data):
239+
assert Lang(dict_data) == Lang(dict_data)
240+
assert Lang(dict_data) == Lang({**dict_data})
241+
242+
243+
@pytest.mark.parametrize(
244+
"dict_data_left, dict_data_right",
245+
[
246+
({}, {"iso-639-1": "ar"}),
247+
({"iso-639-1": "ar"}, {"iso-639-1": "ab"}),
248+
({"iso-639-1": "ar"}, {"iso-639-2": "ar"}),
249+
],
250+
)
251+
def test_lang_not_equals(dict_data_left, dict_data_right):
252+
assert Lang(dict_data_left) != Lang(dict_data_right)
253+
assert Lang(dict_data_left) != "foo"
254+
255+
256+
@pytest.mark.parametrize(
257+
"babel_native_return, babel_english_return, expected_native, expected_english",
258+
[
259+
("Native value", "English value", "Native value", "English value"),
260+
(None, "English value", "German", "German"),
261+
("Native value", None, "German", "German"),
262+
],
263+
)
264+
def test_find_language_names(
265+
mocker, babel_native_return, babel_english_return, expected_native, expected_english
266+
):
267+
mock_locale = Mock()
268+
mock_locale.get_display_name.side_effect = lambda lang=None: (
269+
babel_native_return if lang is None else babel_english_return
270+
)
271+
272+
mocker.patch("babel.Locale.parse", return_value=mock_locale)
273+
274+
assert find_language_names("de") == (expected_native, expected_english)

0 commit comments

Comments
 (0)