Skip to content

Commit ad54f02

Browse files
committed
Process / return a new typed Lang class in i18n methods
1 parent 0feff56 commit ad54f02

File tree

3 files changed

+111
-22
lines changed

3 files changed

+111
-22
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
- Automatically index PDF documents content #167
1616
- Automatically set proper title on PDF documents #168
1717

18+
### Changed
19+
- **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro()` now accept / return a new typed `Lang` class #151
20+
1821
### Fixed
1922

2023
- Metadata length validation is buggy for unicode strings #158

src/zimscraperlib/i18n.py

Lines changed: 95 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,78 @@ def setlocale(root_dir: pathlib.Path, locale_name: str):
6565
) from exc
6666

6767

68-
def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
68+
class Lang:
69+
def __init__(self, lang_data: dict):
70+
self._lang_data = lang_data
71+
72+
@property
73+
def iso_639_1(self) -> str | None:
74+
"""ISO-639-1 language code"""
75+
return self._lang_data["iso-639-1"]
76+
77+
@property
78+
def iso_639_2b(self) -> str | None:
79+
"""ISO-639-2b language code"""
80+
return self._lang_data["iso-639-2b"]
81+
82+
@property
83+
def iso_639_2t(self) -> str | None:
84+
"""ISO-639-2t language code"""
85+
return self._lang_data["iso-639-2t"]
86+
87+
@property
88+
def iso_639_3(self) -> str | None:
89+
"""ISO-639-3 language code"""
90+
return self._lang_data["iso-639-3"]
91+
92+
@property
93+
def iso_639_5(self) -> str | None:
94+
"""ISO-639-5 language code"""
95+
return self._lang_data["iso-639-5"]
96+
97+
@property
98+
def english(self) -> str:
99+
"""language name in English"""
100+
return self._lang_data["english"]
101+
102+
@property
103+
def native(self) -> str:
104+
"""language name in native language"""
105+
return self._lang_data["native"]
106+
107+
@property
108+
def iso_types(self) -> list[str]:
109+
"""list of supported iso types"""
110+
return self._lang_data["iso_types"]
111+
112+
@property
113+
def query(self) -> list[str]:
114+
"""Query issued for these language details"""
115+
return self._lang_data["query"]
116+
117+
@property
118+
def querytype(self) -> list[str]:
119+
"""Type of query issued to retrieve language details"""
120+
return self._lang_data["querytype"]
121+
122+
def get(self, attribute: str) -> str | None:
123+
"""Return language code for given ISO code"""
124+
return self._lang_data.get(attribute)
125+
126+
def __eq__(self, value: object) -> bool:
127+
"""Compare with another value, either Lang or dict supported"""
128+
if isinstance(value, Lang):
129+
return self._lang_data == value._lang_data
130+
if isinstance(value, dict):
131+
return self._lang_data == value
132+
return False
133+
134+
def __iter__(self):
135+
"""Iterate over dict properties"""
136+
return self._lang_data.__iter__()
137+
138+
139+
def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
69140
"""ISO-639-x languages details for lang. Raises NotFound
70141
71142
Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
@@ -112,51 +183,54 @@ def replace_types(new_type: str) -> str:
112183

113184
if isolang.macro():
114185
return (
115-
lang_data,
186+
Lang(lang_data),
116187
get_iso_lang_data(isolang.macro().name)[0],
117188
) # first item in the returned tuple
118-
return lang_data, None
189+
return Lang(lang_data), None
119190

120191

121-
def find_language_names(query: str, lang_data: dict | None = None) -> tuple[str, str]:
122-
"""(native, english) language names for lang with help from language_details dict
192+
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
193+
"""(native, english) language names for lang with help from lang_data
123194
124195
Falls back to English name if available or query if not"""
125196
if lang_data is None:
126-
lang_data = get_language_details(query, failsafe=True) or {}
197+
lang_data = get_language_details(query, failsafe=True)
198+
if not lang_data:
199+
return query, query
200+
127201
try:
128202
query_locale = babel.Locale.parse(query)
129-
return query_locale.get_display_name(), query_locale.get_display_name(
130-
"en"
131-
) # pyright: ignore
203+
if native_display_name := query_locale.get_display_name():
204+
if english_display_name := query_locale.get_display_name("en"):
205+
return native_display_name, english_display_name
132206
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
133207
pass
134208

135209
# ISO code lookup order matters (most qualified first)!
136210
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
137211
try:
138212
query_locale = babel.Locale.parse(lang_data.get(iso_level))
139-
return query_locale.get_display_name(), query_locale.get_display_name(
140-
"en"
141-
) # pyright: ignore
213+
if native_display_name := query_locale.get_display_name():
214+
if english_display_name := query_locale.get_display_name("en"):
215+
return native_display_name, english_display_name
142216
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
143217
pass
144-
default = lang_data.get("english", query)
218+
default = lang_data.get("english") or query
145219
return default, default
146220

147221

148-
def update_with_macro(lang_data: dict, macro_data: dict):
222+
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
149223
"""update empty keys from lang_data with ones of macro_data"""
150224
if macro_data:
151-
for key, value in macro_data.items():
152-
if key in lang_data and not lang_data[key]:
153-
lang_data[key] = value
225+
for key, value in macro_data._lang_data.items():
226+
if key in lang_data and not lang_data.get(key):
227+
lang_data._lang_data[key] = value
154228
return lang_data
155229

156230

157231
def get_language_details(
158232
query: str, failsafe: bool | None = False # noqa: FBT002
159-
) -> dict:
233+
) -> Lang | None:
160234
"""language details dict from query.
161235
162236
Raises NotFound or return `und` language details if failsafe
@@ -194,12 +268,12 @@ def get_language_details(
194268
lang_data, macro_data = get_iso_lang_data(adjusted_query)
195269
except NotFound as exc:
196270
if failsafe:
197-
return None # pyright: ignore
271+
return None
198272
raise exc
199273

200-
iso_data = update_with_macro(lang_data, macro_data) # pyright: ignore
274+
iso_data = update_with_macro(lang_data, macro_data)
201275
native_name, english_name = find_language_names(native_query, iso_data)
202-
iso_data.update(
276+
iso_data._lang_data.update(
203277
{
204278
"english": english_name,
205279
"native": native_name,

tests/i18n/test_i18n.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,19 @@ def test_lang_details(query, expected):
190190
with pytest.raises(NotFound):
191191
get_language_details(query)
192192
else:
193-
assert get_language_details(query) == expected
193+
result = get_language_details(query)
194+
assert result == expected
195+
if result:
196+
assert result.iso_639_1 == expected.get("iso-639-1")
197+
assert result.iso_639_2b == expected.get("iso-639-2b")
198+
assert result.iso_639_2t == expected.get("iso-639-2t")
199+
assert result.iso_639_3 == expected.get("iso-639-3")
200+
assert result.iso_639_5 == expected.get("iso-639-5")
201+
assert result.english == expected.get("english")
202+
assert result.native == expected.get("native")
203+
assert result.iso_types == expected.get("iso_types")
204+
assert result.query == expected.get("query")
205+
assert result.querytype == expected.get("querytype")
194206

195207

196208
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)