Skip to content

Commit b9db22f

Browse files
committed
Move i18n to real classes for proper typing in strict mode
1 parent f68d568 commit b9db22f

File tree

2 files changed

+120
-161
lines changed

2 files changed

+120
-161
lines changed

src/zimscraperlib/i18n.py

Lines changed: 85 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
#!/usr/bin/env python3
2-
# vim: ai ts=4 sts=4 et sw=4 nu
3-
41
from __future__ import annotations
52

63
import re
74

85
import babel
9-
import iso639
10-
import iso639.exceptions
6+
import iso639 # pyright: ignore[reportMissingTypeStubs]
7+
import iso639.exceptions # pyright: ignore[reportMissingTypeStubs]
118

129
ISO_LEVELS = ["1", "2b", "2t", "3", "5"]
1310

@@ -16,66 +13,75 @@ class NotFoundError(ValueError):
1613
pass
1714

1815

19-
class Lang(dict):
20-
21-
@property
22-
def iso_639_1(self) -> str | None:
23-
"""ISO-639-1 language code"""
24-
return self["iso-639-1"]
25-
26-
@property
27-
def iso_639_2b(self) -> str | None:
28-
"""ISO-639-2b language code"""
29-
return self["iso-639-2b"]
30-
31-
@property
32-
def iso_639_2t(self) -> str | None:
33-
"""ISO-639-2t language code"""
34-
return self["iso-639-2t"]
35-
36-
@property
37-
def iso_639_3(self) -> str | None:
38-
"""ISO-639-3 language code"""
39-
return self["iso-639-3"]
40-
41-
@property
42-
def iso_639_5(self) -> str | None:
43-
"""ISO-639-5 language code"""
44-
return self["iso-639-5"]
45-
46-
@property
47-
def english(self) -> str:
48-
"""language name in English"""
49-
return self["english"]
50-
51-
@property
52-
def native(self) -> str:
53-
"""language name in native language"""
54-
return self["native"]
55-
56-
@property
57-
def iso_types(self) -> list[str]:
58-
"""list of supported iso types"""
59-
return self["iso_types"]
60-
61-
@property
62-
def query(self) -> str:
63-
"""Query issued for these language details"""
64-
return self["query"]
65-
66-
@property
67-
def querytype(self) -> str:
68-
"""Type of query issued to retrieve language details"""
69-
return self["querytype"]
16+
class Lang:
17+
18+
def __init__(self, requested_lang: str, iso639_lang_obj: iso639.Lang):
19+
self.iso_639_1 = iso639_lang_obj.pt1 or None
20+
self.iso_639_2b = iso639_lang_obj.pt2b or None
21+
self.iso_639_2t = iso639_lang_obj.pt2t or None
22+
self.iso_639_3 = iso639_lang_obj.pt3 or None
23+
self.iso_639_5 = iso639_lang_obj.pt5 or None
24+
self.english = iso639_lang_obj.name or None
25+
self.iso_types = [
26+
part_level
27+
for iso_level, part_level in [
28+
(f"pt{level}", f"part{level}") for level in ISO_LEVELS
29+
]
30+
+ [("name", "name")]
31+
if getattr(iso639_lang_obj, iso_level).lower() == requested_lang.lower()
32+
]
33+
34+
35+
class LangAndDetails:
36+
def __init__(
37+
self, lang: Lang, english_name: str, native: str, querytype: str, query: str
38+
):
39+
self.iso_639_1 = lang.iso_639_1
40+
self.iso_639_2b = lang.iso_639_2b
41+
self.iso_639_2t = lang.iso_639_2t
42+
self.iso_639_3 = lang.iso_639_3
43+
self.iso_639_5 = lang.iso_639_5
44+
self.iso_types = lang.iso_types
45+
self.english = english_name
46+
self.native = native
47+
self.querytype = querytype
48+
self.query = query
49+
50+
def __eq__(self, value: object) -> bool:
51+
if not isinstance(value, LangAndDetails):
52+
return False
53+
54+
return (
55+
self.iso_639_1 == value.iso_639_1
56+
and self.iso_639_2b == value.iso_639_2b
57+
and self.iso_639_2t == value.iso_639_2t
58+
and self.iso_639_3 == value.iso_639_3
59+
and self.iso_639_5 == value.iso_639_5
60+
and self.english == value.english
61+
and self.native == value.native
62+
)
63+
64+
def __hash__(self) -> int:
65+
return hash(
66+
(
67+
self.iso_639_1,
68+
self.iso_639_2b,
69+
self.iso_639_2t,
70+
self.iso_639_3,
71+
self.iso_639_5,
72+
self.english,
73+
self.native,
74+
self.query,
75+
self.querytype,
76+
)
77+
)
7078

7179

7280
def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
7381
"""ISO-639-x languages details for lang. Raises NotFoundError
7482
7583
Returns a tuple (main_language, macro_language | None)"""
7684

77-
iso_types = []
78-
7985
try:
8086
isolang = iso639.Lang(lang)
8187
except (
@@ -84,39 +90,16 @@ def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
8490
) as exc:
8591
raise NotFoundError("Not a valid iso language name/code") from exc
8692

87-
def replace_types(new_type: str) -> str:
88-
# convert new iso_types from iso639-lang Pypi package to old iso_types from
89-
# iso-639 package, since we were returning these values for a long time
90-
if new_type == "pt1":
91-
return "part1"
92-
elif new_type == "pt2b":
93-
return "part2b"
94-
elif new_type == "pt2t":
95-
return "part2t"
96-
elif new_type == "pt3":
97-
return "part3"
98-
elif new_type == "pt5":
99-
return "part5"
100-
return new_type
101-
102-
for code_type in [f"pt{lang_}" for lang_ in ISO_LEVELS] + ["name"]:
103-
# the `if` condition below is a bit hackish but it is the only way to know
104-
# if the passed value is matching a code type or not with new python-i639
105-
# library and we do not expect weird things to happen here
106-
if str(getattr(isolang, code_type)).lower() == lang.lower():
107-
iso_types.append(replace_types(code_type))
108-
109-
lang_data = Lang(
110-
**{f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS}
111-
)
112-
lang_data.update({"english": isolang.name, "iso_types": iso_types})
113-
114-
# first item in the returned tuple
93+
ourlang = Lang(lang, isolang)
94+
11595
macro = isolang.macro()
116-
return (lang_data, get_iso_lang_data(macro.name)[0] if macro else None)
96+
97+
return (ourlang, get_iso_lang_data(macro.name)[0] if macro else None)
11798

11899

119-
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
100+
def find_language_names(
101+
query: str, lang_data: Lang | LangAndDetails | None = None
102+
) -> tuple[str, str]:
120103
"""(native, english) language names for lang with help from lang_data
121104
122105
Falls back to English name if available or query if not"""
@@ -134,30 +117,33 @@ def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str,
134117
pass
135118

136119
# ISO code lookup order matters (most qualified first)!
137-
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
120+
for iso_level in [f"iso_639_{level}" for level in reversed(ISO_LEVELS)]:
138121
try:
139-
query_locale = babel.Locale.parse(lang_data.get(iso_level))
122+
query_locale = babel.Locale.parse(getattr(lang_data, iso_level))
140123
if native_display_name := query_locale.get_display_name():
141124
if english_display_name := query_locale.get_display_name("en"):
142125
return native_display_name, english_display_name
143126
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
144127
pass
145-
default = lang_data.get("english") or query
128+
default = lang_data.english or query
146129
return default, default
147130

148131

149132
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
150133
"""update empty keys from lang_data with ones of macro_data"""
151-
if macro_data:
152-
for key, value in macro_data.items():
153-
if key in lang_data and not lang_data.get(key):
154-
lang_data[key] = value
134+
if not macro_data:
135+
return lang_data
136+
137+
for iso_level in [f"iso_639_{level}" for level in ISO_LEVELS]:
138+
if not getattr(lang_data, iso_level):
139+
setattr(lang_data, iso_level, getattr(macro_data, iso_level))
140+
155141
return lang_data
156142

157143

158144
def get_language_details(
159145
query: str, failsafe: bool | None = False # noqa: FBT002
160-
) -> Lang | None:
146+
) -> LangAndDetails | None:
161147
"""language details dict from query.
162148
163149
When query fails, either raises NotFoundError or return None, based on failsafe
@@ -191,17 +177,10 @@ def get_language_details(
191177

192178
iso_data = update_with_macro(lang_data, macro_data)
193179
native_name, english_name = find_language_names(native_query, iso_data)
194-
iso_data.update(
195-
{
196-
"english": english_name,
197-
"native": native_name,
198-
"querytype": query_type,
199-
"query": query,
200-
}
201-
)
202-
return iso_data
180+
return LangAndDetails(iso_data, english_name, native_name, query_type, query)
203181

204182

205183
def is_valid_iso_639_3(code: str) -> bool:
206184
"""whether code is a valid ISO-639-3 code"""
207-
return (get_language_details(code, failsafe=True) or {}).get("iso-639-3") == code
185+
lang = get_language_details(code, failsafe=True)
186+
return lang is not None and lang.iso_639_3 == code

0 commit comments

Comments
 (0)