diff --git a/config.py b/config.py index b6d75d1..564f0c1 100644 --- a/config.py +++ b/config.py @@ -53,8 +53,7 @@ prefs.defaults["use_all_formats"] = False prefs.defaults["minimal_x_ray_count"] = 1 prefs.defaults["choose_format_manually"] = True -prefs.defaults["wiktionary_gloss_lang"] = "en" -prefs.defaults["kindle_gloss_lang"] = "en" +prefs.defaults["gloss_lang"] = "en" prefs.defaults["use_wiktionary_for_kindle"] = False prefs.defaults["remove_link_styles"] = False prefs.defaults["python_path"] = "" @@ -220,9 +219,7 @@ def open_choose_lemma_lang_dialog(self, is_kindle: bool = True) -> None: if choose_lang_dlg.exec(): lemma_lang = choose_lang_dlg.lemma_lang_box.currentData() gloss_lang = choose_lang_dlg.gloss_lang_box.currentData() - prefs["kindle_gloss_lang" if is_kindle else "wiktionary_gloss_lang"] = ( - gloss_lang - ) + prefs["gloss_lang"] = gloss_lang if is_kindle and lemma_lang == "en" and gloss_lang in ["en", "zh", "zh_cn"]: prefs["use_wiktionary_for_kindle"] = ( choose_lang_dlg.use_wiktionary_box.isChecked() @@ -436,10 +433,6 @@ class ChooseLemmaLangDialog(QDialog): def __init__(self, parent: QObject, is_kindle: bool): super().__init__(parent) self.setWindowTitle(_("Choose language")) - self.prefer_gloss_code = prefs[ - "kindle_gloss_lang" if is_kindle else "wiktionary_gloss_lang" - ] - form_layout = QFormLayout() form_layout.setFieldGrowthPolicy( QFormLayout.FieldGrowthPolicy.ExpandingFieldsGrow @@ -448,14 +441,12 @@ def __init__(self, parent: QObject, is_kindle: bool): self.lemma_lang_box = QComboBox() self.gloss_lang_box = QComboBox() language_dict = load_languages_data(get_plugin_path()) - selected_gloss_code = prefs[ - "kindle_gloss_lang" if is_kindle else "wiktionary_gloss_lang" - ] + selected_gloss_code = prefs["gloss_lang"] self.gloss_lang_box.currentIndexChanged.connect( partial(self.gloss_lang_changed, language_dict) ) for gloss_lang, lang_value in language_dict.items(): - if len(lang_value.get("lemma_languages", [])) == 0: + if lang_value.get("gloss_source", []) == "": continue gloss_lang_name = _(lang_value["name"]) self.gloss_lang_box.addItem(gloss_lang_name, gloss_lang) @@ -503,6 +494,8 @@ def gloss_lang_changed(self, lang_dict) -> None: gloss_lang = self.gloss_lang_box.currentData() self.lemma_lang_box.clear() lemma_langs = lang_dict[gloss_lang].get("lemma_languages", []) + if len(lemma_langs) == 0: + lemma_langs = lang_dict.keys() for index, lemma_lang in enumerate(lemma_langs): lemma_lang_name = _(lang_dict[lemma_lang]["name"]) self.lemma_lang_box.addItem(lemma_lang_name, lemma_lang) diff --git a/custom_lemmas.py b/custom_lemmas.py index fac5921..9b79ceb 100644 --- a/custom_lemmas.py +++ b/custom_lemmas.py @@ -210,7 +210,7 @@ def check_empty_kindle_gloss(self) -> None: if not package_name: device_not_found_dialog(self) return - custom_folder = custom_lemmas_folder(plugin_path, "en") + custom_folder = custom_lemmas_folder(plugin_path) if isinstance(package_name, str): copy_klld_from_android(package_name, custom_folder) else: diff --git a/data/languages.json b/data/languages.json index 39564f8..b93199f 100644 --- a/data/languages.json +++ b/data/languages.json @@ -24,34 +24,6 @@ "639-2": "deu", "gloss_source": "kaikki", "has_trf": false, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "German", "spacy": "de_core_news_" }, @@ -71,34 +43,6 @@ "639-2": "eng", "gloss_source": "kaikki", "has_trf": true, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "English", "spacy": "en_core_web_" }, @@ -106,34 +50,6 @@ "639-2": "spa", "gloss_source": "kaikki", "has_trf": false, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "Spanish", "spacy": "es_core_news_" }, @@ -151,34 +67,6 @@ "639-2": "fra", "gloss_source": "kaikki", "has_trf": false, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "French", "spacy": "fr_core_news_" }, @@ -215,11 +103,8 @@ }, "ja": { "639-2": "jpn", - "gloss_source": "dbnary", + "gloss_source": "kaikki", "has_trf": true, - "lemma_languages": [ - "ja" - ], "name": "Japanese", "spacy": "ja_core_news_" }, @@ -257,7 +142,7 @@ "name": "Dutch", "spacy": "nl_core_news_" }, - "no": { + "nb": { "639-2": "nob", "gloss_source": "dbnary", "has_trf": false, @@ -269,11 +154,8 @@ }, "pl": { "639-2": "pol", - "gloss_source": "dbnary", + "gloss_source": "kaikki", "has_trf": false, - "lemma_languages": [ - "pl" - ], "name": "Polish", "spacy": "pl_core_news_" }, @@ -298,34 +180,6 @@ "639-2": "rus", "gloss_source": "kaikki", "has_trf": false, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "Russian", "spacy": "ru_core_news_" }, @@ -357,34 +211,6 @@ "639-2": "zho", "gloss_source": "kaikki", "has_trf": true, - "lemma_languages": [ - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "fi", - "fr", - "he", - "hr", - "it", - "ja", - "ko", - "lt", - "mk", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sl", - "sv", - "uk", - "zh" - ], "name": "Chinese", "spacy": "zh_core_web_" } diff --git a/deps.py b/deps.py index cbf9577..f4dfb45 100644 --- a/deps.py +++ b/deps.py @@ -1,6 +1,6 @@ -import bz2 import platform import shutil +import tarfile from pathlib import Path from typing import Any from urllib.request import urlopen @@ -10,14 +10,12 @@ from .utils import ( PROFICIENCY_RELEASE_URL, Prefs, + custom_lemmas_folder, get_plugin_path, get_spacy_model_version, - get_wiktionary_klld_path, - kindle_db_path, load_plugin_json, mac_bin_path, run_subprocess, - wiktionary_db_path, ) PY_PATH = "" @@ -146,7 +144,7 @@ def download_word_wise_file( log=None, notifications=None, ) -> None: - gloss_lang = prefs["kindle_gloss_lang" if is_kindle else "wiktionary_gloss_lang"] + gloss_lang = prefs["gloss_lang"] if notifications: notifications.put( ( @@ -156,27 +154,14 @@ def download_word_wise_file( ) ) plugin_path = get_plugin_path() - if is_kindle: - db_path = kindle_db_path(plugin_path, lemma_lang, prefs) - else: - db_path = wiktionary_db_path(plugin_path, lemma_lang, gloss_lang) - - if not db_path.exists(): - bz2_filename = db_path.with_suffix(db_path.suffix + ".bz2").name - download_and_extract(f"{PROFICIENCY_RELEASE_URL}/{bz2_filename}", db_path) - - if is_kindle: - klld_path = get_wiktionary_klld_path(plugin_path, lemma_lang, gloss_lang) - if not klld_path.exists(): - bz2_filename = klld_path.with_suffix(klld_path.suffix + ".bz2").name - download_and_extract(f"{PROFICIENCY_RELEASE_URL}/{bz2_filename}", klld_path) - - -def download_and_extract(url: str, extract_path: Path) -> None: - extract_path.parent.mkdir(parents=True, exist_ok=True) - download_path = extract_path.with_name(url.rsplit("/", 1)[-1]) + bz2_filename = f"{lemma_lang}_{gloss_lang}.tar.bz2" + url = f"{PROFICIENCY_RELEASE_URL}/{bz2_filename}" + download_folder = custom_lemmas_folder(plugin_path) + if not download_folder.is_dir(): + download_folder.mkdir() + download_path = download_folder / bz2_filename with urlopen(url) as r, open(download_path, "wb") as f: shutil.copyfileobj(r, f) - with bz2.open(download_path) as bz2_f, extract_path.open("wb") as f: - shutil.copyfileobj(bz2_f, f) + with tarfile.open(name=download_path, mode="r:bz2") as tar_f: + tar_f.extractall(download_folder) download_path.unlink() diff --git a/docs/index.rst b/docs/index.rst index b59618c..de7bb38 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,36 +40,38 @@ Features Supported languages ------------------- -============= ===== =================================================== -Book language X-Ray Word Wise gloss languages -============= ===== =================================================== -Bokmål ✅ Bokmål, English, Français, 中文 -Català ✅ English, Français, 中文 -Dansk ✅ English, Français, 中文 -Deutsch ✅ Deutsch, English, Français, 中文 -English ✅ English, Español, Français, Italiano, Ελληνικά, עִבְרִית‎, 中文 -Español ✅ Español, English, Français, 中文 -Français ✅ English, Français, Ελληνικά, 中文 -Hrvatski ✅ English, Français, Hrvatski, 中文 -Italiano ✅ English, Français, Italiano, 中文 -Lietuvių ✅ English, Français, Lietuvių, 中文 -Nederlands ✅ English, Français, Nederlands, 中文 -Polski ✅ English, Français, Polski, 中文 -Português ✅ English, Français, Português, 中文 -Română ✅ English, Français, 中文 -Slovenščina ✅ English, Français, 中文 -Suomi ✅ English, Français, Suomi, 中文 -Svenska ✅ English, Français, Svenska, 中文 -čeština ❌ English, Français, 中文 -Ελληνικά ✅ English, Français, Ελληνικά, 中文 -Македонски ✅ English, Français, 中文 -Русский ✅ English, Français, Русский, 中文 -Українська ✅ English, Français, 中文 -עִבְֿרִית‎ ❌ English, Français, 中文 -中文 ✅ English, Français, 中文 -日本語 ✅ English, Français, 中文, 日本語 -한국어 ✅ English, Français, 中文 -============= ===== =================================================== +Supported Word Wise definition languages are listed in the plugin's customize window. + +============= ===== +Book language X-Ray +============= ===== +Bokmål ✅ +Català ✅ +Dansk ✅ +Deutsch ✅ +English ✅ +Español ✅ +Français ✅ +Hrvatski ✅ +Italiano ✅ +Lietuvių ✅ +Nederlands ✅ +Polski ✅ +Português ✅ +Română ✅ +Slovenščina ✅ +Suomi ✅ +Svenska ✅ +čeština ❌ +Ελληνικά ✅ +Македонски ✅ +Русский ✅ +Українська ✅ +עִבְֿרִית‎ ❌ +中文 ✅ +日本語 ✅ +한국어 ✅ +============= ===== Community --------- diff --git a/dump_lemmas.py b/dump_lemmas.py index 6e7d65d..7946ae8 100644 --- a/dump_lemmas.py +++ b/dump_lemmas.py @@ -31,11 +31,11 @@ def spacy_doc_path( ): import platform - gloss_lang = prefs["kindle_gloss_lang" if is_kindle else "wiktionary_gloss_lang"] + gloss_lang = prefs["gloss_lang"] if is_kindle and not use_kindle_ww_db(lemma_lang, prefs): is_kindle = False py_version = ".".join(platform.python_version_tuple()[:2]) - path = custom_lemmas_folder(plugin_path, lemma_lang).joinpath( + path = custom_lemmas_folder(plugin_path).joinpath( f"{spacy_model or lemma_lang}_{'kindle' if is_kindle else 'wiktionary'}" f"_{gloss_lang}_{model_version}_{py_version}" ) diff --git a/metadata.py b/metadata.py index fba632d..4ef6482 100644 --- a/metadata.py +++ b/metadata.py @@ -25,7 +25,11 @@ def is_ww_supported(book_lang: str, gloss_lang: str) -> bool: from .utils import get_plugin_path, load_languages_data lang_dict = load_languages_data(get_plugin_path()) - return book_lang in lang_dict.get(gloss_lang, {}).get("lemma_languages", []) + lang_data = lang_dict.get(gloss_lang, {}) + supported_codes = lang_data.get("lemma_languages", []) + if len(supported_codes) == 0 and lang_data["gloss_source"] == "kaikki": + supported_codes = lang_dict.keys() + return book_lang in supported_codes def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult | None: @@ -62,9 +66,7 @@ def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult book_lang = supported_languages[calibre_book_lang] support_ww_list = [] for fmt in supported_fmts: - gloss_lang = prefs[ - "kindle_gloss_lang" if fmt != "EPUB" else "wiktionary_gloss_lang" - ] + gloss_lang = prefs["gloss_lang"] support_ww_list.append(is_ww_supported(book_lang, gloss_lang)) return MetaDataResult( @@ -118,9 +120,7 @@ def cli_check_metadata(book_path_str: str, log: Any) -> MetaDataResult | None: ) return None book_lang = supported_languages[calibre_book_lang] - gloss_lang = prefs[ - "kindle_gloss_lang" if book_fmt != "EPUB" else "wiktionary_gloss_lang" - ] + gloss_lang = prefs["gloss_lang"] return MetaDataResult( book_fmts=[book_fmt], mi=mi, diff --git a/parse_job.py b/parse_job.py index a15b2bb..0b0367b 100644 --- a/parse_job.py +++ b/parse_job.py @@ -145,7 +145,7 @@ def do_job( if ( data.create_ww and not wiktionary_db_path( - data.plugin_path, data.book_lang, prefs["wiktionary_gloss_lang"] + data.plugin_path, data.book_lang, prefs["gloss_lang"] ).exists() ): download_word_wise_file( @@ -161,7 +161,7 @@ def do_job( if data.create_ww and ( not kindle_db_path(data.plugin_path, data.book_lang, prefs).exists() or not get_wiktionary_klld_path( - data.plugin_path, data.book_lang, prefs["kindle_gloss_lang"] + data.plugin_path, data.book_lang, prefs["gloss_lang"] ).exists() ): download_word_wise_file( @@ -241,9 +241,7 @@ def create_files(data: ParseJobData, prefs: Prefs, notif: Any) -> None: lemmas_conn = None if data.create_ww: lemmas_db_path = ( - wiktionary_db_path( - data.plugin_path, data.book_lang, prefs["wiktionary_gloss_lang"] - ) + wiktionary_db_path(data.plugin_path, data.book_lang, prefs["gloss_lang"]) if is_epub else kindle_db_path(data.plugin_path, data.book_lang, prefs) ) @@ -321,7 +319,7 @@ def create_files(data: ParseJobData, prefs: Prefs, notif: Any) -> None: xhtml_path, ) supported_languages = load_languages_data(data.plugin_path) - gloss_lang = prefs["wiktionary_gloss_lang"] + gloss_lang = prefs["gloss_lang"] gloss_source = supported_languages[gloss_lang]["gloss_source"] epub.modify_epub(prefs, data.book_lang, gloss_lang, gloss_source) return diff --git a/send_file.py b/send_file.py index 7f30899..cea0875 100644 --- a/send_file.py +++ b/send_file.py @@ -270,7 +270,7 @@ def copy_klld_to_device( plugin_path = get_plugin_path() if use_kindle_ww_db(book_lang, prefs): - if prefs["kindle_gloss_lang"] in ("zh", "zh_cn"): # restore origin ww db + if prefs["gloss_lang"] in ("zh", "zh_cn"): # restore origin ww db local_klld_path = get_kindle_klld_path(plugin_path, True) if local_klld_path is None: return @@ -278,7 +278,7 @@ def copy_klld_to_device( return else: local_klld_path = get_wiktionary_klld_path( - plugin_path, book_lang, prefs["kindle_gloss_lang"] + plugin_path, book_lang, prefs["gloss_lang"] ) if adb_path is not None: diff --git a/tests/test.py b/tests/test.py index 14d9d10..bab6590 100644 --- a/tests/test.py +++ b/tests/test.py @@ -21,7 +21,7 @@ def setUpClass(cls): prefs["mediawiki_api"] = "" prefs["add_locator_map"] = True prefs["minimal_x_ray_count"] = 1 - prefs["kindle_gloss_lang"] = "en" + prefs["gloss_lang"] = "en" prefs["use_wiktionary_for_kindle"] = False lib_db = db("~/Calibre Library").new_api diff --git a/utils.py b/utils.py index afe590d..1e971ce 100644 --- a/utils.py +++ b/utils.py @@ -9,7 +9,7 @@ from typing import Any, TypedDict CJK_LANGS = ["zh", "ja", "ko"] -PROFICIENCY_VERSION = "0.5.20" +PROFICIENCY_VERSION = "0.5.21" PROFICIENCY_RELEASE_URL = ( f"https://github.com/xxyzz/Proficiency/releases/download/v{PROFICIENCY_VERSION}" ) @@ -26,10 +26,7 @@ class Prefs(TypedDict): use_all_formats: bool mal_x_ray_count: int choose_format_manually: bool - wiktionary_gloss_lang: str - kindle_gloss_lang: str - last_opened_kindle_lemmas_language: str - last_opened_wiktionary_lemmas_language: str + gloss_lang: str use_wiktionary_for_kindle: bool python_path: str show_change_kindle_ww_lang_warning: bool @@ -93,14 +90,14 @@ def get_plugin_path() -> Path: return Path(config_dir) / "plugins/WordDumb.zip" -def custom_lemmas_folder(plugin_path: Path, lemma_lang: str) -> Path: - return plugin_path.parent / "worddumb-lemmas" / lemma_lang +def custom_lemmas_folder(plugin_path: Path) -> Path: + return plugin_path.parent / "worddumb-lemmas" def use_kindle_ww_db(lemma_lang: str, prefs: Prefs) -> bool: return ( lemma_lang == "en" - and prefs["kindle_gloss_lang"] in ["en", "zh", "zh_cn"] + and prefs["gloss_lang"] in ["en", "zh", "zh_cn"] and not prefs["use_wiktionary_for_kindle"] ) @@ -108,22 +105,22 @@ def use_kindle_ww_db(lemma_lang: str, prefs: Prefs) -> bool: def kindle_db_path(plugin_path: Path, lemma_lang: str, prefs: Prefs) -> Path: if use_kindle_ww_db(lemma_lang, prefs): return ( - custom_lemmas_folder(plugin_path, lemma_lang) + custom_lemmas_folder(plugin_path) / f"kindle_en_en_v{PROFICIENCY_MAJOR_VERSION}.db" ) else: - return wiktionary_db_path(plugin_path, lemma_lang, prefs["kindle_gloss_lang"]) + return wiktionary_db_path(plugin_path, lemma_lang, prefs["gloss_lang"]) def wiktionary_db_path(plugin_path: Path, lemma_lang: str, gloss_lang: str) -> Path: return ( - custom_lemmas_folder(plugin_path, lemma_lang) + custom_lemmas_folder(plugin_path) / f"wiktionary_{lemma_lang}_{gloss_lang}_v{PROFICIENCY_MAJOR_VERSION}.db" ) def get_kindle_klld_path(plugin_path: Path, zh_gloss: bool = False) -> Path | None: - custom_folder = custom_lemmas_folder(plugin_path, "en") + custom_folder = custom_lemmas_folder(plugin_path) for path in custom_folder.glob("*.zh.klld" if zh_gloss else "*.en.klld"): return path for path in custom_folder.glob("*.zh.db" if zh_gloss else "*.en.db"): @@ -135,7 +132,7 @@ def get_wiktionary_klld_path( plugin_path: Path, lemma_lang: str, gloss_lang: str ) -> Path: return ( - custom_lemmas_folder(plugin_path, lemma_lang) + custom_lemmas_folder(plugin_path) / f"kll.{lemma_lang}.{gloss_lang}_v{PROFICIENCY_MAJOR_VERSION}.klld" )