From d2a93fa19392d858e9572ccc4b806fbe800e6242 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 27 Jun 2025 22:36:49 +0200 Subject: [PATCH] Add plural per-language forms in check-translations script Different languages have different plural forms. Our script should take the original English forms and convert them into the right plural forms for the language. Also noticed that sorting order is slightly different than the one that eslint uses. The "eslint" sorting order is now used when generating missing keys. --- dev/i18n/check_translations_completeness.py | 92 ++++++++++++++++++--- 1 file changed, 81 insertions(+), 11 deletions(-) diff --git a/dev/i18n/check_translations_completeness.py b/dev/i18n/check_translations_completeness.py index 9202ca0012880..b8d5437d631c9 100755 --- a/dev/i18n/check_translations_completeness.py +++ b/dev/i18n/check_translations_completeness.py @@ -44,6 +44,19 @@ Path(__file__).parents[2] / "airflow-core" / "src" / "airflow" / "ui" / "public" / "i18n" / "locales" ) +# Plural suffixes per language (expand as needed) +PLURAL_SUFFIXES = { + "en": ["_one", "_other"], + "pl": ["_one", "_few", "_many", "_other"], + "de": ["_one", "_other"], + "fr": ["_one", "_other"], + "nl": ["_one", "_other"], + "ar": ["_zero", "_one", "_two", "_few", "_many", "_other"], + "he": ["_one", "_other"], + "ko": ["_other"], + "zh-TW": ["_other"], +} + class LocaleSummary(NamedTuple): """ @@ -84,6 +97,30 @@ class LocaleKeySet(NamedTuple): keys: set[str] | None +def get_plural_base(key: str, suffixes: list[str]) -> str | None: + for suffix in suffixes: + if key.endswith(suffix): + return key[: -len(suffix)] + return None + + +def expand_plural_keys(keys: set[str], lang: str) -> set[str]: + """ + For a set of keys, expand all plural bases to include all required suffixes for the language. + """ + suffixes = PLURAL_SUFFIXES.get(lang, ["_one", "_other"]) + base_to_suffixes: dict[str, set[str]] = {} + for key in keys: + base = get_plural_base(key, suffixes) + if base: + base_to_suffixes.setdefault(base, set()).add(key[len(base) :]) + expanded = set(keys) + for base in base_to_suffixes.keys(): + for suffix in suffixes: + expanded.add(base + suffix) + return expanded + + def get_locale_files() -> list[LocaleFiles]: return [ LocaleFiles( @@ -127,6 +164,7 @@ def compare_keys( for filename in all_files: key_sets: list[LocaleKeySet] = [] for lf in locale_files: + keys = set() if filename in lf.files: path = LOCALES_DIR / lf.locale / filename try: @@ -134,26 +172,26 @@ def compare_keys( keys = set(flatten_keys(data)) except Exception as e: print(f"Error loading {path}: {e}") - keys = set() - else: - keys = None key_sets.append(LocaleKeySet(locale=lf.locale, keys=keys)) keys_by_locale = {ks.locale: ks.keys for ks in key_sets} en_keys = keys_by_locale.get("en", set()) or set() + # Expand English keys for all required plural forms in each language + expanded_en_keys = {lang: expand_plural_keys(en_keys, lang) for lang in keys_by_locale.keys()} missing_keys: dict[str, list[str]] = {} extra_keys: dict[str, list[str]] = {} missing_counts[filename] = {} for ks in key_sets: if ks.locale == "en": continue + required_keys = expanded_en_keys.get(ks.locale, en_keys) if ks.keys is None: - missing_keys[ks.locale] = list(en_keys) + missing_keys[ks.locale] = list(required_keys) extra_keys[ks.locale] = [] - missing_counts[filename][ks.locale] = len(en_keys) + missing_counts[filename][ks.locale] = len(required_keys) else: - missing = list(en_keys - ks.keys) + missing = list(required_keys - ks.keys) missing_keys[ks.locale] = missing - extra_keys[ks.locale] = list(ks.keys - en_keys) + extra_keys[ks.locale] = list(ks.keys - required_keys) missing_counts[filename][ks.locale] = len(missing) summary[filename] = LocaleSummary(missing_keys=missing_keys, extra_keys=extra_keys) return summary, missing_counts @@ -429,9 +467,11 @@ def add_missing_translations(language: str, summary: dict[str, LocaleSummary], c Add missing translations for the selected language. It does it by copying them from English and prefixing with 'TODO: translate:'. + Ensures all required plural forms for the language are added. """ + suffixes = PLURAL_SUFFIXES.get(language, ["_one", "_other"]) for filename, diff in summary.items(): - missing_keys = diff.missing_keys.get(language, []) + missing_keys = set(diff.missing_keys.get(language, [])) if not missing_keys: continue en_path = LOCALES_DIR / "en" / filename @@ -447,10 +487,23 @@ def add_missing_translations(language: str, summary: dict[str, LocaleSummary], c console.print(f"[yellow]Failed to load {language} file {language}: {e}[/yellow]") lang_data = {} # Start with an empty dict if the file doesn't exist - # Helper to recursively add missing keys + # Helper to recursively add missing keys, including plural forms def add_keys(src, dst, prefix=""): for k, v in src.items(): full_key = f"{prefix}.{k}" if prefix else k + base = get_plural_base(full_key, suffixes) + if base and any(full_key == base + s for s in suffixes): + # Add all plural forms at the current level (not nested) + for suffix in suffixes: + plural_key = base + suffix + key_name = plural_key.split(".")[-1] + if plural_key in missing_keys: + if isinstance(v, dict): + dst[key_name] = {} + add_keys(v, dst[key_name], plural_key) + else: + dst[key_name] = f"TODO: translate: {v}" + continue if full_key in missing_keys: if isinstance(v, dict): dst[k] = {} @@ -464,10 +517,27 @@ def add_keys(src, dst, prefix=""): add_keys(v, dst[k], full_key) add_keys(en_data, lang_data) - # Write back to file, preserving order + + # Write back to file, preserving order and using eslint-style key sorting + def eslint_key_sort(obj): + if isinstance(obj, dict): + # Sort keys: numbers first, then uppercase, then lowercase, then others (eslint default) + def sort_key(k): + if k.isdigit(): + return (0, int(k)) + if k and k[0].isupper(): + return (1, k) + if k and k[0].islower(): + return (2, k) + return (3, k) + + return {k: eslint_key_sort(obj[k]) for k in sorted(obj, key=sort_key)} + return obj + + lang_data = eslint_key_sort(lang_data) lang_path.parent.mkdir(parents=True, exist_ok=True) with open(lang_path, "w", encoding="utf-8") as f: - json.dump(lang_data, f, ensure_ascii=False, indent=2, sort_keys=True) + json.dump(lang_data, f, ensure_ascii=False, indent=2) f.write("\n") # Ensure newline at the end of the file console.print(f"[green]Added missing translations to {lang_path}[/green]")