Skip to content

Commit

Permalink
🌐 Language export to CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
thinkyhead committed May 26, 2023
1 parent d20ce5f commit b1f0d47
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ bdf2u8g.exe
genpages.exe
marlin_config.json
mczip.h
language*.csv
csv-out/
*.gen
*.sublime-workspace

Expand Down
193 changes: 193 additions & 0 deletions buildroot/share/scripts/exportTranslations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
#
# exportTranslations.py
#
# Export LCD language strings to CSV files for easier translation.
# Use importTranslations.py to import CSV into the language files.
#

import re
from pathlib import Path

# Write multiple sheets if true, otherwise write one giant sheet
MULTISHEET = True

# Where to look for the language files
LANGHOME = "Marlin/src/lcd/language"

# Check for the path to the language files
if not Path(LANGHOME).is_dir():
print("Error: Couldn't find the '%s' directory." % LANGHOME)
print("Edit LANGHOME or cd to the root of the repo before running.")
exit(1)

# A dictionary to contain language names
LANGNAME = {
'an': "Aragonese",
'bg': "Bulgarian",
'ca': "Catalan",
'cz': "Czech",
'da': "Danish",
'de': "German",
'el': "Greek", 'el_CY': "Greek (Cyprus)", 'el_gr': "Greek (Greece)",
'en': "English",
'es': "Spanish",
'eu': "Basque-Euskera",
'fi': "Finnish",
'fr': "French", 'fr_na': "French (no accent)",
'gl': "Galician",
'hr': "Croatian (Hrvatski)",
'hu': "Hungarian / Magyar",
'it': "Italian",
'jp_kana': "Japanese (Kana)",
'ko_KR': "Korean",
'nl': "Dutch",
'pl': "Polish",
'pt': "Portuguese", 'pt_br': "Portuguese (Brazil)",
'ro': "Romanian",
'ru': "Russian",
'sk': "Slovak",
'sv': "Swedish",
'tr': "Turkish",
'uk': "Ukrainian",
'vi': "Vietnamese",
'zh_CN': "Simplified Chinese", 'zh_TW': "Traditional Chinese"
}

# A limit just for testing
LIMIT = 0

# A dictionary to contain strings for each language.
# Init with 'en' so English will always be first.
language_strings = { 'en': 0 }

# A dictionary to contain all distinct LCD string names
names = {}

# Get all "language_*.h" files
langfiles = sorted(list(Path(LANGHOME).glob('language_*.h')))

# Read each language file
for langfile in langfiles:
# Get the language code from the filename
langcode = langfile.name.replace('language_', '').replace('.h', '')

# Skip 'test' and any others that we don't want
if langcode in ['test']: continue

# Open the file
f = open(langfile, 'r', encoding='utf-8')
if not f: continue

# Flags to indicate a wide or tall section
wideflag = False
tallflag = False
# A counter for the number of strings in the file
stringcount = 0
# A dictionary to hold all the strings
strings = { 'narrow': {}, 'wide': {}, 'tall': {} }
# Read each line in the file
for line in f:
# Clean up the line for easier parsing
line = line.split("//")[0].strip()
if line.endswith(';'): line = line[:-1].strip()

# Check for wide or tall sections, assume no complicated nesting
if line.startswith("#endif") or line.startswith("#else"):
wideflag = False
tallflag = False
elif re.match(r'#if.*WIDTH\s*>=?\s*2[01].*', line): wideflag = True
elif re.match(r'#if.*LCD_HEIGHT\s*>=?\s*4.*', line): tallflag = True

# For string-defining lines capture the string data
match = re.match(r'LSTR\s+([A-Z0-9_]+)\s*=\s*(.+)\s*', line)
if match:
# The name is the first captured group
name = match.group(1)
# The value is the second captured group
value = match.group(2)
# Replace escaped quotes temporarily
value = value.replace('\\"', '__Q__')

# Remove all _UxGT wrappers from the value in a non-greedy way
value = re.sub(r'_UxGT\((".*?")\)', r'\1', value)

# Multi-line strings will get one or more bars | for identification
multiline = 0
multimatch = re.match(r'.*MSG_(\d)_LINE\s*\(\s*(.+?)\s*\).*', value)
if multimatch:
multiline = int(multimatch.group(1))
value = '|' + re.sub(r'"\s*,\s*"', '|', multimatch.group(2))

# Wrap inline defines in parentheses
value = re.sub(r' *([A-Z0-9]+_[A-Z0-9_]{4,}) *', r'(\1)', value)
# Remove quotes around strings
value = re.sub(r'"(.*?)"', r'\1', value).replace('__Q__', '"')
# Store all unique names as dictionary keys
names[name] = 1
# Store the string as narrow or wide
strings['tall' if tallflag else 'wide' if wideflag else 'narrow'][name] = value

# Increment the string counter
stringcount += 1
# Break for testing
if LIMIT and stringcount >= LIMIT: break

# Close the file
f.close()
# Store the array in the dict
language_strings[langcode] = strings

# Get the language codes from the dictionary
langcodes = list(language_strings.keys())

# Print the array
#print(language_strings)

# Write a single language entry to the CSV file with narrow, wide, and tall strings
def write_csv_lang(f, strings, name):
f.write(',')
if name in strings['narrow']: f.write('"%s"' % strings['narrow'][name])
f.write(',')
if name in strings['wide']: f.write('"%s"' % strings['wide'][name])
f.write(',')
if name in strings['tall']: f.write('"%s"' % strings['tall'][name])

if MULTISHEET:
#
# Export a separate sheet for each language
#
OUTDIR = 'csv-out'
Path.mkdir(Path(OUTDIR), exist_ok=True)

for lang in langcodes:
f = open("%s/language_%s.csv" % (OUTDIR, lang), 'w', encoding='utf-8')
if not f: continue

lname = lang + ' ' + LANGNAME[lang]
header = ['name', lname, lname + ' (wide)', lname + ' (tall)']
f.write('"' + '","'.join(header) + '"\n')

for name in names.keys():
f.write('"' + name + '"')
write_csv_lang(f, language_strings[lang], name)
f.write('\n')
f.close()

else:
#
# Export one large sheet containing all languages
#
f = open("languages.csv", 'w', encoding='utf-8')
if f:
header = ['name']
for lang in langcodes:
lname = lang + ' ' + LANGNAME[lang]
header += [lname, lname + ' (wide)', lname + ' (tall)']
f.write('"' + '","'.join(header) + '"\n')

for name in names.keys():
f.write('"' + name + '"')
for lang in langcodes: write_csv_lang(f, language_strings[lang], name)
f.write('\n')
f.close()
6 changes: 3 additions & 3 deletions buildroot/share/scripts/findMissingTranslations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ langname() {
it ) echo "Italian" ;; jp_kana) echo "Japanese (Kana)" ;;
ko_KR) echo "Korean" ;; nl ) echo "Dutch" ;;
pl ) echo "Polish" ;; pt ) echo "Portuguese" ;;
pt_br) echo "Portuguese (Brazil)" ;; ru ) echo "Russian" ;;
sk ) echo "Slovak" ;; sv ) echo "Swedish" ;;
test ) echo "TEST" ;; tr ) echo "Turkish" ;;
pt_br) echo "Portuguese (Brazil)" ;; ro ) echo "Romanian" ;;
ru ) echo "Russian" ;; sk ) echo "Slovak" ;;
sv ) echo "Swedish" ;; tr ) echo "Turkish" ;;
uk ) echo "Ukrainian" ;; vi ) echo "Vietnamese" ;;
zh_CN) echo "Simplified Chinese" ;; zh_TW ) echo "Traditional Chinese" ;;
* ) echo "<unknown>" ;;
Expand Down

0 comments on commit b1f0d47

Please sign in to comment.