Various enhancements

This commit is contained in:
kdmukai 2026-01-21 10:20:25 -06:00
parent 7177490bff
commit 7ba7453348
No known key found for this signature in database
GPG Key ID: 785B25305CD27F23

View File

@@ -1,38 +1,74 @@
# extract_characters_from_babel_mo.py
"""
This is a utility for build / dev purposes.
Extracts all unique characters that appear in the translated strings for the specified
locale.
This is a utility for build / dev purposes only.
"""
if __name__ == "__main__":
import sys
from babel.messages import mofile
import argparse
import os
from babel.messages import mofile
# Characters that are always appended to the extracted set before de-duplication,
# whether or not any translated string uses them.
include_chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*()_+-=|?><,.;:[]{}"
# Define required input args and help text.
# The usage line is generated by argparse from `prog` and the declared arguments, so it
# always lists every supported option (a hand-written usage string omitted `--debug`).
parser = argparse.ArgumentParser(
    prog="python3 extract_characters_from_babel_mo.py",
    description="Extracts all unique characters that appear in the translated strings for the specified locale.",
)
# `metavar` only affects how the positional is displayed; the value is still `args.locale`.
parser.add_argument("locale", metavar="<locale>", help="Target locale (e.g. es, pt_BR, zh_Hans_CN)")
parser.add_argument("--debug", action="store_true", help="Enable debug output")
args = parser.parse_args()

# Flag consulted by the debug-only reporting later in the script.
debug = args.debug
# The first command-line argument is the path to the compiled Babel .mo file;
# it is opened in binary mode and parsed into a message catalog.
mo_fullfilename = sys.argv[1]
with open(mo_fullfilename, "rb") as f:
catalog = mofile.read_mo(f)
# Printable ASCII: every code point from SPACE (0x20) through "~" (0x7E), inclusive.
basic_chars = {chr(code) for code in range(0x20, 0x7F)}
# Gather every non-empty translated string from the catalog into one flat list;
# the strings are later joined and reduced to their unique characters.
all_chars = []
for msg in catalog:
if msg.string:
if isinstance(msg.string, list):
# plural message: msg.string is a list holding one string per plural form
all_chars.extend(msg.string)
else:
# singular message
all_chars.append(msg.string)
# Build the path to the compiled catalog for the requested locale. The path starts at
# os.pardir, so it is resolved relative to the current working directory.
mo_fullfilename = os.path.join(os.pardir, "l10n", args.locale, "LC_MESSAGES", "messages.mo")
try:
    with open(mo_fullfilename, "rb") as f:
        catalog = mofile.read_mo(f)
except FileNotFoundError:
    # SystemExit with a string argument prints it to stderr and exits with status 1.
    # This keeps the diagnostic out of stdout, which carries the extracted characters,
    # and does not rely on the `exit()` helper that the `site` module injects.
    raise SystemExit(f"Could not find translations for locale \"{args.locale}\" ({mo_fullfilename})")
# Collect the distinct characters used by the message ids and by the translated strings
# into two separate sets; set.update() with a str adds each of its characters.
# NOTE(review): indentation is not visible in this view; presumably both `if` checks sit
# directly inside the loop body — confirm against the real file.
id_chars = set()
translations_chars = set()
for msg in catalog:
if msg.id:
if isinstance(msg.id, list):
# plural message
# get chars from all plural forms
for msgid in msg.id:
id_chars.update(msgid)
else:
# singular message
id_chars.update(msg.id)
if msg.string:
if isinstance(msg.string, list):
# plural message
# get chars from all plural forms
for msgstring in msg.string:
translations_chars.update(msgstring)
else:
# singular message
translations_chars.update(msg.string)
# build a sorted list of the unique chars found in all translated strings plus the include_chars string
chars = sorted(set("".join(all_chars) + include_chars))
if debug:
# Print the chars that appear in the ids but are missing from basic_chars
# NOTE(review): id_chars and basic_chars are created as sets elsewhere in this script, so the
# "".join(...) / set(...) round-trips below look redundant — confirm before simplifying
print("Chars in ids but not in basic_chars:", sorted(set("".join(id_chars)) - set(basic_chars)))
# join the sorted chars into a single string
chars_string = "".join(chars)
# And show the opposite: chars in basic_chars that no id uses
print("Chars in basic_chars but not in ids:", sorted(basic_chars - set("".join(id_chars))))
# remove newline ("\n") and carriage-return ("\r") characters
chars_string = chars_string.replace("\n", "").replace("\r", "")
print("Chars in translations:", "".join(sorted(translations_chars)))
print(chars_string)
# Merge the characters found in the translations with the basic ASCII set and sort them.
chars = sorted(translations_chars | basic_chars)

# Collapse the sorted list into one string, deleting line-feed and carriage-return chars.
chars_string = "".join(chars).translate(str.maketrans("", "", "\n\r"))

# The resulting character string is the script's output.
print(chars_string)