Improve script to find unused strings

This script broke in the migration to Python 3 (see
1101db6a29). This change fixes those bugs,
and also:

1. Allows you to supply multiple source directories. Previously, you
   could only provide one, which meant that you'd see lots of unused
   strings, even though they were used in a different folder.
2. Adds usage instructions with `--help`, thanks to Python's `argparse`
   module.
3. Quits with a `1` status code if any unused strings are found.
4. Prints unused strings instead of used ones.
5. Speeds up the script by only searching for keys. Previously, it'd
   also look for comments and blank lines in the `Localizable.strings`
   file.
6. Speeds up the script by only reading files once, instead of once per
   key.
This commit is contained in:
Evan Hahn 2022-04-15 13:34:27 -05:00
parent ca8c573da5
commit 32476f0fa7

View File

@ -1,45 +1,100 @@
#!/usr/bin/env python3
"""
This script can be used to grep the source to tree to see which localized strings are in use.
author: corbett
usage: ./unused_strings.py Localizable.strings source_dir
eg: ./unused_strings.py ../Signal/translations/en.lproj/Localizable.strings ../Signal/src
Find unused localization strings.
"""
import argparse
import sys
import os
import re
import pathlib
from typing import Iterable
def file_match(fname, pat):
try:
f = open(fname, "rt")
except IOError:
return
for i, line in enumerate(f):
if pat.search(line):
return True
f.close()
return False
project_root = pathlib.Path(__file__).parent.parent.resolve()
key_re = re.compile('^"([^"]+)" =')
def rgrep_match(dir_name, s_pat):
pat = re.compile(s_pat)
for dirpath, dirnames, filenames in os.walk(dir_name):
for fname in filenames:
fullname = os.path.join(dirpath, fname)
match=file_match(fullname, pat)
if match:
return match
return False
def project_path(from_root: str) -> pathlib.Path:
return project_root / from_root
if __name__ == '__main__':
strings_file = sys.argv[1]
src_dir_name = sys.argv[2]
for item in open(strings_file).readlines():
grep_for = item.strip().split(' = ')[0].replace('"','')
if rgrep_match(src_dir_name, grep_for):
print(item.strip())
def parse_args():
parser = argparse.ArgumentParser(description="Find unused localization strings.")
parser.add_argument(
"--strings",
type=pathlib.Path,
default=project_path("Signal/translations/en.lproj/Localizable.strings"),
help="A Localizable.strings file",
)
parser.add_argument(
"--src_dirs",
default=map(
project_path,
[
"Signal/src",
"SignalMessaging",
"SignalNSE",
"SignalServiceKit",
"SignalShareExtension",
"SignalUI",
],
),
nargs="+",
type=pathlib.Path,
help="One or more source directories",
)
return parser.parse_args()
def get_all_keys(strings_file: pathlib.Path) -> Iterable[str]:
with open(strings_file, "r") as file:
for line in file:
match = key_re.match(line)
if match:
yield match.group(1)
def get_all_file_paths(src_dir: os.PathLike) -> Iterable[str]:
for root, _, files in os.walk(src_dir):
for name in files:
yield os.path.join(root, name)
def matching_substrings(file_path: str, substrings: Iterable[bytes]) -> Iterable[bytes]:
with open(file_path, "rb") as file:
contents = file.read()
return filter(lambda s: s in contents, substrings)
def get_unused_keys(
strings_file: pathlib.Path, src_dirs: Iterable[pathlib.Path]
) -> Iterable[str]:
all_keys = get_all_keys(strings_file)
all_keys_as_bytes = [key.encode() for key in all_keys]
keys_not_seen = set(all_keys_as_bytes)
for src_dir in src_dirs:
for file_path in get_all_file_paths(src_dir):
keys_seen = matching_substrings(file_path, keys_not_seen)
keys_not_seen -= set(keys_seen)
return map(lambda k: k.decode(), keys_not_seen)
def main() -> None:
args = parse_args()
unused_keys = get_unused_keys(args.strings, args.src_dirs)
sorted_unused_keys = sorted(unused_keys)
for key in sorted_unused_keys:
print(key)
if len(sorted_unused_keys) != 0:
sys.exit(1)
if __name__ == "__main__":
main()