Skip to content

Commit

Permalink
[1469] Update Translation Scripting
Browse files Browse the repository at this point in the history
Also runs through the Black linter for parsability sake.
  • Loading branch information
Rixxan committed May 30, 2024
1 parent 3dd3652 commit b6d4371
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 71 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/push-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,4 @@ jobs:
- name: translation checks
run: |
output=$(python ./scripts/find_localised_strings.py --compare-lang L10n/en.template --directory . --ignore coriolis-data 2>&1)
if [ -n "$output" ]; then
echo $output
exit 1
fi
python ./scripts/find_localised_strings.py --compare-lang L10n/en.template --directory . --ignore coriolis-data
171 changes: 105 additions & 66 deletions scripts/find_localised_strings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Search all given paths recursively for localised string calls."""

from __future__ import annotations

import argparse
Expand All @@ -17,29 +18,35 @@ def get_func_name(thing: ast.AST) -> str:

if isinstance(thing, ast.Attribute):
return get_func_name(thing.value)
return ''
return ""


def get_arg(call: ast.Call) -> str:
"""Extract the argument string to the translate function."""
if len(call.args) > 1:
print('??? > 1 args', call.args, file=sys.stderr)
print("??? > 1 args", call.args, file=sys.stderr)

arg = call.args[0]
if isinstance(arg, ast.Constant):
return arg.value
if isinstance(arg, ast.Name):
return f'VARIABLE! CHECK CODE! {arg.id}'
return f'Unknown! {type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}'
return f"VARIABLE! CHECK CODE! {arg.id}"
return f"Unknown! {type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}"


def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]:
"""Recursively find ast.Calls in a statement."""
out = []
for n in ast.iter_child_nodes(statement):
out.extend(find_calls_in_stmt(n))
if isinstance(statement, ast.Call) and get_func_name(statement.func) in ('tr', 'translations'):
if ast.unparse(statement).find('.tl') != -1 or ast.unparse(statement).find('translate') != -1:
if isinstance(statement, ast.Call) and get_func_name(statement.func) in (
"tr",
"translations",
):
if (
ast.unparse(statement).find(".tl") != -1
or ast.unparse(statement).find("translate") != -1
):
out.append(statement)
return out

Expand All @@ -53,11 +60,13 @@ def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]:
The difference is necessary in order to tell if a 'above' LANG comment is for
its own line (SAME_LINE), or meant to be for this following line (OWN_LINE).
"""
COMMENT_SAME_LINE_RE = re.compile(r'^.*?(#.*)$')
COMMENT_OWN_LINE_RE = re.compile(r'^\s*?(#.*)$')
COMMENT_SAME_LINE_RE = re.compile(r"^.*?(#.*)$")
COMMENT_OWN_LINE_RE = re.compile(r"^\s*?(#.*)$")


def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> str | None: # noqa: CCR001
def extract_comments( # noqa: CCR001
call: ast.Call, lines: list[str], file: pathlib.Path
) -> str | None:
"""
Extract comments from source code based on the given call.
Expand All @@ -83,23 +92,23 @@ def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> st
match = COMMENT_OWN_LINE_RE.match(above_line)
if match:
above_comment = match.group(1).strip()
if not above_comment.startswith('# LANG:'):
bad_comment = f'Unknown comment for {file}:{call.lineno} {above_line}'
if not above_comment.startswith("# LANG:"):
bad_comment = f"Unknown comment for {file}:{call.lineno} {above_line}"
above_comment = None

else:
above_comment = above_comment.replace('# LANG:', '').strip()
above_comment = above_comment.replace("# LANG:", "").strip()

if current_line is not None:
match = COMMENT_SAME_LINE_RE.match(current_line)
if match:
current_comment = match.group(1).strip()
if not current_comment.startswith('# LANG:'):
bad_comment = f'Unknown comment for {file}:{call.lineno} {current_line}'
if not current_comment.startswith("# LANG:"):
bad_comment = f"Unknown comment for {file}:{call.lineno} {current_line}"
current_comment = None

else:
current_comment = current_comment.replace('# LANG:', '').strip()
current_comment = current_comment.replace("# LANG:", "").strip()

if current_comment is not None:
out = current_comment
Expand All @@ -109,13 +118,13 @@ def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> st
print(bad_comment, file=sys.stderr)

if out is None:
print(f'No comment for {file}:{call.lineno} {current_line}', file=sys.stderr)
print(f"No comment for {file}:{call.lineno} {current_line}", file=sys.stderr)
return out


def scan_file(path: pathlib.Path) -> list[ast.Call]:
"""Scan a file for ast.Calls."""
data = path.read_text(encoding='utf-8')
data = path.read_text(encoding="utf-8")
lines = data.splitlines()
parsed = ast.parse(data)
out: list[ast.Call] = []
Expand All @@ -125,13 +134,15 @@ def scan_file(path: pathlib.Path) -> list[ast.Call]:

# see if we can extract any comments
for call in out:
setattr(call, 'comment', extract_comments(call, lines, path))
setattr(call, "comment", extract_comments(call, lines, path))

out.sort(key=lambda c: c.lineno)
return out


def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) -> dict[pathlib.Path, list[ast.Call]]:
def scan_directory(
path: pathlib.Path, skip: list[pathlib.Path] | None = None
) -> dict[pathlib.Path, list[ast.Call]]:
"""
Scan a directory for expected callsites.
Expand All @@ -145,7 +156,7 @@ def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) -
if any(same_path.name == thing.name for same_path in skip):
continue

if thing.is_file() and thing.suffix == '.py':
if thing.is_file() and thing.suffix == ".py":
out[thing] = scan_file(thing)
elif thing.is_dir():
out.update(scan_directory(thing, skip))
Expand All @@ -163,10 +174,10 @@ def parse_template(path) -> set[str]:
"""
lang_re = re.compile(r'\s*"([^"]+)"\s*=\s*"([^"]+)"\s*;\s*$')
out = set()
with open(path, encoding='utf-8') as file:
with open(path, encoding="utf-8") as file:
for line in file:
match = lang_re.match(line.strip())
if match and match.group(1) != '!Language':
if match and match.group(1) != "!Language":
out.add(match.group(1))

return out
Expand All @@ -183,14 +194,16 @@ class FileLocation:
line_end_col: int | None

@staticmethod
def from_call(path: pathlib.Path, c: ast.Call) -> 'FileLocation':
def from_call(path: pathlib.Path, c: ast.Call) -> "FileLocation":
"""
Create a FileLocation from a Call and Path.
:param path: Path to the file this FileLocation is in
:param c: Call object to extract line information from
"""
return FileLocation(path, c.lineno, c.col_offset, c.end_lineno, c.end_col_offset)
return FileLocation(
path, c.lineno, c.col_offset, c.end_lineno, c.end_col_offset
)


@dataclasses.dataclass
Expand Down Expand Up @@ -238,95 +251,121 @@ def generate_lang_template(data: dict[pathlib.Path, list[ast.Call]]) -> str:
entries: list[LangEntry] = []
for path, calls in data.items():
for c in calls:
entries.append(LangEntry([FileLocation.from_call(path, c)], get_arg(c), [getattr(c, 'comment')]))
entries.append(
LangEntry(
[FileLocation.from_call(path, c)],
get_arg(c),
[getattr(c, "comment")],
)
)

deduped = dedupe_lang_entries(entries)
out = '''/* Language name */
out = """/* Language name */
"!Language" = "English";
'''
print(f'Done Deduping entries {len(entries)=} {len(deduped)=}', file=sys.stderr)
"""
print(f"Done Deduping entries {len(entries)=} {len(deduped)=}", file=sys.stderr)
for entry in deduped:
assert len(entry.comments) == len(entry.locations)

comment_set = set()
for comment, loc in zip(entry.comments, entry.locations):
if comment:
comment_set.add(f'{loc.path.name}: {comment};')
comment_set.add(f"{loc.path.name}: {comment};")

files = 'In files: ' + entry.files()
comment = ' '.join(comment_set).strip()
files = "In files: " + entry.files()
comment = " ".join(comment_set).strip()

header = f'{comment} {files}'.strip()
header = f"{comment} {files}".strip()
string = f'"{entry.string}"'
out += f'/* {header} */\n'
out += f'{string} = {string};\n\n'
out += f"/* {header} */\n"
out += f"{string} = {string};\n\n"

return out


if __name__ == '__main__':
def main(): # noqa: CCR001
"""Run the Translation Checker."""
parser = argparse.ArgumentParser()
parser.add_argument('--directory', help='Directory to search from', default='.')
parser.add_argument('--ignore', action='append', help='directories to ignore', default=['venv', '.venv', '.git'])
parser.add_argument("--directory", help="Directory to search from", default=".")
parser.add_argument(
"--ignore",
action="append",
help="Directories to ignore",
default=["venv", ".venv", ".git"],
)
group = parser.add_mutually_exclusive_group()
group.add_argument('--json', action='store_true', help='JSON output')
group.add_argument('--lang', help='en.template "strings" output to specified file, "-" for stdout')
group.add_argument('--compare-lang', help='en.template file to compare against')
group.add_argument("--json", action="store_true", help="JSON output")
group.add_argument(
"--lang", help='en.template "strings" output to specified file, "-" for stdout'
)
group.add_argument("--compare-lang", help="en.template file to compare against")

args = parser.parse_args()

directory = pathlib.Path(args.directory)
res = scan_directory(directory, [pathlib.Path(p) for p in args.ignore])

if args.compare_lang is not None and len(args.compare_lang) > 0:
output = []

if args.compare_lang:
seen = set()
template = parse_template(args.compare_lang)

for file, calls in res.items():
for c in calls:
arg = get_arg(c)
if arg in template:
seen.add(arg)
else:
print(f'NEW! {file}:{c.lineno}: {arg!r}')
output.append(f"NEW! {file}:{c.lineno}: {arg!r}")

for old in set(template) ^ seen:
print(f'No longer used: {old!r}')
output.append(f"No longer used: {old!r}")

elif args.json:
to_print_data = [
{
'path': str(path),
'string': get_arg(c),
'reconstructed': ast.unparse(c),
'start_line': c.lineno,
'start_offset': c.col_offset,
'end_line': c.end_lineno,
'end_offset': c.end_col_offset,
'comment': getattr(c, 'comment', None)
} for (path, calls) in res.items() for c in calls
"path": str(path),
"string": get_arg(c),
"reconstructed": ast.unparse(c),
"start_line": c.lineno,
"start_offset": c.col_offset,
"end_line": c.end_lineno,
"end_offset": c.end_col_offset,
"comment": getattr(c, "comment", None),
}
for path, calls in res.items()
for c in calls
]

print(json.dumps(to_print_data, indent=2))
output.append(json.dumps(to_print_data, indent=2))

elif args.lang:
if args.lang == '-':
print(generate_lang_template(res))

lang_template = generate_lang_template(res)
if args.lang == "-":
output.append(lang_template)
else:
with open(args.lang, mode='w+', newline='\n') as langfile:
langfile.writelines(generate_lang_template(res))
with open(args.lang, mode="w+", newline="\n", encoding="UTF-8") as langfile:
langfile.writelines(lang_template)

else:
for path, calls in res.items():
if len(calls) == 0:
if not calls:
continue

print(path)
output.append(str(path))
for c in calls:
print(
f' {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}({c.end_col_offset:3d})\t', ast.unparse(c)
output.append(
f" {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}({c.end_col_offset:3d})\t{ast.unparse(c)}"
)
output.append("")

# Print all collected output at the end
if output:
print("\n".join(output))
sys.exit(1)


print()
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
sys.exit()

0 comments on commit b6d4371

Please sign in to comment.