From 6df8db511bbafa6040d047af15bb25fe3ceb2258 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 18:31:02 +0100 Subject: [PATCH] ci(i18n): add Ukrainian docs refresh workflow --- .github/workflows/translate-uk.yml | 193 ++++++++++++++++++++++++++ .github/workflows/translate-zh-cn.yml | 189 +++++++++++++++++++++++-- README.md | 7 +- docs/.i18n/README.md | 57 ++------ 4 files changed, 387 insertions(+), 59 deletions(-) create mode 100644 .github/workflows/translate-uk.yml diff --git a/.github/workflows/translate-uk.yml b/.github/workflows/translate-uk.yml new file mode 100644 index 000000000..cd74bf31a --- /dev/null +++ b/.github/workflows/translate-uk.yml @@ -0,0 +1,193 @@ +name: Translate uk + +on: + push: + branches: + - main + paths-ignore: + - docs/uk/** + - docs/.i18n/uk.tm.jsonl + schedule: + - cron: "29 * * * *" + repository_dispatch: + types: + - translate-uk-release + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: translate-uk + cancel-in-progress: false + +jobs: + translate-uk: + runs-on: ubuntu-latest + steps: + - name: Checkout publish repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Read source metadata + id: meta + run: | + node - <<'NODE' + const fs = require("node:fs"); + const path = ".openclaw-sync/source.json"; + const data = JSON.parse(fs.readFileSync(path, "utf8")); + if (!data.repository || !data.sha) { + throw new Error(`invalid source metadata in ${path}`); + } + fs.appendFileSync(process.env.GITHUB_OUTPUT, `repository=${data.repository}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `sha=${data.sha}\n`); + NODE + + - name: Checkout source repo + uses: actions/checkout@v4 + with: + repository: ${{ steps.meta.outputs.repository }} + ref: ${{ steps.meta.outputs.sha }} + path: source + fetch-depth: 1 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: "1.23" + + - name: 
Prune stale uk pages + run: | + python - <<'PY' + from pathlib import Path + root = Path("docs") + locale_root = root / "uk" + if not locale_root.exists(): + raise SystemExit(0) + for path in sorted(locale_root.rglob("*"), reverse=True): + if path.is_dir(): + if not any(path.iterdir()): + path.rmdir() + continue + rel = path.relative_to(locale_root) + source = root / rel + if not source.exists(): + path.unlink() + for path in sorted(locale_root.rglob("*"), reverse=True): + if path.is_dir() and not any(path.iterdir()): + path.rmdir() + PY + + - name: Build pending docs file list + id: pending + run: | + python - <<'PY' + import hashlib + import os + import re + from pathlib import Path + + source_hash_re = re.compile(r'^x-i18n:\n(?: .*\n)*? source_hash: ([0-9a-f]{64})$', re.M) + locale_dir_re = re.compile(r"^[a-z]{2,3}(?:-[A-Za-z0-9]{2,8})?$") + locale_dirs = { + path.name + for path in Path("docs").iterdir() + if path.is_dir() and locale_dir_re.match(path.name) + } + + def stored_source_hash(path: Path) -> str: + if not path.exists(): + return "" + text = path.read_text(encoding="utf-8", errors="ignore") + match = source_hash_re.search(text) + if not match: + return "" + return match.group(1).strip() + + all_files = [] + pending_files = [] + for path in Path("docs").rglob("*"): + if not path.is_file(): + continue + if path.suffix.lower() not in {".md", ".mdx"}: + continue + rel = path.as_posix() + rel_doc = path.relative_to("docs") + if rel_doc.parts and rel_doc.parts[0] in locale_dirs: + continue + if rel.startswith("docs/.generated/"): + continue + all_files.append(str(path.resolve())) + + locale_path = Path("docs") / "uk" / rel_doc + source_hash = hashlib.sha256(path.read_bytes()).hexdigest() + if stored_source_hash(locale_path) != source_hash: + pending_files.append(str(path.resolve())) + + Path(".openclaw-sync").mkdir(exist_ok=True) + Path(".openclaw-sync/docs-i18n-files.txt").write_text("\n".join(pending_files) + ("\n" if pending_files else "")) + 
print(f"all_docs={len(all_files)} pending_docs={len(pending_files)}") + with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh: + fh.write(f"all_count={len(all_files)}\n") + fh.write(f"pending_count={len(pending_files)}\n") + PY + + - name: Translate changed docs into uk + if: steps.pending.outputs.pending_count != '0' + env: + OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY }} + OPENCLAW_DOCS_I18N_PROVIDER: openai + OPENCLAW_DOCS_I18N_MODEL: gpt-5.4 + OPENCLAW_DOCS_I18N_PROMPT_TIMEOUT: 10m + run: | + if [ ! -s .openclaw-sync/docs-i18n-files.txt ]; then + echo "No docs files found." + exit 0 + fi + + mapfile -t DOC_FILES < .openclaw-sync/docs-i18n-files.txt + attempt=1 + max_attempts=5 + while [ "$attempt" -le "$max_attempts" ]; do + echo "docs-i18n attempt $attempt/$max_attempts" + if ( + cd source/scripts/docs-i18n + go run . \ + --docs "$GITHUB_WORKSPACE/docs" \ + --lang uk \ + --src en \ + --mode doc \ + --thinking low \ + --parallel 8 \ + "${DOC_FILES[@]}" + ); then + exit 0 + fi + + if [ "$attempt" -eq "$max_attempts" ]; then + echo "docs-i18n failed after $max_attempts attempts" + exit 1 + fi + + attempt=$((attempt + 1)) + sleep 5 + done + + - name: Commit uk refresh + run: | + if [ -z "$(git status --porcelain -- docs/uk docs/.i18n/uk.tm.jsonl)" ]; then + echo "No uk translation changes." 
+ exit 0 + fi + + git config user.name "openclaw-docs-i18n[bot]" + git config user.email "openclaw-docs-i18n[bot]@users.noreply.github.com" + git add docs/uk docs/.i18n/uk.tm.jsonl + git commit -m "chore(i18n): refresh uk translations" + git push origin HEAD:main diff --git a/.github/workflows/translate-zh-cn.yml b/.github/workflows/translate-zh-cn.yml index 9b09e2da7..eefab20a1 100644 --- a/.github/workflows/translate-zh-cn.yml +++ b/.github/workflows/translate-zh-cn.yml @@ -1,20 +1,193 @@ name: Translate zh-CN on: + push: + branches: + - main + paths-ignore: + - docs/zh-CN/** + - docs/.i18n/zh-CN.tm.jsonl + schedule: + - cron: "17 * * * *" repository_dispatch: types: - translate-zh-cn-release - schedule: - - cron: "17 3 * * *" workflow_dispatch: permissions: contents: write +concurrency: + group: translate-zh-cn + cancel-in-progress: false + jobs: - translate: - uses: ./.github/workflows/translate-locale-reusable.yml - with: - locale: zh-CN - locale_slug: zh-cn - secrets: inherit + translate-zh-cn: + runs-on: ubuntu-latest + steps: + - name: Checkout publish repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Read source metadata + id: meta + run: | + node - <<'NODE' + const fs = require("node:fs"); + const path = ".openclaw-sync/source.json"; + const data = JSON.parse(fs.readFileSync(path, "utf8")); + if (!data.repository || !data.sha) { + throw new Error(`invalid source metadata in ${path}`); + } + fs.appendFileSync(process.env.GITHUB_OUTPUT, `repository=${data.repository}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `sha=${data.sha}\n`); + NODE + + - name: Checkout source repo + uses: actions/checkout@v4 + with: + repository: ${{ steps.meta.outputs.repository }} + ref: ${{ steps.meta.outputs.sha }} + path: source + fetch-depth: 1 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: "1.23" + + - name: Prune stale zh-CN pages + run: | + 
python - <<'PY' + from pathlib import Path + root = Path("docs") + zh_root = root / "zh-CN" + if not zh_root.exists(): + raise SystemExit(0) + for path in sorted(zh_root.rglob("*"), reverse=True): + if path.is_dir(): + if not any(path.iterdir()): + path.rmdir() + continue + rel = path.relative_to(zh_root) + source = root / rel + if not source.exists(): + path.unlink() + for path in sorted(zh_root.rglob("*"), reverse=True): + if path.is_dir() and not any(path.iterdir()): + path.rmdir() + PY + + - name: Build pending docs file list + id: pending + run: | + python - <<'PY' + import hashlib + import os + import re + from pathlib import Path + + source_hash_re = re.compile(r'^x-i18n:\n(?: .*\n)*? source_hash: ([0-9a-f]{64})$', re.M) + locale_dir_re = re.compile(r"^[a-z]{2,3}(?:-[A-Za-z0-9]{2,8})?$") + locale_dirs = { + path.name + for path in Path("docs").iterdir() + if path.is_dir() and locale_dir_re.match(path.name) + } + + def stored_source_hash(path: Path) -> str: + if not path.exists(): + return "" + text = path.read_text(encoding="utf-8", errors="ignore") + match = source_hash_re.search(text) + if not match: + return "" + return match.group(1).strip() + + all_files = [] + pending_files = [] + for path in Path("docs").rglob("*"): + if not path.is_file(): + continue + if path.suffix.lower() not in {".md", ".mdx"}: + continue + rel = path.as_posix() + rel_doc = path.relative_to("docs") + if rel_doc.parts and rel_doc.parts[0] in locale_dirs: + continue + if rel.startswith("docs/.generated/"): + continue + all_files.append(str(path.resolve())) + + zh_path = Path("docs") / "zh-CN" / rel_doc + source_hash = hashlib.sha256(path.read_bytes()).hexdigest() + if stored_source_hash(zh_path) != source_hash: + pending_files.append(str(path.resolve())) + + Path(".openclaw-sync").mkdir(exist_ok=True) + Path(".openclaw-sync/docs-i18n-files.txt").write_text("\n".join(pending_files) + ("\n" if pending_files else "")) + print(f"all_docs={len(all_files)} 
pending_docs={len(pending_files)}") + with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh: + fh.write(f"all_count={len(all_files)}\n") + fh.write(f"pending_count={len(pending_files)}\n") + PY + + - name: Translate changed docs into zh-CN + if: steps.pending.outputs.pending_count != '0' + env: + OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY }} + OPENCLAW_DOCS_I18N_PROVIDER: openai + OPENCLAW_DOCS_I18N_MODEL: gpt-5.4 + OPENCLAW_DOCS_I18N_PROMPT_TIMEOUT: 10m + run: | + if [ ! -s .openclaw-sync/docs-i18n-files.txt ]; then + echo "No docs files found." + exit 0 + fi + + mapfile -t DOC_FILES < .openclaw-sync/docs-i18n-files.txt + attempt=1 + max_attempts=5 + while [ "$attempt" -le "$max_attempts" ]; do + echo "docs-i18n attempt $attempt/$max_attempts" + if ( + cd source/scripts/docs-i18n + go run . \ + --docs "$GITHUB_WORKSPACE/docs" \ + --lang zh-CN \ + --src en \ + --mode doc \ + --thinking low \ + --parallel 8 \ + "${DOC_FILES[@]}" + ); then + exit 0 + fi + + if [ "$attempt" -eq "$max_attempts" ]; then + echo "docs-i18n failed after $max_attempts attempts" + exit 1 + fi + + attempt=$((attempt + 1)) + sleep 5 + done + + - name: Commit zh-CN refresh + run: | + if [ -z "$(git status --porcelain -- docs/zh-CN docs/.i18n/zh-CN.tm.jsonl)" ]; then + echo "No zh-CN translation changes." + exit 0 + fi + + git config user.name "openclaw-docs-i18n[bot]" + git config user.email "openclaw-docs-i18n[bot]@users.noreply.github.com" + git add docs/zh-CN docs/.i18n/zh-CN.tm.jsonl + git commit -m "chore(i18n): refresh zh-CN translations" + git push origin HEAD:main diff --git a/README.md b/README.md index 96dec9d6d..8371cc228 100644 --- a/README.md +++ b/README.md @@ -9,23 +9,22 @@ Source of truth lives in [`openclaw/openclaw`](https://github.com/openclaw/openc 1. English docs are authored in `openclaw/openclaw`. 2. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into this repo. 3. 
This repo stores the published docs tree plus generated locale output. -4. `openclaw/docs/.github/workflows/translate-zh-cn.yml`, `translate-ja-jp.yml`, `translate-es.yml`, `translate-pt-br.yml`, `translate-ko.yml`, `translate-de.yml`, `translate-fr.yml`, `translate-ar.yml`, `translate-it.yml`, `translate-tr.yml`, `translate-id.yml`, and `translate-pl.yml` refresh the generated locale trees on a staggered daily schedule, on manual dispatch, and after release dispatches from `openclaw/openclaw`. +4. `openclaw/docs/.github/workflows/translate-zh-cn.yml` and `translate-uk.yml` run on push, schedule, and release dispatch to refresh `docs/zh-CN/**` and `docs/uk/**`. ## Translation behavior -- zh-CN, ja-JP, es, pt-BR, ko, de, fr, ar, it, tr, id, and pl pages are generated output. +- zh-CN and uk pages are generated output. - Each translated page stores `x-i18n.source_hash`. - The translate workflow computes a pending file list before calling the model. - If no English source hashes changed, the workflow skips the expensive translation step entirely. - If files changed, only the pending files are translated. - The workflow retries transient model-format failures. -- Published releases in `openclaw/openclaw` dispatch extra locale refreshes so release-adjacent docs updates do not wait for the daily cron. ## Editing rules - Do not treat this repo as the primary place for English doc edits. - Make English doc changes in `openclaw/openclaw`, then let sync copy them here. -- Generated locale pages in `docs/zh-CN/**`, `docs/ja-JP/**`, `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, and `docs/pl/**` are generated output. +- zh-CN pages in `docs/zh-CN/**` and uk pages in `docs/uk/**` are generated output. - `.openclaw-sync/source.json` records which `openclaw/openclaw` commit this mirror was synced from. 
## Secrets diff --git a/docs/.i18n/README.md b/docs/.i18n/README.md index 3581adcab..89cb9292b 100644 --- a/docs/.i18n/README.md +++ b/docs/.i18n/README.md @@ -2,40 +2,12 @@ This folder stores translation config for the source docs repo. -Generated locale trees and live translation memory now live in the publish repo: +Generated locale pages and live locale translation memory now live in the publish repo (`openclaw/docs`, local sibling checkout `~/Projects/openclaw-docs`). -- repo: `openclaw/docs` -- local checkout: `~/Projects/openclaw-docs` +## Files -## Source of truth - -- English docs are authored in `openclaw/openclaw`. -- The source docs tree lives under `docs/`. -- The source repo no longer keeps committed generated locale trees such as `docs/zh-CN/**`, `docs/ja-JP/**`, `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, or `docs/pl/**`. - -## End-to-end flow - -1. Edit English docs in `openclaw/openclaw`. -2. Push to `main`. -3. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into `openclaw/docs`. -4. The sync script rewrites the publish `docs/docs.json` so the generated locale picker blocks exist there even though they are no longer committed in the source repo. -5. `openclaw/docs/.github/workflows/translate-zh-cn.yml` refreshes `docs/zh-CN/**` once a day, on demand, and after source-repo release dispatches. -6. `openclaw/docs/.github/workflows/translate-ja-jp.yml` does the same for `docs/ja-JP/**`. -7. `openclaw/docs/.github/workflows/translate-es.yml`, `translate-pt-br.yml`, `translate-ko.yml`, `translate-de.yml`, `translate-fr.yml`, `translate-ar.yml`, `translate-it.yml`, `translate-tr.yml`, `translate-id.yml`, and `translate-pl.yml` do the same for `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, and `docs/pl/**`. 
- -## Why the split exists - -- Keep generated locale output out of the main product repo. -- Keep Mintlify on a single published docs tree. -- Preserve the built-in language switcher by letting the publish repo own generated locale trees. - -## Files in this folder - -- `glossary.<lang>.json` — preferred term mappings used as prompt guidance. -- `ar-navigation.json`, `de-navigation.json`, `es-navigation.json`, `fr-navigation.json`, `id-navigation.json`, `it-navigation.json`, `ja-navigation.json`, `ko-navigation.json`, `pl-navigation.json`, `pt-BR-navigation.json`, `tr-navigation.json`, `zh-Hans-navigation.json` — Mintlify locale picker blocks reinserted into the publish repo during sync. -- `<lang>.tm.jsonl` — translation memory keyed by workflow + model + text hash. - -In this repo, generated locale TM files such as `docs/.i18n/zh-CN.tm.jsonl`, `docs/.i18n/ja-JP.tm.jsonl`, `docs/.i18n/es.tm.jsonl`, `docs/.i18n/pt-BR.tm.jsonl`, `docs/.i18n/ko.tm.jsonl`, `docs/.i18n/de.tm.jsonl`, `docs/.i18n/fr.tm.jsonl`, `docs/.i18n/ar.tm.jsonl`, `docs/.i18n/it.tm.jsonl`, `docs/.i18n/tr.tm.jsonl`, `docs/.i18n/id.tm.jsonl`, and `docs/.i18n/pl.tm.jsonl` are intentionally no longer committed. +- `glossary.<lang>.json` — preferred term mappings (used in prompt guidance). +- `<lang>.tm.jsonl` — translation memory (cache) keyed by workflow + model + text hash. In this repo, locale TM files are generated on demand. ## Glossary format @@ -44,7 +16,9 @@ In this repo, generated locale TM files such as `docs/.i18n/zh-CN.tm.jsonl`, `do ```json { "source": "troubleshooting", - "target": "故障排除" + "target": "故障排除", + "ignore_case": true, + "whole_word": false } ``` @@ -53,19 +27,8 @@ Fields: - `source`: English (or source) phrase to prefer. - `target`: preferred translation output. -## Translation mechanics +## Notes +- Glossary entries are passed to the model as **prompt guidance** (no deterministic rewrites). - `scripts/docs-i18n` still owns translation generation. 
-- Doc mode writes `x-i18n.source_hash` into each translated page. -- Each publish workflow precomputes a pending file list by comparing the current English source hash to the stored locale `x-i18n.source_hash`. -- If the pending count is `0`, the expensive translation step is skipped entirely. -- If there are pending files, the workflow translates only those files. -- The publish workflow retries transient model-format failures, but unchanged files stay skipped because the same hash check runs on each retry. -- The source repo also dispatches zh-CN, ja-JP, es, pt-BR, ko, de, fr, ar, it, tr, id, and pl refreshes after published GitHub releases so release docs can catch up without waiting for the daily cron. - -## Operational notes - -- Sync metadata is written to `.openclaw-sync/source.json` in the publish repo. -- Source repo secret: `OPENCLAW_DOCS_SYNC_TOKEN` -- Publish repo secret: `OPENCLAW_DOCS_I18N_OPENAI_API_KEY` -- If locale output looks stale, check the matching `Translate <locale>` workflow in `openclaw/docs` first. +- The source repo syncs English docs into the publish repo; locale generation runs there per-locale on push, schedule, and release dispatch.