ci(i18n): add Ukrainian docs refresh workflow

This commit is contained in:
Peter Steinberger 2026-04-05 18:31:02 +01:00
parent d305c4f3ed
commit 6df8db511b
No known key found for this signature in database
4 changed files with 387 additions and 59 deletions

193
.github/workflows/translate-uk.yml vendored Normal file
View File

@ -0,0 +1,193 @@
# Refreshes the generated Ukrainian docs tree (docs/uk/**).
name: Translate uk

on:
  # Manual runs.
  workflow_dispatch:
  # Refresh requested through a repository_dispatch event of this type.
  repository_dispatch:
    types: [translate-uk-release]
  # Every hour, at minute 29.
  schedule:
    - cron: "29 * * * *"
  # Pushes to main, unless they only touch this locale's own generated output.
  push:
    branches: [main]
    paths-ignore:
      - "docs/uk/**"
      - docs/.i18n/uk.tm.jsonl

# The final step commits and pushes, so the token needs write access.
permissions:
  contents: write

# A newer run never cancels one that is already in progress.
concurrency:
  group: translate-uk
  cancel-in-progress: false

jobs:
  translate-uk:
    runs-on: ubuntu-latest
    steps:
# Check out this (publish) repository into the workspace root.
- name: Checkout publish repo
  uses: actions/checkout@v4
  with:
    # 0 = fetch full history instead of a shallow clone.
    fetch-depth: 0
# Publishes `repository` and `sha` from the sync metadata file as step outputs.
- name: Read source metadata
  id: meta
  run: |
    node - <<'NODE'
    const { readFileSync, appendFileSync } = require("node:fs");

    // Sync metadata file; it must name both a repository and a commit.
    const path = ".openclaw-sync/source.json";
    const { repository, sha } = JSON.parse(readFileSync(path, "utf8"));
    if (!(repository && sha)) {
      throw new Error(`invalid source metadata in ${path}`);
    }

    // One `key=value` line per output, appended to the step output file.
    for (const line of [`repository=${repository}`, `sha=${sha}`]) {
      appendFileSync(process.env.GITHUB_OUTPUT, `${line}\n`);
    }
    NODE
# Check out the source repository at the commit recorded by the `meta` step.
- name: Checkout source repo
  uses: actions/checkout@v4
  with:
    repository: ${{ steps.meta.outputs.repository }}
    ref: ${{ steps.meta.outputs.sha }}
    # Lands in ./source; later steps run scripts from source/scripts/docs-i18n.
    path: source
    # Only the pinned commit is needed.
    fetch-depth: 1
# Toolchains for the later steps; the translation step invokes `go run`.
- name: Setup Node
  uses: actions/setup-node@v4
  with:
    node-version: 22
- name: Setup Go
  uses: actions/setup-go@v5
  with:
    # Quoted so the version stays a string.
    go-version: '1.23'
# Deletes files under docs/uk that have no counterpart at the same relative
# path under docs/, then removes directories left empty.
- name: Prune stale uk pages
  run: |
    python - <<'PY'
    from pathlib import Path

    DOCS = Path("docs")
    LOCALE = DOCS / "uk"


    def deepest_first(base):
        # Reverse-sorted, so entries inside a directory come before the directory.
        return sorted(base.rglob("*"), reverse=True)


    def remove_if_empty(directory):
        if next(directory.iterdir(), None) is None:
            directory.rmdir()


    if not LOCALE.exists():
        raise SystemExit(0)

    # Pass 1: drop orphaned files and directories that are already empty.
    for entry in deepest_first(LOCALE):
        if entry.is_dir():
            remove_if_empty(entry)
        elif not (DOCS / entry.relative_to(LOCALE)).exists():
            entry.unlink()

    # Pass 2: sweep up any directories that are empty after pass 1.
    for entry in deepest_first(LOCALE):
        if entry.is_dir():
            remove_if_empty(entry)
    PY
# Writes the absolute paths of source docs whose SHA-256 differs from the
# `source_hash` stored in the matching docs/uk page, and exposes
# `all_count` / `pending_count` as step outputs.
- name: Build pending docs file list
  id: pending
  run: |
    python - <<'PY'
    import hashlib
    import os
    import re
    from pathlib import Path

    DOCS = Path("docs")
    LOCALE = "uk"
    DOC_SUFFIXES = {".md", ".mdx"}
    SOURCE_HASH_RE = re.compile(r'^x-i18n:\n(?: .*\n)*? source_hash: ([0-9a-f]{64})$', re.M)
    LOCALE_DIR_RE = re.compile(r"^[a-z]{2,3}(?:-[A-Za-z0-9]{2,8})?$")

    # Top-level docs directories whose names look like locale tags; their
    # contents are excluded from the source set.
    # NOTE(review): the pattern accepts any 2-3 lowercase-letter name, so a
    # non-locale directory with such a name would be skipped too - confirm
    # none exist under docs/.
    locale_dirs = {
        child.name
        for child in DOCS.iterdir()
        if child.is_dir() and LOCALE_DIR_RE.match(child.name)
    }


    def stored_source_hash(page: Path) -> str:
        """Return the source_hash recorded in `page`, or "" when missing."""
        if not page.exists():
            return ""
        found = SOURCE_HASH_RE.search(page.read_text(encoding="utf-8", errors="ignore"))
        return found.group(1).strip() if found else ""


    def is_source_doc(doc: Path) -> bool:
        """True for .md/.mdx files outside locale trees and docs/.generated/."""
        if not doc.is_file() or doc.suffix.lower() not in DOC_SUFFIXES:
            return False
        parts = doc.relative_to(DOCS).parts
        if parts and parts[0] in locale_dirs:
            return False
        return not doc.as_posix().startswith("docs/.generated/")


    source_docs = [doc for doc in DOCS.rglob("*") if is_source_doc(doc)]
    pending = []
    for doc in source_docs:
        translated = DOCS / LOCALE / doc.relative_to(DOCS)
        current = hashlib.sha256(doc.read_bytes()).hexdigest()
        if stored_source_hash(translated) != current:
            pending.append(str(doc.resolve()))

    sync_dir = Path(".openclaw-sync")
    sync_dir.mkdir(exist_ok=True)
    # One absolute path per line; the file is empty when nothing is pending.
    (sync_dir / "docs-i18n-files.txt").write_text("".join(f"{item}\n" for item in pending))

    print(f"all_docs={len(source_docs)} pending_docs={len(pending)}")
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as out:
        out.write(f"all_count={len(source_docs)}\n")
        out.write(f"pending_count={len(pending)}\n")
    PY
# Runs the docs-i18n tool from the source checkout over the pending file list,
# retrying the whole pass up to five times. Skipped when nothing is pending.
- name: Translate changed docs into uk
  if: steps.pending.outputs.pending_count != '0'
  env:
    OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY }}
    OPENCLAW_DOCS_I18N_PROVIDER: openai
    OPENCLAW_DOCS_I18N_MODEL: gpt-5.4
    OPENCLAW_DOCS_I18N_PROMPT_TIMEOUT: 10m
  run: |
    if [ ! -s .openclaw-sync/docs-i18n-files.txt ]; then
      echo "No docs files found."
      exit 0
    fi
    mapfile -t DOC_FILES < .openclaw-sync/docs-i18n-files.txt

    # One translation pass, in a subshell so the `cd` does not leak.
    # `cd` is chained with `&&`: errexit is ignored while a command is being
    # tested by `if`, so without it a failed `cd` would still run `go run .`
    # from the wrong directory.
    translate_pending() (
      cd source/scripts/docs-i18n &&
        go run . \
          --docs "$GITHUB_WORKSPACE/docs" \
          --lang uk \
          --src en \
          --mode doc \
          --thinking low \
          --parallel 8 \
          "${DOC_FILES[@]}"
    )

    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      echo "docs-i18n attempt $attempt/$max_attempts"
      if translate_pending; then
        exit 0
      fi
      # Short pause between attempts; no pause after the last one.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep 5
      fi
    done
    echo "docs-i18n failed after $max_attempts attempts"
    exit 1
# Commits and pushes the refreshed uk pages and translation memory, if any.
- name: Commit uk refresh
  run: |
    # Pass only paths that exist or are already tracked: `git add` aborts on
    # a pathspec that matches nothing (e.g. before the TM file first exists).
    paths=()
    for candidate in docs/uk docs/.i18n/uk.tm.jsonl; do
      if [ -e "$candidate" ] || [ -n "$(git ls-files -- "$candidate")" ]; then
        paths+=("$candidate")
      fi
    done
    if [ "${#paths[@]}" -eq 0 ]; then
      echo "No uk translation changes."
      exit 0
    fi

    # Stage first, then compare the index with HEAD. A plain `git diff`
    # ignores untracked files, so newly created translations would be
    # reported as "no changes" and never committed.
    git add -A -- "${paths[@]}"
    if git diff --cached --quiet -- "${paths[@]}"; then
      echo "No uk translation changes."
      exit 0
    fi

    git config user.name "openclaw-docs-i18n[bot]"
    git config user.email "openclaw-docs-i18n[bot]@users.noreply.github.com"
    git commit -m "chore(i18n): refresh uk translations"

    # main can move while translation runs (other locale workflows push to it
    # from their own concurrency groups), so rebase and retry on rejection.
    push_attempts=3
    for attempt in $(seq 1 "$push_attempts"); do
      if git push origin HEAD:main; then
        exit 0
      fi
      echo "push rejected (attempt $attempt/$push_attempts); rebasing onto origin/main"
      git pull --rebase --autostash origin main
    done
    echo "failed to push uk translations after $push_attempts attempts"
    exit 1

View File

@ -1,20 +1,193 @@
# Refreshes the generated Simplified Chinese docs tree (docs/zh-CN/**).
name: Translate zh-CN

on:
  # Pushes to main, unless they only touch this locale's own generated output.
  push:
    branches:
      - main
    paths-ignore:
      - "docs/zh-CN/**"
      - docs/.i18n/zh-CN.tm.jsonl
  # Every hour, at minute 17. `schedule` must appear only once: with a
  # duplicate key most YAML loaders silently keep just the last value.
  schedule:
    - cron: "17 * * * *"
  # Refresh requested through a repository_dispatch event of this type.
  repository_dispatch:
    types:
      - translate-zh-cn-release
  # Manual runs.
  workflow_dispatch:

# The final step commits and pushes, so the token needs write access.
permissions:
  contents: write

# A newer run never cancels one that is already in progress.
concurrency:
  group: translate-zh-cn
  cancel-in-progress: false

jobs:
  # Single inlined job; the steps below do the whole refresh.
  translate-zh-cn:
    runs-on: ubuntu-latest
    steps:
# Check out this (publish) repository into the workspace root.
- name: Checkout publish repo
  uses: actions/checkout@v4
  with:
    # 0 = fetch full history instead of a shallow clone.
    fetch-depth: 0
# Publishes `repository` and `sha` from the sync metadata file as step outputs.
- name: Read source metadata
  id: meta
  run: |
    node - <<'NODE'
    const { readFileSync, appendFileSync } = require("node:fs");

    // Sync metadata file; it must name both a repository and a commit.
    const path = ".openclaw-sync/source.json";
    const { repository, sha } = JSON.parse(readFileSync(path, "utf8"));
    if (!(repository && sha)) {
      throw new Error(`invalid source metadata in ${path}`);
    }

    // One `key=value` line per output, appended to the step output file.
    for (const line of [`repository=${repository}`, `sha=${sha}`]) {
      appendFileSync(process.env.GITHUB_OUTPUT, `${line}\n`);
    }
    NODE
# Check out the source repository at the commit recorded by the `meta` step.
- name: Checkout source repo
  uses: actions/checkout@v4
  with:
    repository: ${{ steps.meta.outputs.repository }}
    ref: ${{ steps.meta.outputs.sha }}
    # Lands in ./source; later steps run scripts from source/scripts/docs-i18n.
    path: source
    # Only the pinned commit is needed.
    fetch-depth: 1
# Toolchains for the later steps; the translation step invokes `go run`.
- name: Setup Node
  uses: actions/setup-node@v4
  with:
    node-version: 22
- name: Setup Go
  uses: actions/setup-go@v5
  with:
    # Quoted so the version stays a string.
    go-version: '1.23'
# Deletes files under docs/zh-CN that have no counterpart at the same relative
# path under docs/, then removes directories left empty.
- name: Prune stale zh-CN pages
  run: |
    python - <<'PY'
    from pathlib import Path

    DOCS = Path("docs")
    ZH_ROOT = DOCS / "zh-CN"


    def deepest_first(base):
        # Reverse-sorted, so entries inside a directory come before the directory.
        return sorted(base.rglob("*"), reverse=True)


    def remove_if_empty(directory):
        if next(directory.iterdir(), None) is None:
            directory.rmdir()


    if not ZH_ROOT.exists():
        raise SystemExit(0)

    # Pass 1: drop orphaned files and directories that are already empty.
    for entry in deepest_first(ZH_ROOT):
        if entry.is_dir():
            remove_if_empty(entry)
        elif not (DOCS / entry.relative_to(ZH_ROOT)).exists():
            entry.unlink()

    # Pass 2: sweep up any directories that are empty after pass 1.
    for entry in deepest_first(ZH_ROOT):
        if entry.is_dir():
            remove_if_empty(entry)
    PY
# Writes the absolute paths of source docs whose SHA-256 differs from the
# `source_hash` stored in the matching docs/zh-CN page, and exposes
# `all_count` / `pending_count` as step outputs.
- name: Build pending docs file list
  id: pending
  run: |
    python - <<'PY'
    import hashlib
    import os
    import re
    from pathlib import Path

    DOCS = Path("docs")
    LOCALE = "zh-CN"
    DOC_SUFFIXES = {".md", ".mdx"}
    SOURCE_HASH_RE = re.compile(r'^x-i18n:\n(?: .*\n)*? source_hash: ([0-9a-f]{64})$', re.M)
    LOCALE_DIR_RE = re.compile(r"^[a-z]{2,3}(?:-[A-Za-z0-9]{2,8})?$")

    # Top-level docs directories whose names look like locale tags; their
    # contents are excluded from the source set.
    # NOTE(review): the pattern accepts any 2-3 lowercase-letter name, so a
    # non-locale directory with such a name would be skipped too - confirm
    # none exist under docs/.
    locale_dirs = {
        child.name
        for child in DOCS.iterdir()
        if child.is_dir() and LOCALE_DIR_RE.match(child.name)
    }


    def stored_source_hash(page: Path) -> str:
        """Return the source_hash recorded in `page`, or "" when missing."""
        if not page.exists():
            return ""
        found = SOURCE_HASH_RE.search(page.read_text(encoding="utf-8", errors="ignore"))
        return found.group(1).strip() if found else ""


    def is_source_doc(doc: Path) -> bool:
        """True for .md/.mdx files outside locale trees and docs/.generated/."""
        if not doc.is_file() or doc.suffix.lower() not in DOC_SUFFIXES:
            return False
        parts = doc.relative_to(DOCS).parts
        if parts and parts[0] in locale_dirs:
            return False
        return not doc.as_posix().startswith("docs/.generated/")


    source_docs = [doc for doc in DOCS.rglob("*") if is_source_doc(doc)]
    pending = []
    for doc in source_docs:
        translated = DOCS / LOCALE / doc.relative_to(DOCS)
        current = hashlib.sha256(doc.read_bytes()).hexdigest()
        if stored_source_hash(translated) != current:
            pending.append(str(doc.resolve()))

    sync_dir = Path(".openclaw-sync")
    sync_dir.mkdir(exist_ok=True)
    # One absolute path per line; the file is empty when nothing is pending.
    (sync_dir / "docs-i18n-files.txt").write_text("".join(f"{item}\n" for item in pending))

    print(f"all_docs={len(source_docs)} pending_docs={len(pending)}")
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as out:
        out.write(f"all_count={len(source_docs)}\n")
        out.write(f"pending_count={len(pending)}\n")
    PY
# Runs the docs-i18n tool from the source checkout over the pending file list,
# retrying the whole pass up to five times. Skipped when nothing is pending.
- name: Translate changed docs into zh-CN
  if: steps.pending.outputs.pending_count != '0'
  env:
    OPENAI_API_KEY: ${{ secrets.OPENCLAW_DOCS_I18N_OPENAI_API_KEY }}
    OPENCLAW_DOCS_I18N_PROVIDER: openai
    OPENCLAW_DOCS_I18N_MODEL: gpt-5.4
    OPENCLAW_DOCS_I18N_PROMPT_TIMEOUT: 10m
  run: |
    if [ ! -s .openclaw-sync/docs-i18n-files.txt ]; then
      echo "No docs files found."
      exit 0
    fi
    mapfile -t DOC_FILES < .openclaw-sync/docs-i18n-files.txt

    # One translation pass, in a subshell so the `cd` does not leak.
    # `cd` is chained with `&&`: errexit is ignored while a command is being
    # tested by `if`, so without it a failed `cd` would still run `go run .`
    # from the wrong directory.
    translate_pending() (
      cd source/scripts/docs-i18n &&
        go run . \
          --docs "$GITHUB_WORKSPACE/docs" \
          --lang zh-CN \
          --src en \
          --mode doc \
          --thinking low \
          --parallel 8 \
          "${DOC_FILES[@]}"
    )

    max_attempts=5
    for attempt in $(seq 1 "$max_attempts"); do
      echo "docs-i18n attempt $attempt/$max_attempts"
      if translate_pending; then
        exit 0
      fi
      # Short pause between attempts; no pause after the last one.
      if [ "$attempt" -lt "$max_attempts" ]; then
        sleep 5
      fi
    done
    echo "docs-i18n failed after $max_attempts attempts"
    exit 1
# Commits and pushes the refreshed zh-CN pages and translation memory, if any.
- name: Commit zh-CN refresh
  run: |
    # Pass only paths that exist or are already tracked: `git add` aborts on
    # a pathspec that matches nothing (e.g. before the TM file first exists).
    paths=()
    for candidate in docs/zh-CN docs/.i18n/zh-CN.tm.jsonl; do
      if [ -e "$candidate" ] || [ -n "$(git ls-files -- "$candidate")" ]; then
        paths+=("$candidate")
      fi
    done
    if [ "${#paths[@]}" -eq 0 ]; then
      echo "No zh-CN translation changes."
      exit 0
    fi

    # Stage first, then compare the index with HEAD. A plain `git diff`
    # ignores untracked files, so newly created translations would be
    # reported as "no changes" and never committed.
    git add -A -- "${paths[@]}"
    if git diff --cached --quiet -- "${paths[@]}"; then
      echo "No zh-CN translation changes."
      exit 0
    fi

    git config user.name "openclaw-docs-i18n[bot]"
    git config user.email "openclaw-docs-i18n[bot]@users.noreply.github.com"
    git commit -m "chore(i18n): refresh zh-CN translations"

    # main can move while translation runs (other locale workflows push to it
    # from their own concurrency groups), so rebase and retry on rejection.
    push_attempts=3
    for attempt in $(seq 1 "$push_attempts"); do
      if git push origin HEAD:main; then
        exit 0
      fi
      echo "push rejected (attempt $attempt/$push_attempts); rebasing onto origin/main"
      git pull --rebase --autostash origin main
    done
    echo "failed to push zh-CN translations after $push_attempts attempts"
    exit 1

View File

@ -9,23 +9,22 @@ Source of truth lives in [`openclaw/openclaw`](https://github.com/openclaw/openc
1. English docs are authored in `openclaw/openclaw`.
2. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into this repo.
3. This repo stores the published docs tree plus generated locale output.
4. `openclaw/docs/.github/workflows/translate-zh-cn.yml`, `translate-ja-jp.yml`, `translate-es.yml`, `translate-pt-br.yml`, `translate-ko.yml`, `translate-de.yml`, `translate-fr.yml`, `translate-ar.yml`, `translate-it.yml`, `translate-tr.yml`, `translate-id.yml`, and `translate-pl.yml` refresh the generated locale trees on a staggered daily schedule, on manual dispatch, and after release dispatches from `openclaw/openclaw`.
4. `openclaw/docs/.github/workflows/translate-zh-cn.yml` and `translate-uk.yml` refresh `docs/zh-CN/**` and `docs/uk/**`. Each runs on pushes to `main`, on an hourly schedule, on manual dispatch, and on release dispatch from `openclaw/openclaw`.
## Translation behavior
- zh-CN, ja-JP, es, pt-BR, ko, de, fr, ar, it, tr, id, and pl pages are generated output.
- zh-CN and uk pages are generated output.
- Each translated page stores `x-i18n.source_hash`.
- The translate workflow computes a pending file list before calling the model.
- If no English source hashes changed, the workflow skips the expensive translation step entirely.
- If files changed, only the pending files are translated.
- The workflow retries transient model-format failures.
- Published releases in `openclaw/openclaw` dispatch extra locale refreshes so release-adjacent docs updates do not wait for the daily cron.
## Editing rules
- Do not treat this repo as the primary place for English doc edits.
- Make English doc changes in `openclaw/openclaw`, then let sync copy them here.
- Generated locale pages in `docs/zh-CN/**`, `docs/ja-JP/**`, `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, and `docs/pl/**` are generated output.
- zh-CN pages in `docs/zh-CN/**` and uk pages in `docs/uk/**` are generated output.
- `.openclaw-sync/source.json` records which `openclaw/openclaw` commit this mirror was synced from.
## Secrets

View File

@ -2,40 +2,12 @@
This folder stores translation config for the source docs repo.
Generated locale trees and live translation memory now live in the publish repo:
Generated locale pages and live locale translation memory now live in the publish repo (`openclaw/docs`, local sibling checkout `~/Projects/openclaw-docs`).
- repo: `openclaw/docs`
- local checkout: `~/Projects/openclaw-docs`
## Files
## Source of truth
- English docs are authored in `openclaw/openclaw`.
- The source docs tree lives under `docs/`.
- The source repo no longer keeps committed generated locale trees such as `docs/zh-CN/**`, `docs/ja-JP/**`, `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, or `docs/pl/**`.
## End-to-end flow
1. Edit English docs in `openclaw/openclaw`.
2. Push to `main`.
3. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into `openclaw/docs`.
4. The sync script rewrites the publish `docs/docs.json` so the generated locale picker blocks exist there even though they are no longer committed in the source repo.
5. `openclaw/docs/.github/workflows/translate-zh-cn.yml` refreshes `docs/zh-CN/**` once a day, on demand, and after source-repo release dispatches.
6. `openclaw/docs/.github/workflows/translate-ja-jp.yml` does the same for `docs/ja-JP/**`.
7. `openclaw/docs/.github/workflows/translate-es.yml`, `translate-pt-br.yml`, `translate-ko.yml`, `translate-de.yml`, `translate-fr.yml`, `translate-ar.yml`, `translate-it.yml`, `translate-tr.yml`, `translate-id.yml`, and `translate-pl.yml` do the same for `docs/es/**`, `docs/pt-BR/**`, `docs/ko/**`, `docs/de/**`, `docs/fr/**`, `docs/ar/**`, `docs/it/**`, `docs/tr/**`, `docs/id/**`, and `docs/pl/**`.
## Why the split exists
- Keep generated locale output out of the main product repo.
- Keep Mintlify on a single published docs tree.
- Preserve the built-in language switcher by letting the publish repo own generated locale trees.
## Files in this folder
- `glossary.<lang>.json` — preferred term mappings used as prompt guidance.
- `ar-navigation.json`, `de-navigation.json`, `es-navigation.json`, `fr-navigation.json`, `id-navigation.json`, `it-navigation.json`, `ja-navigation.json`, `ko-navigation.json`, `pl-navigation.json`, `pt-BR-navigation.json`, `tr-navigation.json`, `zh-Hans-navigation.json` — Mintlify locale picker blocks reinserted into the publish repo during sync.
- `<lang>.tm.jsonl` — translation memory keyed by workflow + model + text hash.
In this repo, generated locale TM files such as `docs/.i18n/zh-CN.tm.jsonl`, `docs/.i18n/ja-JP.tm.jsonl`, `docs/.i18n/es.tm.jsonl`, `docs/.i18n/pt-BR.tm.jsonl`, `docs/.i18n/ko.tm.jsonl`, `docs/.i18n/de.tm.jsonl`, `docs/.i18n/fr.tm.jsonl`, `docs/.i18n/ar.tm.jsonl`, `docs/.i18n/it.tm.jsonl`, `docs/.i18n/tr.tm.jsonl`, `docs/.i18n/id.tm.jsonl`, and `docs/.i18n/pl.tm.jsonl` are intentionally no longer committed.
- `glossary.<lang>.json` — preferred term mappings (used in prompt guidance).
- `<lang>.tm.jsonl` — translation memory (cache) keyed by workflow + model + text hash. In this repo, locale TM files are generated on demand.
## Glossary format
@ -44,7 +16,9 @@ In this repo, generated locale TM files such as `docs/.i18n/zh-CN.tm.jsonl`, `do
```json
{
"source": "troubleshooting",
"target": "故障排除"
"target": "故障排除",
"ignore_case": true,
"whole_word": false
}
```
@ -53,19 +27,8 @@ Fields:
- `source`: English (or source) phrase to prefer.
- `target`: preferred translation output.
## Translation mechanics
## Notes
- Glossary entries are passed to the model as **prompt guidance** (no deterministic rewrites).
- `scripts/docs-i18n` still owns translation generation.
- Doc mode writes `x-i18n.source_hash` into each translated page.
- Each publish workflow precomputes a pending file list by comparing the current English source hash to the stored locale `x-i18n.source_hash`.
- If the pending count is `0`, the expensive translation step is skipped entirely.
- If there are pending files, the workflow translates only those files.
- The publish workflow retries transient model-format failures, but unchanged files stay skipped because the same hash check runs on each retry.
- The source repo also dispatches zh-CN, ja-JP, es, pt-BR, ko, de, fr, ar, it, tr, id, and pl refreshes after published GitHub releases so release docs can catch up without waiting for the daily cron.
## Operational notes
- Sync metadata is written to `.openclaw-sync/source.json` in the publish repo.
- Source repo secret: `OPENCLAW_DOCS_SYNC_TOKEN`
- Publish repo secret: `OPENCLAW_DOCS_I18N_OPENAI_API_KEY`
- If locale output looks stale, check the matching `Translate <locale>` workflow in `openclaw/docs` first.
- The source repo syncs English docs into the publish repo. Locale generation runs there, one workflow per locale, on push, on a schedule, on manual dispatch, and on release dispatch.