From d6e2734f7a6ef5154988f5252e0e683ded70d5a1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 7 May 2026 00:57:40 +0100 Subject: [PATCH] feat: add r2 docs cdn deployment path --- .github/workflows/r2-pages.yml | 73 ++++++++ CLOUDFLARE.md | 284 ++++++++++++++++--------------- README.md | 6 +- package.json | 2 + scripts/docs-site/r2-prepare.mjs | 126 ++++++++++++++ scripts/docs-site/r2-upload.mjs | 107 ++++++++++++ 6 files changed, 460 insertions(+), 138 deletions(-) create mode 100644 .github/workflows/r2-pages.yml create mode 100644 scripts/docs-site/r2-prepare.mjs create mode 100644 scripts/docs-site/r2-upload.mjs diff --git a/.github/workflows/r2-pages.yml b/.github/workflows/r2-pages.yml new file mode 100644 index 000000000..43f684d85 --- /dev/null +++ b/.github/workflows/r2-pages.yml @@ -0,0 +1,73 @@ +name: R2 Pages + +on: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: r2-pages + cancel-in-progress: false + +jobs: + deploy: + name: Build and upload R2 site + runs-on: ubuntu-latest + timeout-minutes: 60 + environment: + name: cloudflare + url: https://documentation.openclaw.ai + steps: + - name: Check out + uses: actions/checkout@v6 + + - name: Read source metadata + id: source-meta + run: | + node - <<'NODE' + const fs = require("node:fs"); + const data = JSON.parse(fs.readFileSync(".openclaw-sync/source.json", "utf8")); + if (!data.repository || !data.sha) throw new Error("invalid .openclaw-sync/source.json"); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `repository=${data.repository}\n`); + fs.appendFileSync(process.env.GITHUB_OUTPUT, `sha=${data.sha}\n`); + NODE + + - name: Check out OpenClaw source + uses: actions/checkout@v6 + with: + repository: ${{ steps.source-meta.outputs.repository }} + ref: ${{ steps.source-meta.outputs.sha }} + path: source + fetch-depth: 1 + + - name: Set up Node + uses: actions/setup-node@v6 + with: + node-version: 24 + cache: npm + + - name: Install + run: npm ci + + - name: Install 
librsvg2-bin + run: sudo apt-get update && sudo apt-get install -y librsvg2-bin + + - name: Build R2 artifact + env: + DOCS_SITE_CNAME: documentation.openclaw.ai + DOCS_SOURCE_REPO_DIR: source + DOCS_SOURCE_REPO_URL: https://github.com/${{ steps.source-meta.outputs.repository }} + DOCS_SOURCE_SHA: ${{ steps.source-meta.outputs.sha }} + run: npm run docs:build:r2 + + - name: Smoke generated site + run: npm run docs:smoke + + - name: Upload changed R2 objects + env: + CLOUDFLARE_ACCOUNT_ID: 91b59577e757131d68d55a471fe32aca + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + CLOUDFLARE_R2_BUCKET: openclaw-docs + R2_UPLOAD_CONCURRENCY: 8 + run: npm run docs:r2:upload diff --git a/CLOUDFLARE.md b/CLOUDFLARE.md index ddb11bc65..a45f6e26c 100644 --- a/CLOUDFLARE.md +++ b/CLOUDFLARE.md @@ -2,176 +2,186 @@ Internal notes for `https://documentation.openclaw.ai`. -## Current Setup +## Target Design -- `documentation.openclaw.ai/*` is served by the Cloudflare Worker `openclaw-docs-router`. -- The Worker is deployed from this repo with `wrangler.toml`. -- Static files come from Workers Static Assets, bound as `env.ASSETS`. -- The Worker route is: - - zone: `openclaw.ai` - - account: `Services@openclaw.org` - - account id: `91b59577e757131d68d55a471fe32aca` -- DNS still has a proxied `CNAME` for `documentation.openclaw.ai` pointing at `openclaw.github.io`, but that is only a proxied placeholder. The Worker route handles traffic first. -- `docs.openclaw.ai` still points at Mintlify. +Vincent's design is the desired steady state: -Source files: +- Cloudflare R2 bucket `openclaw-docs` stores the full generated docs site. +- `documentation.openclaw.ai` is served from R2 through Cloudflare's CDN, not through a Worker on normal page traffic. +- `documentation.openclaw.ai/ask-molty/*` stays on the separate Ask Molty Worker. +- The docs site stays static/CDN-first, with full locale HTML, locale markdown, Pagefind search, and source indexes. 
-- `wrangler.toml` -- `workers/docs-router.ts` -- `.github/workflows/pages.yml` -- `scripts/docs-site/cloudflare-prune.mjs` +The repo-side pieces are in place: -Ops note lives in `~/Projects/manager/DNS.md`. +- `npm run docs:build:r2` +- `scripts/docs-site/r2-prepare.mjs` +- `scripts/docs-site/r2-upload.mjs` +- `.github/workflows/r2-pages.yml` + +`r2-prepare.mjs` writes `dist/docs-r2-manifest.json`. The manifest includes each object key, source file, SHA-256, content type, cache policy, and slashless HTML aliases such as: + +- `/concepts/models` -> `concepts/models/index.html` +- `/concepts/models.md` -> `concepts/models.md` + +`r2-upload.mjs` downloads `.openclaw-docs-r2-manifest.json` from R2, compares hashes and metadata, uploads only changed objects, and then writes the new manifest back. The first upload seeds everything; later uploads should be small. + +## Current Production State + +Production is still on the safe Worker Static Assets fallback until the Cloudflare account can write R2: + +- Worker: `openclaw-docs-router` +- Route: `documentation.openclaw.ai/*` +- Static assets binding: `env.ASSETS` +- Header: `X-OpenClaw-Docs-Origin: cloudflare-static-assets` + +The fallback exists because the Services@openclaw.org Cloudflare token currently cannot access R2. Local verification against account `91b59577e757131d68d55a471fe32aca` fails before bucket operations with Cloudflare API auth error `10000`. + +Do not remove the Worker route or switch `.github/workflows/pages.yml` to R2-only until R2 access is fixed and the R2 workflow has completed successfully. 
+ +## Required Cloudflare Access + +Cloudflare account: + +- account: `Services@openclaw.org` +- account id: `91b59577e757131d68d55a471fe32aca` +- zone: `openclaw.ai` + +Required token scopes: + +- `Account: R2 Storage: Edit` +- `Zone: DNS: Edit` +- `Zone: Cache Rules: Edit` or `Zone: Rulesets: Edit` +- `Zone: Zone Settings: Edit` +- `Zone: Read` + +R2 must be enabled for the account before bucket creation works. ## Deploy Flow -`.github/workflows/pages.yml` runs on `main` pushes that touch docs/build files. +The production fallback workflow remains: -The workflow: +1. `.github/workflows/pages.yml` +2. `npm run docs:build:cloudflare` +3. `npm run docs:smoke` +4. `npx wrangler@4.88.0 deploy --config wrangler.toml` +5. `docs-live-smoke.yml` -1. Checks out this repo. -2. Reads `.openclaw-sync/source.json`. -3. Checks out the matching `openclaw/openclaw` source commit. -4. Runs `npm ci`. -5. Installs `librsvg2-bin` for OG image rendering. -6. Runs `npm run docs:build:cloudflare`. -7. Runs `npm run docs:smoke`. -8. Runs `npx wrangler@4.88.0 deploy --config wrangler.toml`. -9. Dispatches `docs-live-smoke.yml`. +The R2 target workflow is manual until access is fixed: -Required GitHub secret: +1. `.github/workflows/r2-pages.yml` +2. `npm run docs:build:r2` +3. `npm run docs:smoke` +4. `npm run docs:r2:upload` -- `CLOUDFLARE_API_TOKEN`: Services@openclaw.org Cloudflare token with Worker deploy and route permissions. +Local R2 build: -## Runtime Behavior +```sh +npm run docs:build:r2 +``` -`workers/docs-router.ts` handles: +Local R2 upload after access is fixed: -- HTTP to HTTPS redirect. -- Slashless docs URLs: - - `/concepts/models` serves `/concepts/models/index.html`. - - `/concepts/models/` redirects to `/concepts/models`. -- Markdown URLs: - - `/concepts/models.md` serves markdown. - - `Accept: text/markdown` on `/concepts/models` serves `/concepts/models.md`. -- Static asset serving from the `ASSETS` binding. 
+```sh +source ~/.profile +CLOUDFLARE_ACCOUNT_ID=91b59577e757131d68d55a471fe32aca \ +CLOUDFLARE_R2_BUCKET=openclaw-docs \ +CLOUDFLARE_API_TOKEN="$CRABBOX_CLOUDFLARE_API_TOKEN" \ +npm run docs:r2:upload +``` -The router sets: +## URL Behavior -- `X-OpenClaw-Docs-Origin: cloudflare-static-assets` +The generated R2 manifest lists both canonical files and slashless aliases, and `r2-upload.mjs` uploads each one as its own object: -Use that header to verify traffic is no longer coming from GitHub Pages. +- `/concepts/models` serves HTML from object key `concepts/models`. +- `/concepts/models.md` serves markdown from object key `concepts/models.md`. +- `/docs/platforms/digitalocean` serves the compatibility redirect HTML. -Ask Molty is separate: +Plain R2 custom domains cannot do `Accept: text/markdown` negotiation by themselves. To keep the request path Worker-free, prefer explicit `.md` URLs. If `Accept: text/markdown` must stay, add a tiny Worker in front of only that behavior or keep the current router. -- `documentation.openclaw.ai/ask-molty/*` routes to Worker `openclaw-docs-chat-proxy`. -- That Worker is managed from `~/Projects/manager`. -- It should continue to take precedence over the docs static route. +Root `/` may need a Cloudflare URL rewrite to `/index.html`, depending on R2 custom-domain behavior at cutover time. Test it before removing the fallback Worker. -## Cloudflare Limits +## Cache Policy -Cloudflare Workers Static Assets currently limits asset files per Worker version: +`r2-prepare.mjs` assigns per-object `Cache-Control`: -- Free: `20,000` -- Paid: `100,000` +- hashed/static assets: `public, max-age=31536000, immutable` +- HTML and slashless HTML aliases: `public, max-age=60, s-maxage=86400, stale-while-revalidate=604800` +- markdown, JSON, JSONL, and text indexes: `public, max-age=300, s-maxage=3600, stale-while-revalidate=86400` +- upload manifest (set by `r2-upload.mjs`, not `r2-prepare.mjs`): `private, max-age=0, no-store` -Individual static asset file size limit: +Recommended Cloudflare cache rules: -- `25 MiB` +1. 
Cache static assets and Pagefind files for one year. +2. Cache HTML at the edge for one day with short browser TTL. +3. Cache `.md`, `.txt`, `.json`, and `.jsonl` for one hour at the edge. +4. Bypass cache for `/ask-molty/*`. -Official docs: +After cutover, verify repeated requests show `cf-cache-status: MISS` then `HIT`. -- https://developers.cloudflare.com/workers/platform/limits/ -- https://developers.cloudflare.com/workers/static-assets/billing-and-limitations/ +## Cutover Checklist -The unpruned docs build exceeded the Free limit: - -- `36,872` asset manifest files -- deploy error: `Invalid manifest: manifest contains 36,872 files which exceeds the limit of 20,000` - -After pruning, the deploy fit: - -- `13,834` asset manifest files - -## Pruning - -`npm run docs:build:cloudflare` runs the normal docs build and then `scripts/docs-site/cloudflare-prune.mjs`. - -The prune step keeps: - -- all English HTML pages -- all localized HTML pages -- English `.md` endpoints -- static assets -- generated source indexes - -The prune step removes: - -- localized `.md` duplicates, such as `/it/channels.md` -- stale/junk files such as `.DS_Store` - -The prune step also rebuilds Pagefind from canonical English HTML pages only. - -User-visible tradeoff: - -- Localized docs pages still work. -- English markdown endpoints still work. -- `Accept: text/markdown` works for English docs. -- Localized markdown endpoints such as `/it/channels.md` return `404`. -- Search is currently English-only after pruning. - -This is intentional while the Cloudflare account is on the Free static asset file limit. - -## If Cloudflare Is Upgraded - -If Services@openclaw.org gets Workers Paid or another limit increase: - -1. Remove or relax `scripts/docs-site/cloudflare-prune.mjs`. -2. Change `.github/workflows/pages.yml` back to `npm run docs:build` if no deploy pruning is needed. -3. Keep `workers/docs-router.ts` and `wrangler.toml`; they are still the right hosting model. -4. 
Re-run `npm run docs:build`. -5. Check file count: - - ```sh - find dist/docs-site -type f | wc -l - ``` - -6. Deploy: +1. Enable R2 on the Services@openclaw.org account. +2. Fix the GitHub `CLOUDFLARE_API_TOKEN` scopes listed above. +3. Create the bucket: ```sh source ~/.profile - CLOUDFLARE_API_TOKEN="$CRABBOX_CLOUDFLARE_API_TOKEN" npx wrangler deploy --config wrangler.toml + CLOUDFLARE_ACCOUNT_ID=91b59577e757131d68d55a471fe32aca \ + CLOUDFLARE_API_TOKEN="$CRABBOX_CLOUDFLARE_API_TOKEN" \ + npx wrangler@4.88.0 r2 bucket create openclaw-docs ``` -7. Live-test: +4. Run the manual `R2 Pages` workflow, or run the local upload command above. +5. Attach the R2 custom domain for `documentation.openclaw.ai`. +6. Add or verify Cloudflare rules: + - `/` rewrites to `/index.html` if needed. + - non-root trailing-slash docs paths redirect to slashless paths. + - cache rules match the policy above. + - `/ask-molty/*` remains routed to `openclaw-docs-chat-proxy`. +7. Remove the `documentation.openclaw.ai/*` route from `openclaw-docs-router`. +8. Purge Cloudflare cache. +9. Live-test the URLs below. + +## Live Smoke + +Use these after every deploy: + +```sh +curl -I https://documentation.openclaw.ai/ +curl -I https://documentation.openclaw.ai/start/getting-started +curl -I https://documentation.openclaw.ai/concepts/models +curl -I https://documentation.openclaw.ai/concepts/models.md +curl -I https://documentation.openclaw.ai/docs/platforms/digitalocean +curl -I https://documentation.openclaw.ai/llms-full.txt +curl -I https://documentation.openclaw.ai/assets/docs-site.css +curl -i https://documentation.openclaw.ai/ask-molty/api/session +``` + +Expected after R2 cutover: + +- slashless HTML paths return `200`. +- `.md` paths return `text/markdown`. +- static assets become `cf-cache-status: HIT` on repeat requests. +- `/ask-molty/api/session` returns `401` when logged out. +- no `X-OpenClaw-Docs-Origin: cloudflare-static-assets` header on normal docs pages. 
+ +Expected before R2 cutover: + +- the same URLs work through the Worker Static Assets fallback. +- docs responses include `X-OpenClaw-Docs-Origin: cloudflare-static-assets`. + +## Rollback + +If R2 cutover misbehaves: + +1. Re-add the `documentation.openclaw.ai/*` route to `openclaw-docs-router`. +2. Re-run `.github/workflows/pages.yml` or deploy locally: ```sh - curl -I https://documentation.openclaw.ai/concepts/models - curl -I https://documentation.openclaw.ai/concepts/models/ - curl -I https://documentation.openclaw.ai/concepts/models.md - curl -I -H 'Accept: text/markdown' https://documentation.openclaw.ai/concepts/models - curl -I https://documentation.openclaw.ai/it/channels - curl -I https://documentation.openclaw.ai/pagefind/pagefind.js - curl -i https://documentation.openclaw.ai/ask-molty/api/session + source ~/.profile + CLOUDFLARE_API_TOKEN="$CRABBOX_CLOUDFLARE_API_TOKEN" npx wrangler@4.88.0 deploy --config wrangler.toml ``` -Expected highlights: - -- `/concepts/models`: `200`, `text/html`, `X-OpenClaw-Docs-Origin: cloudflare-static-assets` -- `/concepts/models/`: `308` to `/concepts/models` -- `/concepts/models.md`: `200`, markdown -- `Accept: text/markdown`: `200`, `text/markdown`, `Vary: Accept` -- `/ask-molty/api/session`: `401` when logged out - -## Why Not Cloudflare Pages - -Cloudflare Pages project creation/listing was blocked by the available Services@openclaw.org token. - -Workers Static Assets was deployable with the existing token and gives us the router behavior we need: - -- slashless canonical URLs -- markdown negotiation -- same hostname with Ask Molty Worker route -- static asset hosting without GitHub Pages as origin - -If we later switch to Cloudflare Pages, keep a Worker in front or use Pages Functions for the markdown negotiation. A plain static Pages project would lose `.md` and `Accept: text/markdown` behavior. +3. Purge Cloudflare cache. +4. Re-run the live smoke. 
diff --git a/README.md b/README.md index f733ba6a5..7eb5df19e 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ Source of truth lives in [`openclaw/openclaw`](https://github.com/openclaw/openc 2. `openclaw/openclaw/.github/workflows/docs-sync-publish.yml` mirrors the docs tree into this repo. 3. This repo stores the published docs tree plus generated locale output. 4. `openclaw/docs/.github/workflows/translate-all.yml` debounces docs changes, runs locale translation in parallel, and commits one aggregate locale refresh. -5. `.github/workflows/pages.yml` builds `dist/docs-site` from the mirrored docs and deploys it to Cloudflare Workers Static Assets. +5. `.github/workflows/pages.yml` builds `dist/docs-site` from the mirrored docs and deploys the current production fallback to Cloudflare Workers Static Assets. +6. `.github/workflows/r2-pages.yml` builds the full unpruned R2 artifact for the target Cloudflare CDN design. ## Translation behavior @@ -34,6 +35,8 @@ Source of truth lives in [`openclaw/openclaw`](https://github.com/openclaw/openc - `npm run docs:build` renders the mirrored Mintlify-flavored docs into `dist/docs-site`. - `npm run docs:build:cloudflare` prunes deploy-only duplicates so the Worker asset manifest stays below Cloudflare Free's 20,000-file limit. +- `npm run docs:build:r2` renders the full unpruned site and prepares `dist/docs-r2-manifest.json` for R2 upload. +- `npm run docs:r2:upload` uploads only changed R2 objects by comparing against the remote manifest. - `npm run docs:smoke` checks representative English and locale pages plus the Pagefind search bundle. - `npm run docs:check` runs both steps. - The generated site includes the language picker and static full-text search via Pagefind. @@ -45,3 +48,4 @@ Source of truth lives in [`openclaw/openclaw`](https://github.com/openclaw/openc - `OPENCLAW_DOCS_SYNC_TOKEN` lives in `openclaw/openclaw` and lets the source repo push into this repo. 
- `OPENCLAW_DOCS_I18N_OPENAI_API_KEY` lives in this repo and powers locale translation refreshes. - `CLOUDFLARE_API_TOKEN` lives in this repo and deploys `documentation.openclaw.ai`. +- The R2 deploy path needs the same GitHub secret to include `Account: R2 Storage: Edit` for the Services@openclaw.org account before it can become production. diff --git a/package.json b/package.json index 96c817bdd..64609c312 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,8 @@ "scripts": { "docs:build": "node scripts/docs-site/build.mjs && node scripts/docs-site/source-index.mjs && pagefind --site dist/docs-site --output-path dist/docs-site/pagefind", "docs:build:cloudflare": "npm run docs:build && node scripts/docs-site/cloudflare-prune.mjs", + "docs:build:r2": "npm run docs:build && node scripts/docs-site/r2-prepare.mjs", + "docs:r2:upload": "node scripts/docs-site/r2-upload.mjs", "docs:smoke": "node scripts/docs-site/smoke.mjs", "docs:check": "npm run docs:build && npm run docs:smoke" }, diff --git a/scripts/docs-site/r2-prepare.mjs b/scripts/docs-site/r2-prepare.mjs new file mode 100644 index 000000000..ebc7e8899 --- /dev/null +++ b/scripts/docs-site/r2-prepare.mjs @@ -0,0 +1,126 @@ +#!/usr/bin/env node +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; + +const root = process.cwd(); +const sourceDir = path.join(root, "dist", "docs-site"); +const outputDir = path.join(root, "dist", "docs-r2"); +const manifestPath = path.join(root, "dist", "docs-r2-manifest.json"); + +if (!fs.existsSync(sourceDir)) throw new Error("dist/docs-site does not exist; run docs:build first"); + +fs.rmSync(outputDir, { recursive: true, force: true }); +copyTree(sourceDir, outputDir); + +const entries = []; +for (const file of walk(outputDir)) { + const key = toKey(path.relative(outputDir, file)); + entries.push(entryFor(key, file, key)); +} + +for (const file of walk(outputDir)) { + const rel = toKey(path.relative(outputDir, file)); + if 
// NOTE: these helpers are called by top-level script code that runs before this
// point in the file, so they rely on function hoisting. Keep every constant
// inside a function body; a module-level `const` here would still be in its
// temporal dead zone when the helpers are first invoked.

/**
 * Recursively copy a directory tree.
 *
 * Only entries that the directory listing reports as directories or regular
 * files are copied; anything else is skipped.
 *
 * @param {string} from - Existing directory to copy from.
 * @param {string} to - Destination directory (created if missing).
 */
function copyTree(from, to) {
  fs.mkdirSync(to, { recursive: true });
  for (const entry of fs.readdirSync(from, { withFileTypes: true })) {
    const source = path.join(from, entry.name);
    const target = path.join(to, entry.name);
    if (entry.isDirectory()) {
      copyTree(source, target);
    } else if (entry.isFile()) {
      fs.copyFileSync(source, target);
    }
  }
}

/**
 * Build one manifest entry for an object key.
 *
 * @param {string} key - Object key the entry is stored under.
 * @param {string} file - Path of the file whose bytes are stored.
 * @param {string} sourceKey - Key of the physical file backing `key`. Equal to
 *   `key` for physical files; the `…/index.html` key for slashless aliases.
 * @returns {{key: string, sourceKey: string, file: string, size: number,
 *   sha256: string, contentType: string, cacheControl: string}}
 */
function entryFor(key, file, sourceKey) {
  const data = fs.readFileSync(file);
  return {
    key,
    sourceKey,
    file: toKey(path.relative(root, file)),
    size: data.byteLength,
    sha256: crypto.createHash("sha256").update(data).digest("hex"),
    // Metadata follows the backing file, not just the key: an alias such as
    // `releases/v2.0` has a dotted last segment but still serves HTML.
    contentType: contentTypeFor(key, sourceKey),
    cacheControl: cacheControlFor(key, sourceKey),
  };
}

/**
 * Decide whether an object holds an HTML page.
 *
 * True when the backing file is a nested `index.html` (this covers every
 * slashless alias) or when the key has no extension at all.
 *
 * @param {string} key - Object key.
 * @param {string} sourceKey - Key of the physical file backing `key`.
 * @returns {boolean}
 */
function servesHtmlDocument(key, sourceKey) {
  return sourceKey.endsWith("/index.html") || !path.extname(key);
}

/**
 * Pick the `Content-Type` for an object.
 *
 * @param {string} key - Object key.
 * @param {string} [sourceKey=key] - Key of the physical file backing `key`;
 *   pass it for slashless aliases so dotted segments are not read as a file
 *   extension.
 * @returns {string} MIME type; `application/octet-stream` for unknown extensions.
 */
function contentTypeFor(key, sourceKey = key) {
  // CNAME is a plain-text hostname file, not a page (cacheControlFor treats it
  // as a special case as well).
  if (key === "CNAME") return "text/plain; charset=utf-8";
  if (servesHtmlDocument(key, sourceKey)) return "text/html; charset=utf-8";
  switch (path.extname(key).toLowerCase()) {
    case ".avif": return "image/avif";
    case ".css": return "text/css; charset=utf-8";
    case ".gif": return "image/gif";
    case ".html": return "text/html; charset=utf-8";
    case ".ico": return "image/x-icon";
    case ".jpeg":
    case ".jpg": return "image/jpeg";
    case ".js": return "text/javascript; charset=utf-8";
    case ".json": return "application/json; charset=utf-8";
    case ".jsonl": return "application/x-ndjson; charset=utf-8";
    case ".md": return "text/markdown; charset=utf-8";
    case ".png": return "image/png";
    case ".svg": return "image/svg+xml";
    case ".txt": return "text/plain; charset=utf-8";
    case ".webp": return "image/webp";
    case ".wasm": return "application/wasm";
    case ".xml": return "application/xml; charset=utf-8";
    default: return "application/octet-stream";
  }
}

/**
 * Pick the `Cache-Control` for an object.
 *
 * @param {string} key - Object key.
 * @param {string} [sourceKey=key] - Key of the physical file backing `key`;
 *   pass it for slashless aliases so they always get the HTML policy.
 * @returns {string} Cache-Control header value.
 */
function cacheControlFor(key, sourceKey = key) {
  if (key === "CNAME") return "public, max-age=300, s-maxage=300";
  if (key.endsWith(".html") || servesHtmlDocument(key, sourceKey)) {
    return "public, max-age=60, s-maxage=86400, stale-while-revalidate=604800";
  }
  if (key.endsWith(".md") || key.endsWith(".txt") || key.endsWith(".json") || key.endsWith(".jsonl")) {
    return "public, max-age=300, s-maxage=3600, stale-while-revalidate=86400";
  }
  // Everything else (css, js, images, wasm, …) gets the long-lived policy.
  return "public, max-age=31536000, immutable";
}

/**
 * Count the regular files under `dir`, recursively.
 *
 * @param {string} dir - Directory to scan.
 * @returns {number}
 */
function countFiles(dir) {
  let count = 0;
  for (const _file of walk(dir)) count += 1;
  return count;
}

/**
 * Lazily yield the path of every regular file under `dir`, depth-first.
 *
 * @param {string} dir - Directory to scan.
 * @yields {string} `dir` joined with the file's relative path.
 */
function* walk(dir) {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      yield* walk(fullPath);
    } else if (entry.isFile()) {
      yield fullPath;
    }
  }
}

/**
 * Convert a platform-specific relative path into a `/`-separated object key.
 *
 * @param {string} value - Path using `path.sep` separators.
 * @returns {string}
 */
function toKey(value) {
  return value.split(path.sep).join("/");
}
/**
 * Download the manifest written by the previous upload, if there is one.
 *
 * This is deliberately best-effort: any failure (object missing, wrangler
 * error, unreadable JSON, unexpected shape) yields `null`, which makes the
 * caller treat every object as changed. A warning is printed so an unexpected
 * full re-upload is visible in the log.
 *
 * @returns {Promise<{entries: object[]} | null>} Parsed manifest, or `null`.
 */
async function getRemoteManifest() {
  const tempFile = path.join(os.tmpdir(), `openclaw-docs-r2-manifest-${process.pid}.json`);
  try {
    const result = await runWrangler([
      "r2",
      "object",
      "get",
      `${bucket}/${remoteManifestKey}`,
      "--file",
      tempFile,
      "--remote",
    ], { quiet: true, allowFailure: true });
    if (result.code !== 0 || !fs.existsSync(tempFile)) {
      console.warn(`r2 upload: no readable ${remoteManifestKey} in ${bucket}; treating every object as changed`);
      return null;
    }
    const parsed = JSON.parse(fs.readFileSync(tempFile, "utf8"));
    // The caller maps over `entries`; reject shapes that would crash it.
    if (!Array.isArray(parsed?.entries)) {
      console.warn(`r2 upload: ${remoteManifestKey} has no entries array; treating every object as changed`);
      return null;
    }
    return parsed;
  } catch (error) {
    console.warn(`r2 upload: ignoring remote manifest (${error?.message ?? error}); treating every object as changed`);
    return null;
  } finally {
    fs.rmSync(tempFile, { force: true });
  }
}

/**
 * Upload entries with bounded parallelism.
 *
 * Workers pull from a shared cursor. After the first failure no further
 * uploads are started; uploads already in flight are allowed to finish, and
 * then the first error is rethrown.
 *
 * @param {object[]} entries - Manifest entries to upload.
 * @param {object} [options]
 * @param {number} [options.limit] - Maximum concurrent uploads (defaults to
 *   the script-level `concurrency`).
 * @param {(entry: object) => Promise<void>} [options.put] - Upload function
 *   (defaults to `putObject`).
 * @returns {Promise<void>}
 * @throws The first error raised by `put`.
 */
async function uploadEntries(entries, { limit = concurrency, put = putObject } = {}) {
  let next = 0;
  let failed = false;
  let firstError;
  const worker = async () => {
    while (!failed && next < entries.length) {
      const entry = entries[next++];
      try {
        await put(entry);
      } catch (error) {
        if (!failed) {
          failed = true;
          firstError = error;
        }
      }
    }
  };
  await Promise.all(Array.from({ length: Math.min(limit, entries.length) }, () => worker()));
  if (failed) throw firstError;
}

/**
 * Upload one object with `wrangler r2 object put`.
 *
 * @param {{key: string, file: string, contentType: string, cacheControl: string}} entry
 *   `file` may be absolute or relative to the script's `root`.
 * @returns {Promise<void>}
 * @throws {Error} When wrangler exits non-zero or cannot be started.
 */
async function putObject(entry) {
  const args = [
    "r2",
    "object",
    "put",
    `${bucket}/${entry.key}`,
    "--file",
    path.isAbsolute(entry.file) ? entry.file : path.join(root, entry.file),
    "--content-type",
    entry.contentType,
    "--cache-control",
    entry.cacheControl,
    "--remote",
    "--force",
  ];
  const result = await runWrangler(args);
  if (result.code !== 0) throw new Error(`wrangler failed uploading ${entry.key}`);
}

/**
 * Run `npx wrangler@4.88.0 <args>` and wait for it to exit.
 *
 * @param {string[]} args - Arguments passed to wrangler.
 * @param {object} [options]
 * @param {boolean} [options.quiet] - Capture stdout/stderr instead of
 *   inheriting the parent's stdio.
 * @param {boolean} [options.allowFailure] - With `quiet`, suppress echoing the
 *   captured output when the exit code is non-zero.
 * @returns {Promise<{code: number | null, output: string}>} Resolves on exit,
 *   whatever the exit code (`code` is `null` if the child was killed by a signal).
 * @throws {Error} Rejects when the process cannot be spawned at all.
 */
function runWrangler(args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn("npx", ["wrangler@4.88.0", ...args], {
      cwd: root,
      env: process.env,
      stdio: options.quiet ? ["ignore", "pipe", "pipe"] : "inherit",
    });
    let output = "";
    if (options.quiet) {
      child.stdout.on("data", (chunk) => { output += chunk; });
      child.stderr.on("data", (chunk) => { output += chunk; });
    }
    // Without a listener a spawn failure (e.g. `npx` not on PATH) is thrown as
    // an uncaught exception and this promise would never settle.
    child.on("error", (error) => {
      reject(new Error("failed to start wrangler via npx", { cause: error }));
    });
    child.on("close", (code) => {
      if (code !== 0 && !options.allowFailure && options.quiet) process.stderr.write(output);
      resolve({ code, output });
    });
  });
}