feat: publish source search index
This commit is contained in:
parent
0eebe13de9
commit
403b82445d
22
.github/workflows/pages.yml
vendored
22
.github/workflows/pages.yml
vendored
@ -34,6 +34,25 @@ jobs:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Read source metadata
|
||||
id: source-meta
|
||||
run: |
|
||||
node - <<'NODE'
|
||||
const fs = require("node:fs");
|
||||
const data = JSON.parse(fs.readFileSync(".openclaw-sync/source.json", "utf8"));
|
||||
if (!data.repository || !data.sha) throw new Error("invalid .openclaw-sync/source.json");
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `repository=${data.repository}\n`);
|
||||
fs.appendFileSync(process.env.GITHUB_OUTPUT, `sha=${data.sha}\n`);
|
||||
NODE
|
||||
|
||||
- name: Check out OpenClaw source
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
repository: ${{ steps.source-meta.outputs.repository }}
|
||||
ref: ${{ steps.source-meta.outputs.sha }}
|
||||
path: source
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
@ -46,6 +65,9 @@ jobs:
|
||||
- name: Build
|
||||
env:
|
||||
DOCS_SITE_CNAME: documentation.openclaw.ai
|
||||
DOCS_SOURCE_REPO_DIR: source
|
||||
DOCS_SOURCE_REPO_URL: https://github.com/${{ steps.source-meta.outputs.repository }}
|
||||
DOCS_SOURCE_SHA: ${{ steps.source-meta.outputs.sha }}
|
||||
run: npm run docs:build
|
||||
|
||||
- name: Smoke generated site
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"docs:build": "node scripts/docs-site/build.mjs && pagefind --site dist/docs-site --output-path dist/docs-site/pagefind",
|
||||
"docs:build": "node scripts/docs-site/build.mjs && node scripts/docs-site/source-index.mjs && pagefind --site dist/docs-site --output-path dist/docs-site/pagefind",
|
||||
"docs:smoke": "node scripts/docs-site/smoke.mjs",
|
||||
"docs:check": "npm run docs:build && npm run docs:smoke"
|
||||
},
|
||||
|
||||
256
scripts/docs-site/source-index.mjs
Normal file
256
scripts/docs-site/source-index.mjs
Normal file
@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env node
|
||||
import { execFileSync } from "node:child_process";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
const root = process.cwd();
|
||||
const outDir = path.join(root, "dist", "docs-site");
|
||||
const sourceMetaPath = path.join(root, ".openclaw-sync", "source.json");
|
||||
const defaultRepoUrl = "https://github.com/openclaw/openclaw";
|
||||
const maxFileBytes = 180_000;
|
||||
const maxSearchChars = 600;
|
||||
const maxIndexBytes = 18 * 1024 * 1024;
|
||||
|
||||
const includeExts = new Set([
|
||||
".cjs",
|
||||
".css",
|
||||
".go",
|
||||
".gql",
|
||||
".graphql",
|
||||
".html",
|
||||
".java",
|
||||
".js",
|
||||
".json",
|
||||
".jsonc",
|
||||
".jsx",
|
||||
".kt",
|
||||
".mjs",
|
||||
".py",
|
||||
".rb",
|
||||
".rs",
|
||||
".scss",
|
||||
".sh",
|
||||
".sql",
|
||||
".swift",
|
||||
".toml",
|
||||
".ts",
|
||||
".tsx",
|
||||
".vue",
|
||||
".yaml",
|
||||
".yml",
|
||||
]);
|
||||
|
||||
const excludedPrefixes = [
|
||||
".git/",
|
||||
".github/codeql/",
|
||||
"docs/",
|
||||
"node_modules/",
|
||||
"vendor/",
|
||||
];
|
||||
|
||||
const excludedParts = new Set([
|
||||
"__fixtures__",
|
||||
"__snapshots__",
|
||||
".next",
|
||||
".turbo",
|
||||
"coverage",
|
||||
"dist",
|
||||
"generated",
|
||||
"node_modules",
|
||||
"snapshots",
|
||||
]);
|
||||
|
||||
const excludedFiles = new Set([
|
||||
"package-lock.json",
|
||||
"pnpm-lock.yaml",
|
||||
"yarn.lock",
|
||||
]);
|
||||
|
||||
fs.mkdirSync(outDir, { recursive: true });
|
||||
|
||||
const sourceMeta = readJson(sourceMetaPath) ?? {};
|
||||
const sourceDir = resolveSourceDir();
|
||||
const outPath = path.join(outDir, "source-index.jsonl");
|
||||
const metaPath = path.join(outDir, "source-index-meta.json");
|
||||
|
||||
if (!sourceDir) {
|
||||
if (process.env.DOCS_SOURCE_REPO_DIR) {
|
||||
throw new Error(`DOCS_SOURCE_REPO_DIR not found: ${process.env.DOCS_SOURCE_REPO_DIR}`);
|
||||
}
|
||||
writeEmptyIndex("source checkout not found");
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const repoUrl = normalizeRepoUrl(process.env.DOCS_SOURCE_REPO_URL ?? repoUrlFromGit(sourceDir) ?? defaultRepoUrl);
|
||||
const sourceSha = process.env.DOCS_SOURCE_SHA ?? sourceMeta.sha ?? git(sourceDir, ["rev-parse", "HEAD"]);
|
||||
const files = git(sourceDir, ["ls-files"]).split("\n").filter(Boolean).filter(shouldIndexFile).sort(compareFilePriority);
|
||||
|
||||
let bytes = 0;
|
||||
let recordCount = 0;
|
||||
let skippedLarge = 0;
|
||||
let skippedBudget = 0;
|
||||
const output = fs.createWriteStream(outPath, { encoding: "utf8" });
|
||||
|
||||
for (const rel of files) {
|
||||
const full = path.join(sourceDir, rel);
|
||||
let stat;
|
||||
try {
|
||||
stat = fs.statSync(full);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
if (!stat.isFile()) continue;
|
||||
if (stat.size > maxFileBytes) {
|
||||
skippedLarge += 1;
|
||||
continue;
|
||||
}
|
||||
const text = fs.readFileSync(full, "utf8");
|
||||
if (text.includes("\0") || !text.trim()) continue;
|
||||
const search = searchTextForFile(rel, text);
|
||||
if (!search) continue;
|
||||
const record = {
|
||||
path: rel,
|
||||
url: `${repoUrl}/blob/${sourceSha}/${encodeURI(rel)}`,
|
||||
rawUrl: rawUrlFor(repoUrl, sourceSha, rel),
|
||||
commit: `${repoUrl}/commit/${sourceSha}`,
|
||||
search,
|
||||
};
|
||||
const line = `${JSON.stringify(record)}\n`;
|
||||
const lineBytes = Buffer.byteLength(line);
|
||||
if (bytes + lineBytes > maxIndexBytes) {
|
||||
skippedBudget += 1;
|
||||
continue;
|
||||
}
|
||||
output.write(line);
|
||||
bytes += lineBytes;
|
||||
recordCount += 1;
|
||||
}
|
||||
|
||||
await new Promise((resolve) => output.end(resolve));
|
||||
|
||||
const meta = {
|
||||
repository: sourceMeta.repository ?? "openclaw/openclaw",
|
||||
repoUrl,
|
||||
sha: sourceSha,
|
||||
sourceDir: path.relative(root, sourceDir),
|
||||
records: recordCount,
|
||||
bytes,
|
||||
filesConsidered: files.length,
|
||||
skippedLarge,
|
||||
skippedBudget,
|
||||
generatedAt: new Date().toISOString(),
|
||||
};
|
||||
fs.writeFileSync(metaPath, `${JSON.stringify(meta, null, 2)}\n`, "utf8");
|
||||
console.log(`indexed ${recordCount} source files from ${files.length} files (${Math.round(bytes / 1024)} KiB)`);
|
||||
if (skippedLarge || skippedBudget) {
|
||||
console.log(`source index skips: large=${skippedLarge} budget=${skippedBudget}`);
|
||||
}
|
||||
|
||||
function resolveSourceDir() {
|
||||
const candidates = [
|
||||
process.env.DOCS_SOURCE_REPO_DIR,
|
||||
path.join(root, "source"),
|
||||
path.join(root, "..", "openclaw-source"),
|
||||
path.join(root, "..", "openclaw"),
|
||||
path.join(root, "..", "clawdbot5"),
|
||||
path.join(root, "..", "clawdbot"),
|
||||
].filter(Boolean);
|
||||
for (const candidate of candidates) {
|
||||
const full = path.resolve(candidate);
|
||||
if (!fs.existsSync(path.join(full, ".git"))) continue;
|
||||
try {
|
||||
const files = git(full, ["ls-files", "src"]).split("\n").filter(Boolean);
|
||||
if (files.length > 100) return full;
|
||||
} catch {
|
||||
// Try the next candidate.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function shouldIndexFile(rel) {
|
||||
if (excludedFiles.has(path.basename(rel))) return false;
|
||||
if (["AGENTS.md", "CLAUDE.md"].includes(path.basename(rel))) return false;
|
||||
if (excludedPrefixes.some((prefix) => rel.startsWith(prefix))) return false;
|
||||
if (rel.split("/").some((part) => excludedParts.has(part))) return false;
|
||||
const ext = path.extname(rel);
|
||||
if (includeExts.has(ext)) return true;
|
||||
return !rel.includes("/") && ["Dockerfile", "Makefile", "README.md", "CHANGELOG.md", "CONTRIBUTING.md", "SECURITY.md", "VISION.md"].includes(path.basename(rel));
|
||||
}
|
||||
|
||||
function compareFilePriority(a, b) {
|
||||
return filePriority(a) - filePriority(b) || a.localeCompare(b);
|
||||
}
|
||||
|
||||
function filePriority(rel) {
|
||||
if (/^(src|extensions|packages)\//.test(rel)) return 0;
|
||||
if (/^(apps|ui|scripts|skills|config)\//.test(rel)) return 1;
|
||||
if (/^\.github\//.test(rel)) return 2;
|
||||
if (/^(qa|security)\//.test(rel)) return 3;
|
||||
if (/^(test|patches)\//.test(rel)) return 4;
|
||||
return rel.includes("/") ? 5 : 1;
|
||||
}
|
||||
|
||||
function searchTextForFile(rel, text) {
|
||||
const lines = text.replace(/\r\n/g, "\n").split("\n");
|
||||
const chosen = [];
|
||||
for (let i = 0; i < lines.length; i += 1) {
|
||||
const line = lines[i].trim();
|
||||
if (!line) continue;
|
||||
if (i < 40 || isSearchSignal(line)) chosen.push(`${i + 1}: ${line}`);
|
||||
if (chosen.join("\n").length >= maxSearchChars) break;
|
||||
}
|
||||
return chosen.join("\n").slice(0, maxSearchChars).trim();
|
||||
}
|
||||
|
||||
function isSearchSignal(line) {
|
||||
return /^(#{1,4}\s|import\s|export\s|module\.exports|async\s+function\s|function\s|class\s|interface\s|type\s|enum\s|const\s|let\s|var\s|def\s|class\s|func\s|struct\s|protocol\s|extension\s|describe\s*\(|it\s*\(|test\s*\(|name:\s|command:\s|on:\s|jobs:\s)/.test(line);
|
||||
}
|
||||
|
||||
function rawUrlFor(repoUrl, sha, rel) {
|
||||
const match = repoUrl.match(/^https:\/\/github\.com\/([^/]+)\/([^/]+)$/);
|
||||
if (!match) return "";
|
||||
return `https://raw.githubusercontent.com/${match[1]}/${match[2]}/${sha}/${encodeURI(rel)}`;
|
||||
}
|
||||
|
||||
function languageForPath(rel) {
|
||||
const ext = path.extname(rel).replace(/^\./, "");
|
||||
if (ext) return ext;
|
||||
if (path.basename(rel) === "Dockerfile") return "dockerfile";
|
||||
return "text";
|
||||
}
|
||||
|
||||
function readJson(file) {
|
||||
try {
|
||||
return JSON.parse(fs.readFileSync(file, "utf8"));
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function writeEmptyIndex(reason) {
|
||||
fs.writeFileSync(outPath, "", "utf8");
|
||||
fs.writeFileSync(metaPath, `${JSON.stringify({ records: 0, reason, generatedAt: new Date().toISOString() }, null, 2)}\n`, "utf8");
|
||||
console.warn(`source index skipped: ${reason}`);
|
||||
}
|
||||
|
||||
function repoUrlFromGit(dir) {
|
||||
try {
|
||||
const remotes = git(dir, ["remote", "-v"]).split("\n");
|
||||
const origin = remotes.find((line) => line.startsWith("origin\t") && line.includes("(fetch)")) ?? remotes.find((line) => line.includes("(fetch)"));
|
||||
return origin?.split(/\s+/)[1];
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
function normalizeRepoUrl(value) {
|
||||
return String(value)
|
||||
.replace(/\.git$/, "")
|
||||
.replace(/^git@github\.com:/, "https://github.com/")
|
||||
.replace(/^ssh:\/\/git@github\.com\//, "https://github.com/")
|
||||
.replace(/\/$/, "");
|
||||
}
|
||||
|
||||
function git(dir, args) {
|
||||
return execFileSync("git", ["-C", dir, ...args], { encoding: "utf8" }).trim();
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user