From fe0587f2a6a5bc8fb9f4faa644c15fc6b5350cca Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 25 Apr 2026 16:16:28 -0700 Subject: [PATCH] feat(ci): classify compatibility policy results --- crabpot.ci-policy.json | 41 ++++ crabpot.ci-policy.schema.json | 53 ++++++ package.json | 1 + scripts/check-ci-policy.mjs | 340 ++++++++++++++++++++++++++++++++++ test/ci-policy.test.mjs | 119 ++++++++++++ 5 files changed, 554 insertions(+) create mode 100644 crabpot.ci-policy.json create mode 100644 crabpot.ci-policy.schema.json create mode 100644 scripts/check-ci-policy.mjs create mode 100644 test/ci-policy.test.mjs diff --git a/crabpot.ci-policy.json b/crabpot.ci-policy.json new file mode 100644 index 0000000..c533e91 --- /dev/null +++ b/crabpot.ci-policy.json @@ -0,0 +1,41 @@ +{ + "$schema": "./crabpot.ci-policy.schema.json", + "version": 1, + "allowedBlocked": [ + { + "id": "channel-runtime-harness", + "seam": "registerChannel", + "reasonIncludes": "includeChannelRuntime=true", + "decision": "allowed-blocked", + "until": "channel runtime harness lands" + }, + { + "id": "service-lifecycle-harness", + "seam": "registerService", + "reasonIncludes": "lifecycle", + "decision": "allowed-blocked", + "until": "service lifecycle harness lands" + } + ], + "expectedWarnings": [ + { + "id": "tool-factory-descriptor", + "seam": "registerTool", + "reasonIncludes": "no object descriptor", + "decision": "expected-warning", + "until": "tool factory capture expansion lands" + } + ], + "thresholds": { + "wallP95RegressionPercent": 50, + "peakRssRegressionMb": 50, + "bootRegressionMs": 500, + "strictMinimumSamples": 3 + }, + "fixtureSets": { + "smoke": ["wecom", "llm-trace-phoenix", "codex-app-server", "opik-openclaw"], + "sdk-alias": ["codex-app-server"], + "side-effect-review": ["hasdata", "llm-trace-phoenix", "web-search-plus"], + "build": ["agentchat", "clawmetry", "opik-openclaw"] + } +} diff --git a/crabpot.ci-policy.schema.json b/crabpot.ci-policy.schema.json new file mode 100644 index 0000000..0c3d636 --- /dev/null +++ b/crabpot.ci-policy.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Crabpot CI policy", + "type": "object", + "required": ["version", "allowedBlocked", "expectedWarnings", "thresholds", "fixtureSets"], + "additionalProperties": false, + "properties": { + "version": { "type": "integer", "minimum": 1 }, + "allowedBlocked": { "$ref": "#/$defs/rules" }, + "expectedWarnings": { "$ref": "#/$defs/rules" }, + "thresholds": { + "type": "object", + "required": [ + "wallP95RegressionPercent", + "peakRssRegressionMb", + "bootRegressionMs", + "strictMinimumSamples" + ], + "additionalProperties": false, + "properties": { + "wallP95RegressionPercent": { "type": "number", "minimum": 0 }, + "peakRssRegressionMb": { "type": "number", "minimum": 0 }, + "bootRegressionMs": { "type": "number", "minimum": 0 }, + "strictMinimumSamples": { "type": "integer", "minimum": 1 } + } + }, + "fixtureSets": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-]*$" }, + "uniqueItems": true + } + } + }, + "$defs": { + "rules": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "seam", "reasonIncludes", "decision", "until"], + "additionalProperties": false, + "properties": { + "id": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-]*$" }, + "seam": { "type": "string", "minLength": 1 }, + "reasonIncludes": { "type": "string", "minLength": 1 }, + "decision": { "enum": ["allowed-blocked", "expected-warning"] }, + "until": { "type": "string", "minLength": 1 } + } + } + } + } +} diff --git a/package.json b/package.json index db166c7..8ff8d7b 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ }, "scripts": { "check": "npm test && node scripts/sync-fixtures.mjs --check && node scripts/run-contract-smoke.mjs && node scripts/inspect-fixtures.mjs --check && node scripts/generate-report.mjs --check && node scripts/capture-contracts.mjs --check && node scripts/synthetic-probes.mjs --check && node scripts/cold-import-readiness.mjs --check && node scripts/workspace-plan.mjs --check && node scripts/profile-contract-runtime.mjs --check && node scripts/check-contract-coverage.mjs", + "ci:policy": "node scripts/check-ci-policy.mjs", "contract:capture": "node scripts/capture-contracts.mjs", "contract:coverage": "node scripts/check-contract-coverage.mjs", "contract:synthetic": "node scripts/synthetic-probes.mjs", diff --git a/scripts/check-ci-policy.mjs b/scripts/check-ci-policy.mjs new file mode 100644 index 0000000..1bfde66 --- /dev/null +++ b/scripts/check-ci-policy.mjs @@ -0,0 +1,340 @@ +#!/usr/bin/env node +import { existsSync } from "node:fs"; +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { pathToFileURL } from "node:url"; +import { repoRoot } from "./manifest-lib.mjs"; +import { buildReport, defaultJsonReportPath } from "./report-lib.mjs"; +import { defaultExecutionResultsJsonPath } from "./summarize-execution-results.mjs"; +import { defaultRefDiffJsonPath } from "./compare-openclaw-refs.mjs"; + +export const defaultCiPolicyPath = path.join(repoRoot, "crabpot.ci-policy.json"); +export const defaultCiPolicyReportJsonPath = path.join(repoRoot, "reports/crabpot-ci-policy.json"); +export const defaultCiPolicyReportMarkdownPath = path.join(repoRoot, "reports/crabpot-ci-policy.md"); + +if (import.meta.url === pathToFileURL(process.argv[1]).href) { + await main(); +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + const report = await buildCiPolicyReport({ + executionResultsPath: args.executionResultsPath, + policyPath: args.policyPath, + refDiffPath: args.refDiffPath, + reportPath: args.reportPath, + strict: args.strict, + }); + const errors = validateCiPolicyReport(report); + + if (args.write) { + await writeCiPolicyReport(report); + } + + if (args.json) { + process.stdout.write(`${JSON.stringify(report, null, 2)}\n`); + } else { + console.log( + `ci policy: ${report.status}; ${report.summary.failCount} fail, ${report.summary.warnCount} warn, ${report.summary.passCount} pass`, + ); + } + + if (errors.length > 0) { + throw new Error(errors.join("\n")); + } +} + +function parseArgs(argv) { + const args = { + executionResultsPath: defaultExecutionResultsJsonPath, + json: false, + policyPath: defaultCiPolicyPath, + refDiffPath: defaultRefDiffJsonPath, + reportPath: defaultJsonReportPath, + strict: false, + write: true, + }; + + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (arg === "--check") { + args.write = false; + continue; + } + if (arg === "--json") { + args.json = true; + continue; + } + if (arg === "--policy") { + args.policyPath = path.resolve(argv[index + 1]); + index += 1; + continue; + } + if (arg === "--report") { + args.reportPath = path.resolve(argv[index + 1]); + index += 1; + continue; + } + if (arg === "--execution-results") { + args.executionResultsPath = path.resolve(argv[index + 1]); + index += 1; + continue; + } + if (arg === "--ref-diff") { + args.refDiffPath = path.resolve(argv[index + 1]); + index += 1; + continue; + } + if (arg === "--strict") { + args.strict = true; + continue; + } + if (arg === "--write") { + args.write = true; + } + } + + return args; +} + +export async function buildCiPolicyReport(options = {}) { + const policy = options.policy ?? (await readJson(options.policyPath ?? defaultCiPolicyPath)); + validatePolicy(policy); + const compatibilityReport = + options.compatibilityReport ?? + (await readOptionalJson(options.reportPath ?? defaultJsonReportPath)) ?? + (await buildReport({ generatedAt: "deterministic" })); + const executionResults = + options.executionResults ?? + (await readOptionalJson(options.executionResultsPath ?? defaultExecutionResultsJsonPath)); + const refDiff = options.refDiff ?? (await readOptionalJson(options.refDiffPath ?? defaultRefDiffJsonPath)); + + const checks = [ + ...compatibilityChecks(compatibilityReport), + ...refDiffChecks(refDiff, { strict: options.strict }), + ...executionChecks(executionResults, policy, { strict: options.strict }), + ].sort((left, right) => actionRank(left.action) - actionRank(right.action) || left.id.localeCompare(right.id)); + + return { + generatedAt: "deterministic", + status: checks.some((check) => check.action === "fail") ? "fail" : "pass", + strict: Boolean(options.strict), + policy: { + allowedBlocked: policy.allowedBlocked.length, + expectedWarnings: policy.expectedWarnings.length, + fixtureSets: Object.keys(policy.fixtureSets).sort(), + thresholds: policy.thresholds, + }, + summary: { + checkCount: checks.length, + failCount: checks.filter((check) => check.action === "fail").length, + warnCount: checks.filter((check) => check.action === "warn").length, + passCount: checks.filter((check) => check.action === "pass").length, + }, + checks, + }; +} + +function compatibilityChecks(report) { + const checks = []; + if (!report) { + checks.push({ + id: "compatibility-report.missing", + action: "fail", + message: "compatibility report is missing", + evidence: [defaultJsonReportPath], + }); + return checks; + } + checks.push({ + id: "compatibility-report.breakages", + action: report.summary.breakageCount > 0 ? "fail" : "pass", + message: `${report.summary.breakageCount} hard breakages`, + evidence: (report.breakages ?? []).map((finding) => `${finding.fixture}:${finding.code}`), + }); + checks.push({ + id: "compatibility-report.p1-issues", + action: "pass", + message: `${report.summary.p1IssueCount} P1 issues tracked`, + evidence: (report.issues ?? []) + .filter((issue) => issue.severity === "P1") + .map((issue) => `${issue.fixture}:${issue.code}`), + }); + return checks; +} + +function refDiffChecks(refDiff, options) { + if (!refDiff) { + return [ + { + id: "ref-diff.not-run", + action: "pass", + message: "ref diff artifact was not present for this CI mode", + evidence: [], + }, + ]; + } + + return (refDiff.regressions ?? []).map((regression) => ({ + id: `ref-diff.${regression.code}`, + action: regression.action === "fail" || (options.strict && regression.action === "warn") ? "fail" : "warn", + message: regression.message, + evidence: regression.evidence ?? [], + })); +} + +function executionChecks(executionResults, policy, options) { + if (!executionResults) { + return [ + { + id: "execution-results.not-run", + action: "pass", + message: "isolated execution artifact was not present for this CI mode", + evidence: [], + }, + ]; + } + + const checks = [ + { + id: "execution-results.failures", + action: executionResults.summary.failCount > 0 ? "fail" : "pass", + message: `${executionResults.summary.failCount} failed synthetic probes`, + evidence: failedExecutionEvidence(executionResults), + }, + ]; + + const blocked = executionResults.artifacts.flatMap((artifact) => + (artifact.blocked ?? []).map((item) => ({ artifact, item })), + ); + for (const blockedItem of blocked) { + const expectedWarning = findPolicyMatch(policy.expectedWarnings, blockedItem.item); + const allowedBlocked = findPolicyMatch(policy.allowedBlocked, blockedItem.item); + const match = expectedWarning ?? allowedBlocked; + checks.push({ + id: `execution-results.blocked.${blockedItem.artifact.fixture}.${blockedItem.item.seam}.${blockedItem.item.captureIndex}`, + action: match ? (options.strict ? "fail" : "warn") : "fail", + message: match + ? `${match.decision}: ${blockedItem.item.reason}` + : `unknown blocked synthetic probe: ${blockedItem.item.reason}`, + evidence: [ + blockedItem.artifact.artifactPath, + blockedItem.item.seam, + blockedItem.item.reason, + match?.id ?? "unclassified", + ], + }); + } + return checks; +} + +function findPolicyMatch(rules, item) { + return rules.find((rule) => item.seam === rule.seam && item.reason?.includes(rule.reasonIncludes)); +} + +function failedExecutionEvidence(executionResults) { + return executionResults.artifacts.flatMap((artifact) => + (artifact.failures ?? []).map((failure) => `${artifact.fixture}:${failure.seam}:${failure.error}`), + ); +} + +export function validateCiPolicyReport(report) { + return report.checks + .filter((check) => check.action === "fail") + .map((check) => `${check.id}: ${check.message}: ${check.evidence.join(", ")}`); +} + +export async function writeCiPolicyReport(report, options = {}) { + const jsonPath = options.jsonPath ?? defaultCiPolicyReportJsonPath; + const markdownPath = options.markdownPath ?? defaultCiPolicyReportMarkdownPath; + await mkdir(path.dirname(jsonPath), { recursive: true }); + await mkdir(path.dirname(markdownPath), { recursive: true }); + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + await writeFile(markdownPath, `${renderCiPolicyMarkdown(report)}\n`, "utf8"); + return { jsonPath, markdownPath }; +} + +export function renderCiPolicyMarkdown(report) { + return [ + "# Crabpot CI Policy", + "", + `Generated: ${report.generatedAt}`, + `Status: ${report.status.toUpperCase()}`, + `Strict: ${report.strict}`, + "", + "## Summary", + "", + markdownTable( + [ + ["Checks", report.summary.checkCount], + ["Fail", report.summary.failCount], + ["Warn", report.summary.warnCount], + ["Pass", report.summary.passCount], + ["Allowed blocked rules", report.policy.allowedBlocked], + ["Expected warning rules", report.policy.expectedWarnings], + ["Fixture sets", report.policy.fixtureSets.join(", ")], + ], + ["Metric", "Value"], + ), + "", + "## Checks", + "", + markdownTable( + report.checks.map((check) => [ + check.action, + check.id, + check.message, + check.evidence.join(", ") || "-", + ]), + ["Action", "ID", "Message", "Evidence"], + ), + ].join("\n"); +} + +async function readJson(jsonPath) { + return JSON.parse(await readFile(jsonPath, "utf8")); +} + +async function readOptionalJson(jsonPath) { + return existsSync(jsonPath) ? readJson(jsonPath) : null; +} + +function validatePolicy(policy) { + const errors = []; + if (policy.version !== 1) { + errors.push("ci policy version must be 1"); + } + for (const key of ["allowedBlocked", "expectedWarnings"]) { + if (!Array.isArray(policy[key])) { + errors.push(`ci policy ${key} must be an array`); + } + } + if (!policy.thresholds || typeof policy.thresholds !== "object") { + errors.push("ci policy thresholds are required"); + } + if (!policy.fixtureSets || typeof policy.fixtureSets !== "object") { + errors.push("ci policy fixtureSets are required"); + } + if (errors.length > 0) { + throw new Error(errors.join("\n")); + } +} + +function actionRank(value) { + return { fail: 0, warn: 1, pass: 2 }[value] ?? 3; +} + +function markdownTable(rows, headers) { + if (rows.length === 0) { + return "_none_"; + } + + const allRows = [headers, ...rows.map((row) => row.map((cell) => String(cell ?? "-")))]; + const widths = headers.map((_, columnIndex) => Math.max(...allRows.map((row) => row[columnIndex].length))); + const renderRow = (row) => `| ${row.map((cell, index) => cell.padEnd(widths[index])).join(" | ")} |`; + return [ + renderRow(headers), + renderRow(widths.map((width) => "-".repeat(width))), + ...rows.map((row) => renderRow(row.map((cell) => String(cell ?? "-")))), + ].join("\n"); +} diff --git a/test/ci-policy.test.mjs b/test/ci-policy.test.mjs new file mode 100644 index 0000000..e9f676e --- /dev/null +++ b/test/ci-policy.test.mjs @@ -0,0 +1,119 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; +import { + buildCiPolicyReport, + renderCiPolicyMarkdown, + validateCiPolicyReport, +} from "../scripts/check-ci-policy.mjs"; + +const policy = { + version: 1, + allowedBlocked: [ + { + id: "channel-runtime-harness", + seam: "registerChannel", + reasonIncludes: "includeChannelRuntime=true", + decision: "allowed-blocked", + until: "channel runtime harness lands", + }, + ], + expectedWarnings: [ + { + id: "tool-factory-descriptor", + seam: "registerTool", + reasonIncludes: "no object descriptor", + decision: "expected-warning", + until: "tool factory capture expansion lands", + }, + ], + thresholds: { + wallP95RegressionPercent: 50, + peakRssRegressionMb: 50, + bootRegressionMs: 500, + strictMinimumSamples: 3, + }, + fixtureSets: { + smoke: ["wecom"], + }, +}; + +test("ci policy allows known blocked probes but fails unknown blockers", async () => { + const report = await buildCiPolicyReport({ + policy, + compatibilityReport: compatibilityReport(), + executionResults: executionResults([ + { + seam: "registerChannel", + reason: "captured registration requires includeChannelRuntime=true", + }, + { + seam: "registerMystery", + reason: "new blocked reason", + }, + ]), + }); + + assert.equal(report.status, "fail"); + assert.ok(report.checks.some((check) => check.action === "warn" && check.id.includes("registerChannel"))); + assert.ok(validateCiPolicyReport(report).some((error) => error.includes("registerMystery"))); + assert.match(renderCiPolicyMarkdown(report), /Crabpot CI Policy/); +}); + +test("ci policy fails ref diff hard regressions", async () => { + const report = await buildCiPolicyReport({ + policy, + compatibilityReport: compatibilityReport(), + refDiff: { + regressions: [ + { + code: "hookNames.removed-used", + action: "fail", + message: "Hook names removed values used by fixtures", + evidence: ["llm_output"], + }, + ], + }, + }); + + assert.equal(report.status, "fail"); + assert.ok(validateCiPolicyReport(report).some((error) => error.includes("hookNames.removed-used"))); +}); + +function compatibilityReport() { + return { + summary: { + breakageCount: 0, + p1IssueCount: 1, + }, + breakages: [], + issues: [ + { + severity: "P1", + fixture: "fixture", + code: "registration-capture-gap", + }, + ], + }; +} + +function executionResults(blocked) { + return { + summary: { + failCount: 0, + }, + artifacts: [ + { + fixture: "fixture", + artifactPath: ".crabpot/results/fixture/result.synthetic.json", + failures: [], + blocked: blocked.map((item, index) => ({ + captureIndex: index, + kind: "registration", + label: item.seam, + status: "blocked", + ...item, + })), + }, + ], + }; +}