feat(ci): classify compatibility policy results
This commit is contained in:
parent
8cdca2d019
commit
fe0587f2a6
41
crabpot.ci-policy.json
Normal file
41
crabpot.ci-policy.json
Normal file
@ -0,0 +1,41 @@
|
||||
{
|
||||
"$schema": "./crabpot.ci-policy.schema.json",
|
||||
"version": 1,
|
||||
"allowedBlocked": [
|
||||
{
|
||||
"id": "channel-runtime-harness",
|
||||
"seam": "registerChannel",
|
||||
"reasonIncludes": "includeChannelRuntime=true",
|
||||
"decision": "allowed-blocked",
|
||||
"until": "channel runtime harness lands"
|
||||
},
|
||||
{
|
||||
"id": "service-lifecycle-harness",
|
||||
"seam": "registerService",
|
||||
"reasonIncludes": "lifecycle",
|
||||
"decision": "allowed-blocked",
|
||||
"until": "service lifecycle harness lands"
|
||||
}
|
||||
],
|
||||
"expectedWarnings": [
|
||||
{
|
||||
"id": "tool-factory-descriptor",
|
||||
"seam": "registerTool",
|
||||
"reasonIncludes": "no object descriptor",
|
||||
"decision": "expected-warning",
|
||||
"until": "tool factory capture expansion lands"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"wallP95RegressionPercent": 50,
|
||||
"peakRssRegressionMb": 50,
|
||||
"bootRegressionMs": 500,
|
||||
"strictMinimumSamples": 3
|
||||
},
|
||||
"fixtureSets": {
|
||||
"smoke": ["wecom", "llm-trace-phoenix", "codex-app-server", "opik-openclaw"],
|
||||
"sdk-alias": ["codex-app-server"],
|
||||
"side-effect-review": ["hasdata", "llm-trace-phoenix", "web-search-plus"],
|
||||
"build": ["agentchat", "clawmetry", "opik-openclaw"]
|
||||
}
|
||||
}
|
||||
53
crabpot.ci-policy.schema.json
Normal file
53
crabpot.ci-policy.schema.json
Normal file
@ -0,0 +1,53 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Crabpot CI policy",
|
||||
"type": "object",
|
||||
"required": ["version", "allowedBlocked", "expectedWarnings", "thresholds", "fixtureSets"],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"version": { "type": "integer", "minimum": 1 },
|
||||
"allowedBlocked": { "$ref": "#/$defs/rules" },
|
||||
"expectedWarnings": { "$ref": "#/$defs/rules" },
|
||||
"thresholds": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"wallP95RegressionPercent",
|
||||
"peakRssRegressionMb",
|
||||
"bootRegressionMs",
|
||||
"strictMinimumSamples"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"wallP95RegressionPercent": { "type": "number", "minimum": 0 },
|
||||
"peakRssRegressionMb": { "type": "number", "minimum": 0 },
|
||||
"bootRegressionMs": { "type": "number", "minimum": 0 },
|
||||
"strictMinimumSamples": { "type": "integer", "minimum": 1 }
|
||||
}
|
||||
},
|
||||
"fixtureSets": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "array",
|
||||
"items": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-]*$" },
|
||||
"uniqueItems": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"rules": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["id", "seam", "reasonIncludes", "decision", "until"],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"id": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-]*$" },
|
||||
"seam": { "type": "string", "minLength": 1 },
|
||||
"reasonIncludes": { "type": "string", "minLength": 1 },
|
||||
"decision": { "enum": ["allowed-blocked", "expected-warning"] },
|
||||
"until": { "type": "string", "minLength": 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -9,6 +9,7 @@
|
||||
},
|
||||
"scripts": {
|
||||
"check": "npm test && node scripts/sync-fixtures.mjs --check && node scripts/run-contract-smoke.mjs && node scripts/inspect-fixtures.mjs --check && node scripts/generate-report.mjs --check && node scripts/capture-contracts.mjs --check && node scripts/synthetic-probes.mjs --check && node scripts/cold-import-readiness.mjs --check && node scripts/workspace-plan.mjs --check && node scripts/profile-contract-runtime.mjs --check && node scripts/check-contract-coverage.mjs",
|
||||
"ci:policy": "node scripts/check-ci-policy.mjs",
|
||||
"contract:capture": "node scripts/capture-contracts.mjs",
|
||||
"contract:coverage": "node scripts/check-contract-coverage.mjs",
|
||||
"contract:synthetic": "node scripts/synthetic-probes.mjs",
|
||||
|
||||
340
scripts/check-ci-policy.mjs
Normal file
340
scripts/check-ci-policy.mjs
Normal file
@ -0,0 +1,340 @@
|
||||
#!/usr/bin/env node
|
||||
import { existsSync } from "node:fs";
|
||||
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import { repoRoot } from "./manifest-lib.mjs";
|
||||
import { buildReport, defaultJsonReportPath } from "./report-lib.mjs";
|
||||
import { defaultExecutionResultsJsonPath } from "./summarize-execution-results.mjs";
|
||||
import { defaultRefDiffJsonPath } from "./compare-openclaw-refs.mjs";
|
||||
|
||||
export const defaultCiPolicyPath = path.join(repoRoot, "crabpot.ci-policy.json");
|
||||
export const defaultCiPolicyReportJsonPath = path.join(repoRoot, "reports/crabpot-ci-policy.json");
|
||||
export const defaultCiPolicyReportMarkdownPath = path.join(repoRoot, "reports/crabpot-ci-policy.md");
|
||||
|
||||
if (import.meta.url === pathToFileURL(process.argv[1]).href) {
|
||||
await main();
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = parseArgs(process.argv.slice(2));
|
||||
const report = await buildCiPolicyReport({
|
||||
executionResultsPath: args.executionResultsPath,
|
||||
policyPath: args.policyPath,
|
||||
refDiffPath: args.refDiffPath,
|
||||
reportPath: args.reportPath,
|
||||
strict: args.strict,
|
||||
});
|
||||
const errors = validateCiPolicyReport(report);
|
||||
|
||||
if (args.write) {
|
||||
await writeCiPolicyReport(report);
|
||||
}
|
||||
|
||||
if (args.json) {
|
||||
process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
|
||||
} else {
|
||||
console.log(
|
||||
`ci policy: ${report.status}; ${report.summary.failCount} fail, ${report.summary.warnCount} warn, ${report.summary.passCount} pass`,
|
||||
);
|
||||
}
|
||||
|
||||
if (errors.length > 0) {
|
||||
throw new Error(errors.join("\n"));
|
||||
}
|
||||
}
|
||||
|
||||
function parseArgs(argv) {
|
||||
const args = {
|
||||
executionResultsPath: defaultExecutionResultsJsonPath,
|
||||
json: false,
|
||||
policyPath: defaultCiPolicyPath,
|
||||
refDiffPath: defaultRefDiffJsonPath,
|
||||
reportPath: defaultJsonReportPath,
|
||||
strict: false,
|
||||
write: true,
|
||||
};
|
||||
|
||||
for (let index = 0; index < argv.length; index += 1) {
|
||||
const arg = argv[index];
|
||||
if (arg === "--check") {
|
||||
args.write = false;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--json") {
|
||||
args.json = true;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--policy") {
|
||||
args.policyPath = path.resolve(argv[index + 1]);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--report") {
|
||||
args.reportPath = path.resolve(argv[index + 1]);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--execution-results") {
|
||||
args.executionResultsPath = path.resolve(argv[index + 1]);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--ref-diff") {
|
||||
args.refDiffPath = path.resolve(argv[index + 1]);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--strict") {
|
||||
args.strict = true;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--write") {
|
||||
args.write = true;
|
||||
}
|
||||
}
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
export async function buildCiPolicyReport(options = {}) {
|
||||
const policy = options.policy ?? (await readJson(options.policyPath ?? defaultCiPolicyPath));
|
||||
validatePolicy(policy);
|
||||
const compatibilityReport =
|
||||
options.compatibilityReport ??
|
||||
(await readOptionalJson(options.reportPath ?? defaultJsonReportPath)) ??
|
||||
(await buildReport({ generatedAt: "deterministic" }));
|
||||
const executionResults =
|
||||
options.executionResults ??
|
||||
(await readOptionalJson(options.executionResultsPath ?? defaultExecutionResultsJsonPath));
|
||||
const refDiff = options.refDiff ?? (await readOptionalJson(options.refDiffPath ?? defaultRefDiffJsonPath));
|
||||
|
||||
const checks = [
|
||||
...compatibilityChecks(compatibilityReport),
|
||||
...refDiffChecks(refDiff, { strict: options.strict }),
|
||||
...executionChecks(executionResults, policy, { strict: options.strict }),
|
||||
].sort((left, right) => actionRank(left.action) - actionRank(right.action) || left.id.localeCompare(right.id));
|
||||
|
||||
return {
|
||||
generatedAt: "deterministic",
|
||||
status: checks.some((check) => check.action === "fail") ? "fail" : "pass",
|
||||
strict: Boolean(options.strict),
|
||||
policy: {
|
||||
allowedBlocked: policy.allowedBlocked.length,
|
||||
expectedWarnings: policy.expectedWarnings.length,
|
||||
fixtureSets: Object.keys(policy.fixtureSets).sort(),
|
||||
thresholds: policy.thresholds,
|
||||
},
|
||||
summary: {
|
||||
checkCount: checks.length,
|
||||
failCount: checks.filter((check) => check.action === "fail").length,
|
||||
warnCount: checks.filter((check) => check.action === "warn").length,
|
||||
passCount: checks.filter((check) => check.action === "pass").length,
|
||||
},
|
||||
checks,
|
||||
};
|
||||
}
|
||||
|
||||
function compatibilityChecks(report) {
|
||||
const checks = [];
|
||||
if (!report) {
|
||||
checks.push({
|
||||
id: "compatibility-report.missing",
|
||||
action: "fail",
|
||||
message: "compatibility report is missing",
|
||||
evidence: [defaultJsonReportPath],
|
||||
});
|
||||
return checks;
|
||||
}
|
||||
checks.push({
|
||||
id: "compatibility-report.breakages",
|
||||
action: report.summary.breakageCount > 0 ? "fail" : "pass",
|
||||
message: `${report.summary.breakageCount} hard breakages`,
|
||||
evidence: (report.breakages ?? []).map((finding) => `${finding.fixture}:${finding.code}`),
|
||||
});
|
||||
checks.push({
|
||||
id: "compatibility-report.p1-issues",
|
||||
action: "pass",
|
||||
message: `${report.summary.p1IssueCount} P1 issues tracked`,
|
||||
evidence: (report.issues ?? [])
|
||||
.filter((issue) => issue.severity === "P1")
|
||||
.map((issue) => `${issue.fixture}:${issue.code}`),
|
||||
});
|
||||
return checks;
|
||||
}
|
||||
|
||||
function refDiffChecks(refDiff, options) {
|
||||
if (!refDiff) {
|
||||
return [
|
||||
{
|
||||
id: "ref-diff.not-run",
|
||||
action: "pass",
|
||||
message: "ref diff artifact was not present for this CI mode",
|
||||
evidence: [],
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
return (refDiff.regressions ?? []).map((regression) => ({
|
||||
id: `ref-diff.${regression.code}`,
|
||||
action: regression.action === "fail" || (options.strict && regression.action === "warn") ? "fail" : "warn",
|
||||
message: regression.message,
|
||||
evidence: regression.evidence ?? [],
|
||||
}));
|
||||
}
|
||||
|
||||
function executionChecks(executionResults, policy, options) {
|
||||
if (!executionResults) {
|
||||
return [
|
||||
{
|
||||
id: "execution-results.not-run",
|
||||
action: "pass",
|
||||
message: "isolated execution artifact was not present for this CI mode",
|
||||
evidence: [],
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const checks = [
|
||||
{
|
||||
id: "execution-results.failures",
|
||||
action: executionResults.summary.failCount > 0 ? "fail" : "pass",
|
||||
message: `${executionResults.summary.failCount} failed synthetic probes`,
|
||||
evidence: failedExecutionEvidence(executionResults),
|
||||
},
|
||||
];
|
||||
|
||||
const blocked = executionResults.artifacts.flatMap((artifact) =>
|
||||
(artifact.blocked ?? []).map((item) => ({ artifact, item })),
|
||||
);
|
||||
for (const blockedItem of blocked) {
|
||||
const expectedWarning = findPolicyMatch(policy.expectedWarnings, blockedItem.item);
|
||||
const allowedBlocked = findPolicyMatch(policy.allowedBlocked, blockedItem.item);
|
||||
const match = expectedWarning ?? allowedBlocked;
|
||||
checks.push({
|
||||
id: `execution-results.blocked.${blockedItem.artifact.fixture}.${blockedItem.item.seam}.${blockedItem.item.captureIndex}`,
|
||||
action: match ? (options.strict ? "fail" : "warn") : "fail",
|
||||
message: match
|
||||
? `${match.decision}: ${blockedItem.item.reason}`
|
||||
: `unknown blocked synthetic probe: ${blockedItem.item.reason}`,
|
||||
evidence: [
|
||||
blockedItem.artifact.artifactPath,
|
||||
blockedItem.item.seam,
|
||||
blockedItem.item.reason,
|
||||
match?.id ?? "unclassified",
|
||||
],
|
||||
});
|
||||
}
|
||||
return checks;
|
||||
}
|
||||
|
||||
function findPolicyMatch(rules, item) {
|
||||
return rules.find((rule) => item.seam === rule.seam && item.reason?.includes(rule.reasonIncludes));
|
||||
}
|
||||
|
||||
function failedExecutionEvidence(executionResults) {
|
||||
return executionResults.artifacts.flatMap((artifact) =>
|
||||
(artifact.failures ?? []).map((failure) => `${artifact.fixture}:${failure.seam}:${failure.error}`),
|
||||
);
|
||||
}
|
||||
|
||||
export function validateCiPolicyReport(report) {
|
||||
return report.checks
|
||||
.filter((check) => check.action === "fail")
|
||||
.map((check) => `${check.id}: ${check.message}: ${check.evidence.join(", ")}`);
|
||||
}
|
||||
|
||||
export async function writeCiPolicyReport(report, options = {}) {
|
||||
const jsonPath = options.jsonPath ?? defaultCiPolicyReportJsonPath;
|
||||
const markdownPath = options.markdownPath ?? defaultCiPolicyReportMarkdownPath;
|
||||
await mkdir(path.dirname(jsonPath), { recursive: true });
|
||||
await mkdir(path.dirname(markdownPath), { recursive: true });
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await writeFile(markdownPath, `${renderCiPolicyMarkdown(report)}\n`, "utf8");
|
||||
return { jsonPath, markdownPath };
|
||||
}
|
||||
|
||||
export function renderCiPolicyMarkdown(report) {
|
||||
return [
|
||||
"# Crabpot CI Policy",
|
||||
"",
|
||||
`Generated: ${report.generatedAt}`,
|
||||
`Status: ${report.status.toUpperCase()}`,
|
||||
`Strict: ${report.strict}`,
|
||||
"",
|
||||
"## Summary",
|
||||
"",
|
||||
markdownTable(
|
||||
[
|
||||
["Checks", report.summary.checkCount],
|
||||
["Fail", report.summary.failCount],
|
||||
["Warn", report.summary.warnCount],
|
||||
["Pass", report.summary.passCount],
|
||||
["Allowed blocked rules", report.policy.allowedBlocked],
|
||||
["Expected warning rules", report.policy.expectedWarnings],
|
||||
["Fixture sets", report.policy.fixtureSets.join(", ")],
|
||||
],
|
||||
["Metric", "Value"],
|
||||
),
|
||||
"",
|
||||
"## Checks",
|
||||
"",
|
||||
markdownTable(
|
||||
report.checks.map((check) => [
|
||||
check.action,
|
||||
check.id,
|
||||
check.message,
|
||||
check.evidence.join(", ") || "-",
|
||||
]),
|
||||
["Action", "ID", "Message", "Evidence"],
|
||||
),
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
async function readJson(jsonPath) {
|
||||
return JSON.parse(await readFile(jsonPath, "utf8"));
|
||||
}
|
||||
|
||||
async function readOptionalJson(jsonPath) {
|
||||
return existsSync(jsonPath) ? readJson(jsonPath) : null;
|
||||
}
|
||||
|
||||
function validatePolicy(policy) {
|
||||
const errors = [];
|
||||
if (policy.version !== 1) {
|
||||
errors.push("ci policy version must be 1");
|
||||
}
|
||||
for (const key of ["allowedBlocked", "expectedWarnings"]) {
|
||||
if (!Array.isArray(policy[key])) {
|
||||
errors.push(`ci policy ${key} must be an array`);
|
||||
}
|
||||
}
|
||||
if (!policy.thresholds || typeof policy.thresholds !== "object") {
|
||||
errors.push("ci policy thresholds are required");
|
||||
}
|
||||
if (!policy.fixtureSets || typeof policy.fixtureSets !== "object") {
|
||||
errors.push("ci policy fixtureSets are required");
|
||||
}
|
||||
if (errors.length > 0) {
|
||||
throw new Error(errors.join("\n"));
|
||||
}
|
||||
}
|
||||
|
||||
function actionRank(value) {
|
||||
return { fail: 0, warn: 1, pass: 2 }[value] ?? 3;
|
||||
}
|
||||
|
||||
function markdownTable(rows, headers) {
|
||||
if (rows.length === 0) {
|
||||
return "_none_";
|
||||
}
|
||||
|
||||
const allRows = [headers, ...rows.map((row) => row.map((cell) => String(cell ?? "-")))];
|
||||
const widths = headers.map((_, columnIndex) => Math.max(...allRows.map((row) => row[columnIndex].length)));
|
||||
const renderRow = (row) => `| ${row.map((cell, index) => cell.padEnd(widths[index])).join(" | ")} |`;
|
||||
return [
|
||||
renderRow(headers),
|
||||
renderRow(widths.map((width) => "-".repeat(width))),
|
||||
...rows.map((row) => renderRow(row.map((cell) => String(cell ?? "-")))),
|
||||
].join("\n");
|
||||
}
|
||||
119
test/ci-policy.test.mjs
Normal file
119
test/ci-policy.test.mjs
Normal file
@ -0,0 +1,119 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { test } from "node:test";
|
||||
import {
|
||||
buildCiPolicyReport,
|
||||
renderCiPolicyMarkdown,
|
||||
validateCiPolicyReport,
|
||||
} from "../scripts/check-ci-policy.mjs";
|
||||
|
||||
const policy = {
|
||||
version: 1,
|
||||
allowedBlocked: [
|
||||
{
|
||||
id: "channel-runtime-harness",
|
||||
seam: "registerChannel",
|
||||
reasonIncludes: "includeChannelRuntime=true",
|
||||
decision: "allowed-blocked",
|
||||
until: "channel runtime harness lands",
|
||||
},
|
||||
],
|
||||
expectedWarnings: [
|
||||
{
|
||||
id: "tool-factory-descriptor",
|
||||
seam: "registerTool",
|
||||
reasonIncludes: "no object descriptor",
|
||||
decision: "expected-warning",
|
||||
until: "tool factory capture expansion lands",
|
||||
},
|
||||
],
|
||||
thresholds: {
|
||||
wallP95RegressionPercent: 50,
|
||||
peakRssRegressionMb: 50,
|
||||
bootRegressionMs: 500,
|
||||
strictMinimumSamples: 3,
|
||||
},
|
||||
fixtureSets: {
|
||||
smoke: ["wecom"],
|
||||
},
|
||||
};
|
||||
|
||||
test("ci policy allows known blocked probes but fails unknown blockers", async () => {
|
||||
const report = await buildCiPolicyReport({
|
||||
policy,
|
||||
compatibilityReport: compatibilityReport(),
|
||||
executionResults: executionResults([
|
||||
{
|
||||
seam: "registerChannel",
|
||||
reason: "captured registration requires includeChannelRuntime=true",
|
||||
},
|
||||
{
|
||||
seam: "registerMystery",
|
||||
reason: "new blocked reason",
|
||||
},
|
||||
]),
|
||||
});
|
||||
|
||||
assert.equal(report.status, "fail");
|
||||
assert.ok(report.checks.some((check) => check.action === "warn" && check.id.includes("registerChannel")));
|
||||
assert.ok(validateCiPolicyReport(report).some((error) => error.includes("registerMystery")));
|
||||
assert.match(renderCiPolicyMarkdown(report), /Crabpot CI Policy/);
|
||||
});
|
||||
|
||||
test("ci policy fails ref diff hard regressions", async () => {
|
||||
const report = await buildCiPolicyReport({
|
||||
policy,
|
||||
compatibilityReport: compatibilityReport(),
|
||||
refDiff: {
|
||||
regressions: [
|
||||
{
|
||||
code: "hookNames.removed-used",
|
||||
action: "fail",
|
||||
message: "Hook names removed values used by fixtures",
|
||||
evidence: ["llm_output"],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(report.status, "fail");
|
||||
assert.ok(validateCiPolicyReport(report).some((error) => error.includes("hookNames.removed-used")));
|
||||
});
|
||||
|
||||
function compatibilityReport() {
|
||||
return {
|
||||
summary: {
|
||||
breakageCount: 0,
|
||||
p1IssueCount: 1,
|
||||
},
|
||||
breakages: [],
|
||||
issues: [
|
||||
{
|
||||
severity: "P1",
|
||||
fixture: "fixture",
|
||||
code: "registration-capture-gap",
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
function executionResults(blocked) {
|
||||
return {
|
||||
summary: {
|
||||
failCount: 0,
|
||||
},
|
||||
artifacts: [
|
||||
{
|
||||
fixture: "fixture",
|
||||
artifactPath: ".crabpot/results/fixture/result.synthetic.json",
|
||||
failures: [],
|
||||
blocked: blocked.map((item, index) => ({
|
||||
captureIndex: index,
|
||||
kind: "registration",
|
||||
label: item.seam,
|
||||
status: "blocked",
|
||||
...item,
|
||||
})),
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user