refactor: add compact report summary

This commit is contained in:
Shakker 2026-05-07 15:19:07 +01:00
parent 2340429db6
commit 1c9e2db3b9
No known key found for this signature in database
8 changed files with 662 additions and 802 deletions

View File

@ -30,8 +30,9 @@ Unit tests can say code passed. Kova answers the release question:
and plugin load failures.
- **Failure containment**: provider timeouts, malformed responses, streaming
stalls, recovery, gateway health after failure, and leaked child processes.
- **Human and agent reports**: concise Markdown for people, structured JSON for
agents/CI, plus artifact bundles for handoff.
- **Human and agent reports**: verdict-first Markdown for people, compact
`*.summary.json` for agents/CI, full JSON evidence for audits, plus artifact
bundles for handoff.
Kova uses OCM to create isolated OpenClaw labs. Kova is not testing OCM. OCM is
the harness; OpenClaw is the product under test.

View File

@ -118,9 +118,11 @@ or baseline comparison is not blocking. Do not save baselines from
`--profile-on-failure` runs; those are instrumented diagnostic runs and their
resource numbers can include profiler overhead.
5. Read the generated JSON report first. Use the Markdown report for the human
summary. For failures, start with `failureBrief` in `report summarize --json`
or the `Failure Brief` section from `report paste`.
5. Read the generated `*.summary.json` first. Use the Markdown report for the
human decision summary, and open the full JSON report only when raw phase,
command, or collector evidence is needed. For failures, start with `decision`,
`findings`, and `failureBrief` in `report summarize --json` or the
`Failure Brief` section from `report paste`.
6. Produce a compact handoff when needed:

View File

@ -18,7 +18,8 @@ kova.report.v1
"runId": "kova-2026-04-29T000000Z",
"outputPaths": {
"markdown": "/path/to/report.md",
"json": "/path/to/report.json"
"json": "/path/to/report.json",
"summary": "/path/to/report.summary.json"
},
"mode": "dry-run",
"profile": null,
@ -82,8 +83,9 @@ lists every file staged into the bundle with relative path, byte size, and
SHA-256 digest so agents can inspect evidence coverage without scraping raw log
output or unpacking blindly.
`outputPaths` records the Markdown and JSON paths for the report itself. The
matrix receipt also includes bundle and checksum paths after bundling.
`outputPaths` records the Markdown, full JSON, and compact summary JSON paths
for the report itself. The matrix receipt also includes bundle and checksum
paths after bundling.
`gate` is normally `null`. When `kova matrix run --gate` is used, it contains
the release gate verdict, blocking/warning counts, required scenario policy, and
@ -566,11 +568,39 @@ scan only CLI help and manifests.
## Summary Output
`kova report summarize <report.json> --json` returns a compact agent-facing
view of each scenario with status, cleanup, failed command, concise failure
reason, violations, and a small measurement summary. Agents should use this
before reading the full report when they only need pass/fail and high-signal
performance evidence.
Each run also writes `<run>.summary.json`. Running
`kova report summarize <report.json> --json` prints the same compact,
agent-facing contract that is written to that file:
```json
{
"schemaVersion": "kova.report.summary.v1",
"decision": {
"verdict": "FAIL",
"reason": "gateway peak RSS 701.8 MB exceeded threshold 700 MB",
"blockingFindingCount": 1,
"warningFindingCount": 0
},
"run": {
"repeat": 3,
"parallel": 1,
"auth": {}
},
"coverage": {
"recordCount": 3,
"scenarioCount": 1,
"stateCount": 1
},
"findings": [],
"groups": [],
"samples": [],
"artifacts": []
}
```
Agents should use the summary before reading the full report when they only
need pass/fail, findings, aggregate performance, sample-level evidence, and
artifact paths. The full `kova.report.v1` JSON remains the audit trail with raw
records, phases, commands, and collector evidence.
When a report contains failures, the structured summary also includes
`failureBrief` with:

View File

@ -30,7 +30,7 @@ import { reportsDir } from "../paths.mjs";
import { loadRegistryContext } from "../registries/context.mjs";
import { loadProfile } from "../registries/profiles.mjs";
import { validateScenarioRun } from "../registries/scenarios.mjs";
import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs";
import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs";
import { resolveTarget } from "../targets.mjs";
@ -68,6 +68,7 @@ export async function runMatrixRun(flags) {
const runId = createRunId();
const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
const summaryPath = join(reportRoot, `${runId}-${profile.id}.summary.json`);
const targetSetup = { completed: false };
const runEntry = async (entry) => {
const context = {
@ -129,7 +130,8 @@ export async function runMatrixRun(flags) {
runId,
outputPaths: {
markdown: reportPath,
json: jsonPath
json: jsonPath,
summary: summaryPath
},
mode: flags.execute === true ? "execution" : "dry-run",
profile: profileSummary(profile),
@ -181,6 +183,7 @@ export async function runMatrixRun(flags) {
}
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
@ -195,6 +198,7 @@ export async function runMatrixRun(flags) {
profile: profileSummary(profile),
reportPath,
jsonPath,
summaryPath,
bundlePath: bundle.outputPath,
checksumPath: bundle.checksumPath,
retainedGateArtifacts,
@ -228,12 +232,15 @@ async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
report.retainedGateArtifacts = {
status: "pending"
};
const summaryPath = report.outputPaths?.summary ?? jsonPath.replace(/\.json$/, ".summary.json");
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
const retained = await retainGateArtifacts(jsonPath, bundle);
report.retainedGateArtifacts = retained;
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
return retained;
}

View File

@ -3,7 +3,7 @@ import { relative } from "node:path";
import { required, resolveFromCwd } from "../cli.mjs";
import { bundleReport } from "../reporting/artifacts.mjs";
import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "../reporting/compare.mjs";
import { renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";
import { buildReportSummary, renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";
export async function runReportCommand(flags) {
const [subcommand, firstPath, secondPath] = flags._;
@ -11,11 +11,7 @@ export async function runReportCommand(flags) {
if (subcommand === "summarize") {
const report = await readReport(required(firstPath, "report path"));
if (flags.json) {
console.log(JSON.stringify({
schemaVersion: "kova.report.summary.v1",
generatedAt: new Date().toISOString(),
summary: renderReportSummary(report, { structured: true })
}, null, 2));
console.log(JSON.stringify(buildReportSummary(report), null, 2));
return;
}

View File

@ -24,7 +24,7 @@ import { reportsDir } from "../paths.mjs";
import { loadRegistryContext } from "../registries/context.mjs";
import { loadScenarios, validateScenarioRun } from "../registries/scenarios.mjs";
import { loadState } from "../registries/states.mjs";
import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
import { buildDryRunRecord, createRunId, executeScenario } from "../runner.mjs";
import { resolveTarget } from "../targets.mjs";
@ -50,6 +50,7 @@ export async function runScenarioCommand(flags) {
const runId = createRunId();
const reportPath = join(reportRoot, `${runId}.md`);
const jsonPath = join(reportRoot, `${runId}.json`);
const summaryPath = join(reportRoot, `${runId}.summary.json`);
const repeat = positiveIntegerFlag(flags, "repeat", 1);
const auth = await resolveRunAuthContext(flags);
const regressionThresholds = await loadRegressionThresholds(flags);
@ -111,6 +112,11 @@ export async function runScenarioCommand(flags) {
schemaVersion: reportSchemaVersion,
generatedAt: new Date().toISOString(),
runId,
outputPaths: {
markdown: reportPath,
json: jsonPath,
summary: summaryPath
},
mode: context.execute ? "execution" : "dry-run",
target,
from: flags.from ?? null,
@ -152,6 +158,7 @@ export async function runScenarioCommand(flags) {
}
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
const mode = context.execute ? "execution" : "dry-run";
if (flags.json) {
@ -162,6 +169,7 @@ export async function runScenarioCommand(flags) {
runId,
reportPath,
jsonPath,
summaryPath,
performance: summarizePerformanceReceipt(report.performance, report.baseline),
summary: report.summary
}, null, 2));

File diff suppressed because it is too large Load Diff

View File

@ -1499,7 +1499,7 @@ exit 2
assertEqual(report.summary?.statuses?.BLOCKED, 1, "failed local-build scenario status");
assertEqual(report.records?.[0]?.cleanup, "already-absent", "already absent env cleanup status");
assertEqual(report.targetCleanup?.status, "already-absent", "already absent local-build target cleanup status");
assertEqual(summary.summary?.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
assertEqual(summary.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
if (!/runtime remove kova-local-\d+ --json/.test(log)) {
throw new Error(`runtime remove was not called after failed build; log:\n${log}`);
}
@ -4341,12 +4341,12 @@ function diagnosticsTimelineEvaluationCheck() {
records: [openSpanRecord]
}, { structured: true });
assertEqual(
reportSummary.scenarios[0]?.measurements?.openclawOpenRequiredSpanCount,
reportSummary.scenarios[0]?.measurements?.diagnostics?.openRequiredSpanCount,
1,
"structured report open span evidence"
);
assertEqual(
reportSummary.scenarios[0]?.measurements?.openclawOpenSpans?.[0]?.name,
reportSummary.scenarios[0]?.measurements?.diagnostics?.openSpans?.[0]?.name,
"runtimeDeps.stage",
"structured report open span name"
);
@ -5576,10 +5576,10 @@ function markdownFailureCardsCheck() {
violations: [{ message: "gateway readiness exceeded threshold" }]
}]
});
assertEqual(rendered.includes("## Failure Cards"), true, "markdown failure cards section");
assertEqual(rendered.includes("FAIL gateway-performance: gateway readiness exceeded threshold"), true, "failure card summary");
assertEqual(rendered.includes("likely owner: gateway-runtime"), true, "failure card owner");
assertEqual(rendered.includes("evidence: readinessHealthReadyMs: 45000"), true, "failure card evidence");
assertEqual(rendered.includes("## Findings"), true, "markdown findings section");
assertEqual(rendered.includes("gateway-performance"), true, "finding scenario");
assertEqual(rendered.includes("gateway readiness exceeded threshold"), true, "finding summary");
assertEqual(rendered.includes("gateway-runtime"), true, "finding owner");
assertEqual(rendered.includes("## Resource Roles"), true, "markdown resource roles section");
assertEqual(rendered.includes("gateway: RSS 1100 MB; CPU 220%"), true, "markdown resource role summary");
return {