refactor: add compact report summary

2026-05-07 15:19:07 +01:00 · 2026-05-07 15:19:07 +01:00 · 1c9e2db3b9
commit 1c9e2db3b9
parent 2340429db6
8 changed files with 662 additions and 802 deletions
--- a/README.md
+++ b/README.md
@ -30,8 +30,9 @@ Unit tests can say code passed. Kova answers the release question:
  and plugin load failures.
 - **Failure containment**: provider timeouts, malformed responses, streaming
  stalls, recovery, gateway health after failure, and leaked child processes.
- **Human and agent reports**: concise Markdown for people, structured JSON for
-  agents/CI, plus artifact bundles for handoff.
+- **Human and agent reports**: verdict-first Markdown for people, compact
+  `*.summary.json` for agents/CI, full JSON evidence for audits, plus artifact
+  bundles for handoff.

 Kova uses OCM to create isolated OpenClaw labs. Kova is not testing OCM. OCM is
 the harness; OpenClaw is the product under test.
--- a/docs/AGENT_USAGE.md
+++ b/docs/AGENT_USAGE.md
@ -118,9 +118,11 @@ or baseline comparison is not blocking. Do not save baselines from
 `--profile-on-failure` runs; those are instrumented diagnostic runs and their
 resource numbers can include profiler overhead.

-5. Read the generated JSON report first. Use the Markdown report for the human
-summary. For failures, start with `failureBrief` in `report summarize --json`
-or the `Failure Brief` section from `report paste`.
+5. Read the generated `*.summary.json` first. Use the Markdown report for the
+human decision summary, and open the full JSON report only when raw phase,
+command, or collector evidence is needed. For failures, start with `decision`,
+`findings`, and `failureBrief` in `report summarize --json`, or with the
+`Failure Brief` section from `report paste`.

 6. Produce a compact handoff when needed:

--- a/docs/REPORT_SCHEMA.md
+++ b/docs/REPORT_SCHEMA.md
@ -18,7 +18,8 @@ kova.report.v1
  "runId": "kova-2026-04-29T000000Z",
  "outputPaths": {
    "markdown": "/path/to/report.md",
-    "json": "/path/to/report.json"
+    "json": "/path/to/report.json",
+    "summary": "/path/to/report.summary.json"
  },
  "mode": "dry-run",
  "profile": null,
@ -82,8 +83,9 @@ lists every file staged into the bundle with relative path, byte size, and
 SHA-256 digest so agents can inspect evidence coverage without scraping raw log
 output or unpacking blindly.

-`outputPaths` records the Markdown and JSON paths for the report itself. The
-matrix receipt also includes bundle and checksum paths after bundling.
+`outputPaths` records the Markdown, full JSON, and compact summary JSON paths
+for the report itself. The matrix receipt also includes bundle and checksum
+paths after bundling.

 `gate` is normally `null`. When `kova matrix run --gate` is used, it contains
 the release gate verdict, blocking/warning counts, required scenario policy, and
@ -566,11 +568,39 @@ scan only CLI help and manifests.

 ## Summary Output

-`kova report summarize <report.json> --json` returns a compact agent-facing
-view of each scenario with status, cleanup, failed command, concise failure
-reason, violations, and a small measurement summary. Agents should use this
-before reading the full report when they only need pass/fail and high-signal
-performance evidence.
+Each run also writes `<run>.summary.json`, the same compact agent-facing
+contract that `kova report summarize <report.json> --json` prints:
+
+```json
+{
+  "schemaVersion": "kova.report.summary.v1",
+  "decision": {
+    "verdict": "FAIL",
+    "reason": "gateway peak RSS 701.8 MB exceeded threshold 700 MB",
+    "blockingFindingCount": 1,
+    "warningFindingCount": 0
+  },
+  "run": {
+    "repeat": 3,
+    "parallel": 1,
+    "auth": {}
+  },
+  "coverage": {
+    "recordCount": 3,
+    "scenarioCount": 1,
+    "stateCount": 1
+  },
+  "findings": [],
+  "groups": [],
+  "samples": [],
+  "artifacts": []
+}
+```
+
+Agents should use the summary before reading the full report when they only
+need pass/fail, findings, aggregate performance, sample-level evidence, and
+artifact paths. The full `kova.report.v1` JSON remains the audit trail with raw
+records, phases, commands, and collector evidence.

 When a report contains failures, the structured summary also includes
 `failureBrief` with:
--- a/src/commands/matrix-run.mjs
+++ b/src/commands/matrix-run.mjs
@ -30,7 +30,7 @@ import { reportsDir } from "../paths.mjs";
 import { loadRegistryContext } from "../registries/context.mjs";
 import { loadProfile } from "../registries/profiles.mjs";
 import { validateScenarioRun } from "../registries/scenarios.mjs";
-import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
+import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
 import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs";
 import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs";
 import { resolveTarget } from "../targets.mjs";
@ -68,6 +68,7 @@ export async function runMatrixRun(flags) {
  const runId = createRunId();
  const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
  const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
+  const summaryPath = join(reportRoot, `${runId}-${profile.id}.summary.json`);
  const targetSetup = { completed: false };
  const runEntry = async (entry) => {
    const context = {
@ -129,7 +130,8 @@ export async function runMatrixRun(flags) {
    runId,
    outputPaths: {
      markdown: reportPath,
-      json: jsonPath
+      json: jsonPath,
+      summary: summaryPath
    },
    mode: flags.execute === true ? "execution" : "dry-run",
    profile: profileSummary(profile),
@ -181,6 +183,7 @@ export async function runMatrixRun(flags) {
  }
  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
  const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
  const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
    ? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
@ -195,6 +198,7 @@ export async function runMatrixRun(flags) {
      profile: profileSummary(profile),
      reportPath,
      jsonPath,
+      summaryPath,
      bundlePath: bundle.outputPath,
      checksumPath: bundle.checksumPath,
      retainedGateArtifacts,
@ -228,12 +232,15 @@ async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
  report.retainedGateArtifacts = {
    status: "pending"
  };
+  const summaryPath = report.outputPaths?.summary ?? jsonPath.replace(/\.json$/, ".summary.json");
  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
  const retained = await retainGateArtifacts(jsonPath, bundle);
  report.retainedGateArtifacts = retained;
  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
  await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
  return retained;
 }
--- a/src/commands/report.mjs
+++ b/src/commands/report.mjs
@ -3,7 +3,7 @@ import { relative } from "node:path";
 import { required, resolveFromCwd } from "../cli.mjs";
 import { bundleReport } from "../reporting/artifacts.mjs";
 import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "../reporting/compare.mjs";
-import { renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";
+import { buildReportSummary, renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";

 export async function runReportCommand(flags) {
  const [subcommand, firstPath, secondPath] = flags._;
@ -11,11 +11,7 @@ export async function runReportCommand(flags) {
  if (subcommand === "summarize") {
    const report = await readReport(required(firstPath, "report path"));
    if (flags.json) {
-      console.log(JSON.stringify({
-        schemaVersion: "kova.report.summary.v1",
-        generatedAt: new Date().toISOString(),
-        summary: renderReportSummary(report, { structured: true })
-      }, null, 2));
+      console.log(JSON.stringify(buildReportSummary(report), null, 2));
      return;
    }

--- a/src/commands/run.mjs
+++ b/src/commands/run.mjs
@ -24,7 +24,7 @@ import { reportsDir } from "../paths.mjs";
 import { loadRegistryContext } from "../registries/context.mjs";
 import { loadScenarios, validateScenarioRun } from "../registries/scenarios.mjs";
 import { loadState } from "../registries/states.mjs";
-import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
+import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
 import { buildDryRunRecord, createRunId, executeScenario } from "../runner.mjs";
 import { resolveTarget } from "../targets.mjs";

@ -50,6 +50,7 @@ export async function runScenarioCommand(flags) {
  const runId = createRunId();
  const reportPath = join(reportRoot, `${runId}.md`);
  const jsonPath = join(reportRoot, `${runId}.json`);
+  const summaryPath = join(reportRoot, `${runId}.summary.json`);
  const repeat = positiveIntegerFlag(flags, "repeat", 1);
  const auth = await resolveRunAuthContext(flags);
  const regressionThresholds = await loadRegressionThresholds(flags);
@ -111,6 +112,11 @@ export async function runScenarioCommand(flags) {
    schemaVersion: reportSchemaVersion,
    generatedAt: new Date().toISOString(),
    runId,
+    outputPaths: {
+      markdown: reportPath,
+      json: jsonPath,
+      summary: summaryPath
+    },
    mode: context.execute ? "execution" : "dry-run",
    target,
    from: flags.from ?? null,
@ -152,6 +158,7 @@ export async function runScenarioCommand(flags) {
  }
  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");

  const mode = context.execute ? "execution" : "dry-run";
  if (flags.json) {
@ -162,6 +169,7 @@ export async function runScenarioCommand(flags) {
      runId,
      reportPath,
      jsonPath,
+      summaryPath,
      performance: summarizePerformanceReceipt(report.performance, report.baseline),
      summary: report.summary
    }, null, 2));
--- a/src/reporting/report.mjs
+++ b/src/reporting/report.mjs
--- a/src/selfcheck.mjs
+++ b/src/selfcheck.mjs
@ -1499,7 +1499,7 @@ exit 2
    assertEqual(report.summary?.statuses?.BLOCKED, 1, "failed local-build scenario status");
    assertEqual(report.records?.[0]?.cleanup, "already-absent", "already absent env cleanup status");
    assertEqual(report.targetCleanup?.status, "already-absent", "already absent local-build target cleanup status");
-    assertEqual(summary.summary?.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
+    assertEqual(summary.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
    if (!/runtime remove kova-local-\d+ --json/.test(log)) {
      throw new Error(`runtime remove was not called after failed build; log:\n${log}`);
    }
@ -4341,12 +4341,12 @@ function diagnosticsTimelineEvaluationCheck() {
      records: [openSpanRecord]
    }, { structured: true });
    assertEqual(
-      reportSummary.scenarios[0]?.measurements?.openclawOpenRequiredSpanCount,
+      reportSummary.scenarios[0]?.measurements?.diagnostics?.openRequiredSpanCount,
      1,
      "structured report open span evidence"
    );
    assertEqual(
-      reportSummary.scenarios[0]?.measurements?.openclawOpenSpans?.[0]?.name,
+      reportSummary.scenarios[0]?.measurements?.diagnostics?.openSpans?.[0]?.name,
      "runtimeDeps.stage",
      "structured report open span name"
    );
@ -5576,10 +5576,10 @@ function markdownFailureCardsCheck() {
        violations: [{ message: "gateway readiness exceeded threshold" }]
      }]
    });
-    assertEqual(rendered.includes("## Failure Cards"), true, "markdown failure cards section");
-    assertEqual(rendered.includes("FAIL gateway-performance: gateway readiness exceeded threshold"), true, "failure card summary");
-    assertEqual(rendered.includes("likely owner: gateway-runtime"), true, "failure card owner");
-    assertEqual(rendered.includes("evidence: readinessHealthReadyMs: 45000"), true, "failure card evidence");
+    assertEqual(rendered.includes("## Findings"), true, "markdown findings section");
+    assertEqual(rendered.includes("gateway-performance"), true, "finding scenario");
+    assertEqual(rendered.includes("gateway readiness exceeded threshold"), true, "finding summary");
+    assertEqual(rendered.includes("gateway-runtime"), true, "finding owner");
    assertEqual(rendered.includes("## Resource Roles"), true, "markdown resource roles section");
    assertEqual(rendered.includes("gateway: RSS 1100 MB; CPU 220%"), true, "markdown resource role summary");
    return {