refactor: add compact report summary
This commit is contained in:
parent
2340429db6
commit
1c9e2db3b9
@ -30,8 +30,9 @@ Unit tests can say code passed. Kova answers the release question:
|
||||
and plugin load failures.
|
||||
- **Failure containment**: provider timeouts, malformed responses, streaming
|
||||
stalls, recovery, gateway health after failure, and leaked child processes.
|
||||
- **Human and agent reports**: concise Markdown for people, structured JSON for
|
||||
agents/CI, plus artifact bundles for handoff.
|
||||
- **Human and agent reports**: verdict-first Markdown for people, compact
|
||||
`*.summary.json` for agents/CI, full JSON evidence for audits, plus artifact
|
||||
bundles for handoff.
|
||||
|
||||
Kova uses OCM to create isolated OpenClaw labs. Kova is not testing OCM. OCM is
|
||||
the harness; OpenClaw is the product under test.
|
||||
|
||||
@ -118,9 +118,11 @@ or baseline comparison is not blocking. Do not save baselines from
|
||||
`--profile-on-failure` runs; those are instrumented diagnostic runs and their
|
||||
resource numbers can include profiler overhead.
|
||||
|
||||
5. Read the generated JSON report first. Use the Markdown report for the human
|
||||
summary. For failures, start with `failureBrief` in `report summarize --json`
|
||||
or the `Failure Brief` section from `report paste`.
|
||||
5. Read the generated `*.summary.json` first. Use the Markdown report for the
|
||||
human decision summary and the full JSON report only when raw phase, command,
|
||||
or collector evidence is needed. For failures, start with `decision`,
|
||||
`findings`, and `failureBrief` in `report summarize --json` or the `Failure
|
||||
Brief` section from `report paste`.
|
||||
|
||||
6. Produce a compact handoff when needed:
|
||||
|
||||
|
||||
@ -18,7 +18,8 @@ kova.report.v1
|
||||
"runId": "kova-2026-04-29T000000Z",
|
||||
"outputPaths": {
|
||||
"markdown": "/path/to/report.md",
|
||||
"json": "/path/to/report.json"
|
||||
"json": "/path/to/report.json",
|
||||
"summary": "/path/to/report.summary.json"
|
||||
},
|
||||
"mode": "dry-run",
|
||||
"profile": null,
|
||||
@ -82,8 +83,9 @@ lists every file staged into the bundle with relative path, byte size, and
|
||||
SHA-256 digest so agents can inspect evidence coverage without scraping raw log
|
||||
output or unpacking blindly.
|
||||
|
||||
`outputPaths` records the Markdown and JSON paths for the report itself. The
|
||||
matrix receipt also includes bundle and checksum paths after bundling.
|
||||
`outputPaths` records the Markdown, full JSON, and compact summary JSON paths
|
||||
for the report itself. The matrix receipt also includes bundle and checksum
|
||||
paths after bundling.
|
||||
|
||||
`gate` is normally `null`. When `kova matrix run --gate` is used, it contains
|
||||
the release gate verdict, blocking/warning counts, required scenario policy, and
|
||||
@ -566,11 +568,39 @@ scan only CLI help and manifests.
|
||||
|
||||
## Summary Output
|
||||
|
||||
`kova report summarize <report.json> --json` returns a compact agent-facing
|
||||
view of each scenario with status, cleanup, failed command, concise failure
|
||||
reason, violations, and a small measurement summary. Agents should use this
|
||||
before reading the full report when they only need pass/fail and high-signal
|
||||
performance evidence.
|
||||
Each run also writes `<run>.summary.json` beside the full report. `kova report
|
||||
summarize <report.json> --json` prints the same compact agent-facing contract:
|
||||
|
||||
```json
|
||||
{
|
||||
"schemaVersion": "kova.report.summary.v1",
|
||||
"decision": {
|
||||
"verdict": "FAIL",
|
||||
"reason": "gateway peak RSS 701.8 MB exceeded threshold 700 MB",
|
||||
"blockingFindingCount": 1,
|
||||
"warningFindingCount": 0
|
||||
},
|
||||
"run": {
|
||||
"repeat": 3,
|
||||
"parallel": 1,
|
||||
"auth": {}
|
||||
},
|
||||
"coverage": {
|
||||
"recordCount": 3,
|
||||
"scenarioCount": 1,
|
||||
"stateCount": 1
|
||||
},
|
||||
"findings": [],
|
||||
"groups": [],
|
||||
"samples": [],
|
||||
"artifacts": []
|
||||
}
|
||||
```
|
||||
|
||||
Agents should use the summary before reading the full report when they only
|
||||
need pass/fail, findings, aggregate performance, sample-level evidence, and
|
||||
artifact paths. The full `kova.report.v1` JSON remains the audit trail with raw
|
||||
records, phases, commands, and collector evidence.
|
||||
|
||||
When a report contains failures, the structured summary also includes
|
||||
`failureBrief` with:
|
||||
|
||||
@ -30,7 +30,7 @@ import { reportsDir } from "../paths.mjs";
|
||||
import { loadRegistryContext } from "../registries/context.mjs";
|
||||
import { loadProfile } from "../registries/profiles.mjs";
|
||||
import { validateScenarioRun } from "../registries/scenarios.mjs";
|
||||
import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
|
||||
import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
|
||||
import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs";
|
||||
import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs";
|
||||
import { resolveTarget } from "../targets.mjs";
|
||||
@ -68,6 +68,7 @@ export async function runMatrixRun(flags) {
|
||||
const runId = createRunId();
|
||||
const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
|
||||
const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
|
||||
const summaryPath = join(reportRoot, `${runId}-${profile.id}.summary.json`);
|
||||
const targetSetup = { completed: false };
|
||||
const runEntry = async (entry) => {
|
||||
const context = {
|
||||
@ -129,7 +130,8 @@ export async function runMatrixRun(flags) {
|
||||
runId,
|
||||
outputPaths: {
|
||||
markdown: reportPath,
|
||||
json: jsonPath
|
||||
json: jsonPath,
|
||||
summary: summaryPath
|
||||
},
|
||||
mode: flags.execute === true ? "execution" : "dry-run",
|
||||
profile: profileSummary(profile),
|
||||
@ -181,6 +183,7 @@ export async function runMatrixRun(flags) {
|
||||
}
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
|
||||
const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
|
||||
const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
|
||||
? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
|
||||
@ -195,6 +198,7 @@ export async function runMatrixRun(flags) {
|
||||
profile: profileSummary(profile),
|
||||
reportPath,
|
||||
jsonPath,
|
||||
summaryPath,
|
||||
bundlePath: bundle.outputPath,
|
||||
checksumPath: bundle.checksumPath,
|
||||
retainedGateArtifacts,
|
||||
@ -228,12 +232,15 @@ async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
|
||||
report.retainedGateArtifacts = {
|
||||
status: "pending"
|
||||
};
|
||||
const summaryPath = report.outputPaths?.summary ?? jsonPath.replace(/\.json$/, ".summary.json");
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
|
||||
const retained = await retainGateArtifacts(jsonPath, bundle);
|
||||
report.retainedGateArtifacts = retained;
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
|
||||
await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
|
||||
return retained;
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ import { relative } from "node:path";
|
||||
import { required, resolveFromCwd } from "../cli.mjs";
|
||||
import { bundleReport } from "../reporting/artifacts.mjs";
|
||||
import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "../reporting/compare.mjs";
|
||||
import { renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";
|
||||
import { buildReportSummary, renderPasteSummary, renderReportSummary } from "../reporting/report.mjs";
|
||||
|
||||
export async function runReportCommand(flags) {
|
||||
const [subcommand, firstPath, secondPath] = flags._;
|
||||
@ -11,11 +11,7 @@ export async function runReportCommand(flags) {
|
||||
if (subcommand === "summarize") {
|
||||
const report = await readReport(required(firstPath, "report path"));
|
||||
if (flags.json) {
|
||||
console.log(JSON.stringify({
|
||||
schemaVersion: "kova.report.summary.v1",
|
||||
generatedAt: new Date().toISOString(),
|
||||
summary: renderReportSummary(report, { structured: true })
|
||||
}, null, 2));
|
||||
console.log(JSON.stringify(buildReportSummary(report), null, 2));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ import { reportsDir } from "../paths.mjs";
|
||||
import { loadRegistryContext } from "../registries/context.mjs";
|
||||
import { loadScenarios, validateScenarioRun } from "../registries/scenarios.mjs";
|
||||
import { loadState } from "../registries/states.mjs";
|
||||
import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
|
||||
import { buildReportSummary, renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
|
||||
import { buildDryRunRecord, createRunId, executeScenario } from "../runner.mjs";
|
||||
import { resolveTarget } from "../targets.mjs";
|
||||
|
||||
@ -50,6 +50,7 @@ export async function runScenarioCommand(flags) {
|
||||
const runId = createRunId();
|
||||
const reportPath = join(reportRoot, `${runId}.md`);
|
||||
const jsonPath = join(reportRoot, `${runId}.json`);
|
||||
const summaryPath = join(reportRoot, `${runId}.summary.json`);
|
||||
const repeat = positiveIntegerFlag(flags, "repeat", 1);
|
||||
const auth = await resolveRunAuthContext(flags);
|
||||
const regressionThresholds = await loadRegressionThresholds(flags);
|
||||
@ -111,6 +112,11 @@ export async function runScenarioCommand(flags) {
|
||||
schemaVersion: reportSchemaVersion,
|
||||
generatedAt: new Date().toISOString(),
|
||||
runId,
|
||||
outputPaths: {
|
||||
markdown: reportPath,
|
||||
json: jsonPath,
|
||||
summary: summaryPath
|
||||
},
|
||||
mode: context.execute ? "execution" : "dry-run",
|
||||
target,
|
||||
from: flags.from ?? null,
|
||||
@ -152,6 +158,7 @@ export async function runScenarioCommand(flags) {
|
||||
}
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await writeFile(summaryPath, `${JSON.stringify(buildReportSummary(report), null, 2)}\n`, "utf8");
|
||||
|
||||
const mode = context.execute ? "execution" : "dry-run";
|
||||
if (flags.json) {
|
||||
@ -162,6 +169,7 @@ export async function runScenarioCommand(flags) {
|
||||
runId,
|
||||
reportPath,
|
||||
jsonPath,
|
||||
summaryPath,
|
||||
performance: summarizePerformanceReceipt(report.performance, report.baseline),
|
||||
summary: report.summary
|
||||
}, null, 2));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1499,7 +1499,7 @@ exit 2
|
||||
assertEqual(report.summary?.statuses?.BLOCKED, 1, "failed local-build scenario status");
|
||||
assertEqual(report.records?.[0]?.cleanup, "already-absent", "already absent env cleanup status");
|
||||
assertEqual(report.targetCleanup?.status, "already-absent", "already absent local-build target cleanup status");
|
||||
assertEqual(summary.summary?.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
|
||||
assertEqual(summary.scenarios?.[0]?.failureReason, "dependency install failed", "summary failure reason");
|
||||
if (!/runtime remove kova-local-\d+ --json/.test(log)) {
|
||||
throw new Error(`runtime remove was not called after failed build; log:\n${log}`);
|
||||
}
|
||||
@ -4341,12 +4341,12 @@ function diagnosticsTimelineEvaluationCheck() {
|
||||
records: [openSpanRecord]
|
||||
}, { structured: true });
|
||||
assertEqual(
|
||||
reportSummary.scenarios[0]?.measurements?.openclawOpenRequiredSpanCount,
|
||||
reportSummary.scenarios[0]?.measurements?.diagnostics?.openRequiredSpanCount,
|
||||
1,
|
||||
"structured report open span evidence"
|
||||
);
|
||||
assertEqual(
|
||||
reportSummary.scenarios[0]?.measurements?.openclawOpenSpans?.[0]?.name,
|
||||
reportSummary.scenarios[0]?.measurements?.diagnostics?.openSpans?.[0]?.name,
|
||||
"runtimeDeps.stage",
|
||||
"structured report open span name"
|
||||
);
|
||||
@ -5576,10 +5576,10 @@ function markdownFailureCardsCheck() {
|
||||
violations: [{ message: "gateway readiness exceeded threshold" }]
|
||||
}]
|
||||
});
|
||||
assertEqual(rendered.includes("## Failure Cards"), true, "markdown failure cards section");
|
||||
assertEqual(rendered.includes("FAIL gateway-performance: gateway readiness exceeded threshold"), true, "failure card summary");
|
||||
assertEqual(rendered.includes("likely owner: gateway-runtime"), true, "failure card owner");
|
||||
assertEqual(rendered.includes("evidence: readinessHealthReadyMs: 45000"), true, "failure card evidence");
|
||||
assertEqual(rendered.includes("## Findings"), true, "markdown findings section");
|
||||
assertEqual(rendered.includes("gateway-performance"), true, "finding scenario");
|
||||
assertEqual(rendered.includes("gateway readiness exceeded threshold"), true, "finding summary");
|
||||
assertEqual(rendered.includes("gateway-runtime"), true, "finding owner");
|
||||
assertEqual(rendered.includes("## Resource Roles"), true, "markdown resource roles section");
|
||||
assertEqual(rendered.includes("gateway: RSS 1100 MB; CPU 220%"), true, "markdown resource role summary");
|
||||
return {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user