diff --git a/src/commands/matrix-run.mjs b/src/commands/matrix-run.mjs new file mode 100644 index 0000000..956ee3a --- /dev/null +++ b/src/commands/matrix-run.mjs @@ -0,0 +1,316 @@ +import { mkdir, writeFile } from "node:fs/promises"; +import { join, relative } from "node:path"; +import { authReportSummary, resolveRunAuthContext } from "../auth.mjs"; +import { required, resolveFromCwd } from "../cli.mjs"; +import { + cleanupTargetRuntimeIfNeeded, + loadRegressionThresholds, + positiveIntegerFlag, + positiveIntegerValue, + profileIntegerFlag, + summarizePerformanceReceipt, + validateBaselineExecutionFlags +} from "./run-support.mjs"; +import { applyMatrixControls, expandProfile } from "../matrix/expand.mjs"; +import { evaluateGate, preflightGateRun } from "../matrix/gate.mjs"; +import { matrixControlSummary } from "../matrix/controls.mjs"; +import { profileSummary, validateProfileTarget } from "../matrix/profile.mjs"; +import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "../matrix/resolver.mjs"; +import { + comparePerformanceToBaseline, + loadBaselineStore, + resolveBaselinePath, + reviewBaselineUpdate, + saveBaselineStore, + updateBaselineStore +} from "../performance/baselines.mjs"; +import { buildPerformanceSummary } from "../performance/stats.mjs"; +import { platformInfo } from "../platform.mjs"; +import { reportsDir } from "../paths.mjs"; +import { loadRegistryContext } from "../registries/context.mjs"; +import { loadProfile } from "../registries/profiles.mjs"; +import { validateScenarioRun } from "../registries/scenarios.mjs"; +import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs"; +import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs"; +import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs"; +import { resolveTarget } from "../targets.mjs"; + +const reportSchemaVersion = "kova.report.v1"; + +export async function runMatrixRun(flags) { + const registry = await loadRegistryContext(); + const profile = await loadProfile(required(flags.profile, "--profile")); + validateProfileExecutionFlags(profile, flags); + const target = required(flags.target, "--target"); + validateBaselineExecutionFlags(flags); + const targetPlan = resolveTarget(target, "target"); + validateProfileTarget(profile, targetPlan); + const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null; + const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo()); + const resolvedCoverage = resolveCoverageObligations({ + profile, + entries, + surfaces: registry.surfaces, + targetPlan + }); + assertResolvedCoverageIsRunnable(resolvedCoverage); + const controls = matrixControlSummary(flags, targetPlan); + const auth = await resolveRunAuthContext(flags); + const regressionThresholds = await loadRegressionThresholds(flags); + const baselinePath = resolveBaselinePath(flags.baseline); + const saveBaselinePath = resolveBaselinePath(flags.save_baseline); + const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null; + preflightGateRun({ entries, flags }); + for (const entry of entries.filter((item) => !item.skipReason)) { + validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan }); + } + const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir; + const runId = createRunId(); + const reportPath = join(reportRoot, `${runId}-${profile.id}.md`); + const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`); + const targetSetup = { completed: false }; + const runEntry = async (entry) => { + const context = { + target, + targetPlan, + profile, + from: flags.from, + fromPlan, + state: entry.state, + sourceEnv: flags.source_env, + runId, + controls, + execute: flags.execute === true, + keepEnv: flags.keep_env === true, + retainOnFailure: flags.retain_on_failure === true, + timeoutMs: resolveEntryTimeout(entry, flags), + healthSamples: profileIntegerFlag(flags, "health_samples", flags.deep_profile === true ? 10 : 3), + healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250), + readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", flags.deep_profile === true ? 100 : 250), + heapSnapshot: flags.heap_snapshot === true || flags.deep_profile === true, + diagnosticReport: flags.deep_profile === true, + nodeProfile: flags.node_profile === true || flags.deep_profile === true, + deepProfile: flags.deep_profile === true, + profileOnFailure: flags.profile_on_failure === true, + resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", flags.deep_profile === true ? 250 : 1000), + processRoles: registry.processRoles, + surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])), + targetSetup, + auth + }; + + if (entry.skipReason) { + return buildRepeatRecords(entry, context, (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason)); + } + + return buildRepeatRecords(entry, context, async (iterationContext) => + iterationContext.execute + ? executeScenario(entry.scenario, iterationContext) + : buildDryRunRecord(entry.scenario, iterationContext) + ); + }; + + const records = flags.execute === true + ? await runMatrixEntries(entries, runEntry, controls) + : (await Promise.all(entries.map((entry) => runEntry(entry)))).flat(); + const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, { + execute: flags.execute === true, + timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000) + }); + const performance = buildPerformanceSummary(records, { + repeat: controls.repeat, + regressionThresholds + }); + const platform = platformInfo(); + const reportBase = { + schemaVersion: reportSchemaVersion, + generatedAt: new Date().toISOString(), + runId, + outputPaths: { + markdown: reportPath, + json: jsonPath + }, + mode: flags.execute === true ? "execution" : "dry-run", + profile: profileSummary(profile), + target, + from: flags.from ?? null, + controls, + auth: authReportSummary(auth), + state: null, + platform, + targetCleanup, + performance, + baseline: null, + gate: null, + summary: summarizeRecords(records), + records + }; + const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds }); + if (baselineComparison) { + reportBase.baseline = { + path: baselinePath, + comparison: baselineComparison + }; + } + const gate = flags.gate === true + ? evaluateGate({ + mode: flags.execute === true ? "execution" : "dry-run", + controls, + performance, + baseline: reportBase.baseline, + platform: reportBase.platform, + records + }, profile, { resolvedCoverage }) + : null; + + await mkdir(reportRoot, { recursive: true }); + const report = { + ...reportBase, + gate + }; + if (saveBaselinePath) { + const existingStore = await loadBaselineStore(saveBaselinePath); + const review = reviewBaselineUpdate(report, { reviewedGood: flags.reviewed_good === true }); + const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood: flags.reviewed_good === true }); + report.baseline = { + ...(report.baseline ?? {}), + review, + saved: await saveBaselineStore(saveBaselinePath, updatedStore) + }; + } + await writeFile(reportPath, renderMarkdownReport(report), "utf8"); + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + const bundle = await bundleReport(jsonPath, { outputDir: reportRoot }); + const retainedGateArtifacts = gate && gate.verdict !== "SHIP" + ? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) + : null; + + if (flags.json) { + console.log(JSON.stringify({ + schemaVersion: "kova.matrix.run.receipt.v1", + generatedAt: new Date().toISOString(), + mode: report.mode, + runId, + profile: profileSummary(profile), + reportPath, + jsonPath, + bundlePath: bundle.outputPath, + checksumPath: bundle.checksumPath, + retainedGateArtifacts, + gate: summarizeGateReceipt(gate), + performance: summarizePerformanceReceipt(report.performance, report.baseline), + summary: report.summary + }, null, 2)); + failGateIfNeeded(gate); + return; + } + + console.log(`Kova matrix ${report.mode} report written: ${relative(process.cwd(), reportPath)}`); + console.log(`Kova matrix ${report.mode} data written: ${relative(process.cwd(), jsonPath)}`); + console.log(`Kova matrix bundle written: ${relative(process.cwd(), bundle.outputPath)}`); + if (retainedGateArtifacts) { + console.log(`Kova failed gate artifacts retained: ${relative(process.cwd(), retainedGateArtifacts.outputDir)}`); + } + if (gate) { + console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`); + } + failGateIfNeeded(gate); +} + +function validateProfileExecutionFlags(profile, flags) { + if (flags.execute === true && profile.id === "exhaustive" && flags.allow_exhaustive !== true) { + throw new Error("executing profile 'exhaustive' requires --allow-exhaustive"); + } +} + +async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) { + report.retainedGateArtifacts = { + status: "pending" + }; + await writeFile(reportPath, renderMarkdownReport(report), "utf8"); + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + const retained = await retainGateArtifacts(jsonPath, bundle); + report.retainedGateArtifacts = retained; + await writeFile(reportPath, renderMarkdownReport(report), "utf8"); + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir }); + return retained; +} + +function resolveEntryTimeout(entry, flags) { + return positiveIntegerValue(flags.timeout_ms ?? entry.timeoutMs ?? entry.scenario.timeoutMs ?? 120000, "--timeout-ms"); +} + +async function buildRepeatRecords(entry, context, callback) { + const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat"); + const records = []; + for (let index = 1; index <= total; index += 1) { + records.push(await callback({ + ...context, + repeat: { + index, + total + } + })); + } + return records; +} + +function failGateIfNeeded(gate) { + if (gate && gate.verdict !== "SHIP") { + throw new Error(`gate outcome: ${gate.outcome ?? gate.verdict}`); + } +} + +function summarizeGateReceipt(gate) { + if (!gate) { + return null; + } + return { + schemaVersion: gate.schemaVersion, + enabled: gate.enabled, + profileId: gate.profileId, + policyId: gate.policyId, + purpose: gate.purpose ?? null, + verdict: gate.verdict, + outcome: gate.outcome ?? null, + ok: gate.ok, + complete: gate.complete, + partial: gate.partial, + missingRequiredCount: gate.missingRequiredCount, + blockingCount: gate.blockingCount, + warningCount: gate.warningCount, + infoCount: gate.infoCount, + subsystemCount: gate.subsystems?.length ?? 0, + fixerSummaryCount: gate.fixerSummaries?.length ?? 0, + baselineRegressionCount: gate.baseline?.regressionCount ?? null, + missingBaselineCount: gate.baseline?.missingBaselineCount ?? null + }; +} + +async function runMatrixEntries(entries, runEntry, controls) { + if (controls.parallel <= 1) { + const records = []; + for (const entry of entries) { + const entryRecords = await runEntry(entry); + records.push(...entryRecords); + if (controls.failFast && entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED")) { + break; + } + } + return records; + } + + const records = new Array(entries.length); + let nextIndex = 0; + async function worker() { + while (nextIndex < entries.length) { + const index = nextIndex; + nextIndex += 1; + records[index] = await runEntry(entries[index]); + } + } + + await Promise.all(Array.from({ length: controls.parallel }, () => worker())); + return records.filter(Boolean).flat(); +} diff --git a/src/commands/run-support.mjs b/src/commands/run-support.mjs new file mode 100644 index 0000000..9010945 --- /dev/null +++ b/src/commands/run-support.mjs @@ -0,0 +1,120 @@ +import { readFile } from "node:fs/promises"; +import { runCleanupCommand } from "../cleanup.mjs"; +import { resolveFromCwd } from "../cli.mjs"; +import { ocmRuntimeRemoveJson } from "../ocm/commands.mjs"; + +export async function loadRegressionThresholds(flags) { + if (!flags.regression_thresholds) { + return null; + } + if (flags.regression_thresholds === true) { + throw new Error("--regression-thresholds requires a JSON file path"); + } + return JSON.parse(await readFile(resolveFromCwd(String(flags.regression_thresholds)), "utf8")); +} + +export function validateBaselineExecutionFlags(flags) { + if ((flags.baseline || flags.save_baseline) && flags.execute !== true) { + throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs"); + } + if (flags.save_baseline && flags.reviewed_good !== true) { + throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report"); + } +} + +export async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) { + if (targetPlan.kind !== "local-build") { + return null; + } + + const command = ocmRuntimeRemoveJson(targetPlan.runtimeName); + if (!options.execute) { + return { + status: "planned", + runtimeName: targetPlan.runtimeName, + command + }; + } + + if (records.some((record) => record.cleanup === "retained")) { + return { + status: "retained", + runtimeName: targetPlan.runtimeName, + command, + reason: "one or more envs were retained" + }; + } + + const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs }); + const cleanupStatus = classifyTargetRuntimeCleanup(result); + return { + status: cleanupStatus.status, + runtimeName: targetPlan.runtimeName, + command, + reason: cleanupStatus.reason, + result: { + status: result.status, + durationMs: result.durationMs, + timedOut: result.timedOut, + stdout: result.stdout, + stderr: result.stderr, + attempts: result.attempts ?? [] + } + }; +} + +function classifyTargetRuntimeCleanup(result) { + if (result.status === 0) { + return { status: "removed" }; + } + + const output = `${result.stdout}\n${result.stderr}`; + if (/\bruntime\b[\s\S]*\bdoes not exist\b/i.test(output) || /\bnot found\b/i.test(output)) { + return { + status: "already-absent", + reason: "target runtime was not present when cleanup ran" + }; + } + + return { status: "remove-failed" }; +} + +export function positiveIntegerFlag(flags, key, defaultValue) { + if (flags[key] === undefined) { + return defaultValue; + } + return positiveIntegerValue(flags[key], `--${key.replaceAll("_", "-")}`); +} + +export function profileIntegerFlag(flags, key, defaultValue) { + return positiveIntegerFlag(flags, key, defaultValue); +} + +export function positiveIntegerValue(raw, label) { + if (raw === true) { + throw new Error(`${label} requires a positive integer value`); + } + const value = Number(raw); + if (!Number.isInteger(value) || value < 1) { + throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`); + } + return value; +} + +export function summarizePerformanceReceipt(performance, baseline) { + if (!performance) { + return null; + } + return { + schemaVersion: performance.schemaVersion, + repeat: performance.repeat, + groupCount: performance.groupCount, + unstableGroupCount: performance.unstableGroupCount, + profiledRunCount: performance.profiledRunCount ?? 0, + baselineRegressionCount: baseline?.comparison?.regressionCount ?? null, + missingBaselineCount: baseline?.comparison?.missingBaselineCount ?? null, + baselineReviewOk: baseline?.review?.ok ?? null, + baselineReviewBlockerCount: baseline?.review?.blockerCount ?? null, + savedBaselinePath: baseline?.saved?.path ?? null + }; +} diff --git a/src/main.mjs b/src/main.mjs index 6ebf359..c3834c0 100644 --- a/src/main.mjs +++ b/src/main.mjs @@ -1,18 +1,23 @@ import { mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises"; import { join, relative } from "node:path"; -import { bundleReport, retainGateArtifacts } from "./reporting/artifacts.mjs"; +import { bundleReport } from "./reporting/artifacts.mjs"; import { authReportSummary, resolveRunAuthContext } from "./auth.mjs"; import { runCleanupCommand } from "./cleanup.mjs"; import { runCommand } from "./commands.mjs"; import { runMatrixPlan } from "./commands/matrix-plan.mjs"; +import { runMatrixRun } from "./commands/matrix-run.mjs"; +import { + cleanupTargetRuntimeIfNeeded, + loadRegressionThresholds, + positiveIntegerFlag, + profileIntegerFlag, + summarizePerformanceReceipt, + validateBaselineExecutionFlags +} from "./commands/run-support.mjs"; import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "./reporting/compare.mjs"; import { parseFlags, printHelp, required, resolveFromCwd } from "./cli.mjs"; -import { matrixControlSummary } from "./matrix/controls.mjs"; -import { applyMatrixControls, expandProfile } from "./matrix/expand.mjs"; -import { evaluateGate, preflightGateRun } from "./matrix/gate.mjs"; -import { profileSummary, validateProfileTarget } from "./matrix/profile.mjs"; +import { profileSummary } from "./matrix/profile.mjs"; import { buildCoverage } from "./matrix/coverage.mjs"; -import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "./matrix/resolver.mjs"; import { comparePerformanceToBaseline, loadBaselineStore, @@ -25,15 +30,14 @@ import { buildPerformanceSummary } from "./performance/stats.mjs"; import { platformInfo } from "./platform.mjs"; import { artifactsDir, repoRoot, reportsDir } from "./paths.mjs"; import { loadRegistryContext } from "./registries/context.mjs"; -import { loadProfile } from "./registries/profiles.mjs"; import { loadScenarios, validateScenarioRun } from "./registries/scenarios.mjs"; import { loadState } from "./registries/states.mjs"; import { renderMarkdownReport, renderPasteSummary, renderReportSummary, summarizeRecords } from "./reporting/report.mjs"; -import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "./runner.mjs"; +import { buildDryRunRecord, createRunId, executeScenario } from "./runner.mjs"; import { runSelfCheck } from "./selfcheck.mjs"; import { runSetup } from "./setup.mjs"; import { resolveTarget } from "./targets.mjs"; -import { ocmEnvDestroy, ocmEnvListJson, ocmRuntimeRemoveJson } from "./ocm/commands.mjs"; +import { ocmEnvDestroy, ocmEnvListJson } from "./ocm/commands.mjs"; const reportSchemaVersion = "kova.report.v1"; @@ -160,7 +164,7 @@ async function matrixCommand(flags) { } if (subcommand === "run") { - await matrixRun(flags); + await runMatrixRun(flags); return; } @@ -235,312 +239,6 @@ async function readReport(path) { return JSON.parse(await readFile(resolveFromCwd(path), "utf8")); } -async function loadRegressionThresholds(flags) { - if (!flags.regression_thresholds) { - return null; - } - if (flags.regression_thresholds === true) { - throw new Error("--regression-thresholds requires a JSON file path"); - } - return JSON.parse(await readFile(resolveFromCwd(String(flags.regression_thresholds)), "utf8")); -} - -async function matrixRun(flags) { - const registry = await loadRegistryContext(); - const profile = await loadProfile(required(flags.profile, "--profile")); - validateProfileExecutionFlags(profile, flags); - const target = required(flags.target, "--target"); - validateBaselineExecutionFlags(flags); - const targetPlan = resolveTarget(target, "target"); - validateProfileTarget(profile, targetPlan); - const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null; - const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo()); - const resolvedCoverage = resolveCoverageObligations({ - profile, - entries, - surfaces: registry.surfaces, - targetPlan - }); - assertResolvedCoverageIsRunnable(resolvedCoverage); - const controls = matrixControlSummary(flags, targetPlan); - const auth = await resolveRunAuthContext(flags); - const regressionThresholds = await loadRegressionThresholds(flags); - const baselinePath = resolveBaselinePath(flags.baseline); - const saveBaselinePath = resolveBaselinePath(flags.save_baseline); - const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null; - preflightGateRun({ entries, flags }); - for (const entry of entries.filter((item) => !item.skipReason)) { - validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan }); - } - const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir; - const runId = createRunId(); - const reportPath = join(reportRoot, `${runId}-${profile.id}.md`); - const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`); - const targetSetup = { completed: false }; - const runEntry = async (entry) => { - const context = { - target, - targetPlan, - profile, - from: flags.from, - fromPlan, - state: entry.state, - sourceEnv: flags.source_env, - runId, - controls, - execute: flags.execute === true, - keepEnv: flags.keep_env === true, - retainOnFailure: flags.retain_on_failure === true, - timeoutMs: resolveEntryTimeout(entry, flags), - healthSamples: profileIntegerFlag(flags, "health_samples", flags.deep_profile === true ? 10 : 3), - healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250), - readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", flags.deep_profile === true ? 100 : 250), - heapSnapshot: flags.heap_snapshot === true || flags.deep_profile === true, - diagnosticReport: flags.deep_profile === true, - nodeProfile: flags.node_profile === true || flags.deep_profile === true, - deepProfile: flags.deep_profile === true, - profileOnFailure: flags.profile_on_failure === true, - resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", flags.deep_profile === true ? 250 : 1000), - processRoles: registry.processRoles, - surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])), - targetSetup, - auth - }; - - if (entry.skipReason) { - return buildRepeatRecords(entry, context, (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason)); - } - - return buildRepeatRecords(entry, context, async (iterationContext) => - iterationContext.execute - ? executeScenario(entry.scenario, iterationContext) - : buildDryRunRecord(entry.scenario, iterationContext) - ); - }; - - const records = flags.execute === true - ? await runMatrixEntries(entries, runEntry, controls) - : (await Promise.all(entries.map((entry) => runEntry(entry)))).flat(); - const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, { - execute: flags.execute === true, - timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000) - }); - const performance = buildPerformanceSummary(records, { - repeat: controls.repeat, - regressionThresholds - }); - const platform = platformInfo(); - const reportBase = { - schemaVersion: reportSchemaVersion, - generatedAt: new Date().toISOString(), - runId, - outputPaths: { - markdown: reportPath, - json: jsonPath - }, - mode: flags.execute === true ? "execution" : "dry-run", - profile: profileSummary(profile), - target, - from: flags.from ?? null, - controls, - auth: authReportSummary(auth), - state: null, - platform, - targetCleanup, - performance, - baseline: null, - gate: null, - summary: summarizeRecords(records), - records - }; - const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds }); - if (baselineComparison) { - reportBase.baseline = { - path: baselinePath, - comparison: baselineComparison - }; - } - const gate = flags.gate === true - ? evaluateGate({ - mode: flags.execute === true ? "execution" : "dry-run", - controls, - performance, - baseline: reportBase.baseline, - platform: reportBase.platform, - records - }, profile, { resolvedCoverage }) - : null; - - await mkdir(reportRoot, { recursive: true }); - const report = { - ...reportBase, - gate - }; - if (saveBaselinePath) { - const existingStore = await loadBaselineStore(saveBaselinePath); - const review = reviewBaselineUpdate(report, { reviewedGood: flags.reviewed_good === true }); - const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood: flags.reviewed_good === true }); - report.baseline = { - ...(report.baseline ?? {}), - review, - saved: await saveBaselineStore(saveBaselinePath, updatedStore) - }; - } - await writeFile(reportPath, renderMarkdownReport(report), "utf8"); - await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); - const bundle = await bundleReport(jsonPath, { outputDir: reportRoot }); - const retainedGateArtifacts = gate && gate.verdict !== "SHIP" - ? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) - : null; - - if (flags.json) { - console.log(JSON.stringify({ - schemaVersion: "kova.matrix.run.receipt.v1", - generatedAt: new Date().toISOString(), - mode: report.mode, - runId, - profile: profileSummary(profile), - reportPath, - jsonPath, - bundlePath: bundle.outputPath, - checksumPath: bundle.checksumPath, - retainedGateArtifacts, - gate: summarizeGateReceipt(gate), - performance: summarizePerformanceReceipt(report.performance, report.baseline), - summary: report.summary - }, null, 2)); - failGateIfNeeded(gate); - return; - } - - console.log(`Kova matrix ${report.mode} report written: ${relative(process.cwd(), reportPath)}`); - console.log(`Kova matrix ${report.mode} data written: ${relative(process.cwd(), jsonPath)}`); - console.log(`Kova matrix bundle written: ${relative(process.cwd(), bundle.outputPath)}`); - if (retainedGateArtifacts) { - console.log(`Kova failed gate artifacts retained: ${relative(process.cwd(), retainedGateArtifacts.outputDir)}`); - } - if (gate) { - console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`); - } - failGateIfNeeded(gate); -} - -function validateProfileExecutionFlags(profile, flags) { - if (flags.execute === true && profile.id === "exhaustive" && flags.allow_exhaustive !== true) { - throw new Error("executing profile 'exhaustive' requires --allow-exhaustive"); - } -} - -async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) { - report.retainedGateArtifacts = { - status: "pending" - }; - await writeFile(reportPath, renderMarkdownReport(report), "utf8"); - await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); - const retained = await retainGateArtifacts(jsonPath, bundle); - report.retainedGateArtifacts = retained; - await writeFile(reportPath, renderMarkdownReport(report), "utf8"); - await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); - await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir }); - return retained; -} - -function resolveEntryTimeout(entry, flags) { - return positiveIntegerValue(flags.timeout_ms ?? entry.timeoutMs ?? entry.scenario.timeoutMs ?? 120000, "--timeout-ms"); -} - -async function buildRepeatRecords(entry, context, callback) { - const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat"); - const records = []; - for (let index = 1; index <= total; index += 1) { - records.push(await callback({ - ...context, - repeat: { - index, - total - } - })); - } - return records; -} - -function failGateIfNeeded(gate) { - if (gate && gate.verdict !== "SHIP") { - throw new Error(`gate outcome: ${gate.outcome ?? gate.verdict}`); - } -} - -function summarizeGateReceipt(gate) { - if (!gate) { - return null; - } - return { - schemaVersion: gate.schemaVersion, - enabled: gate.enabled, - profileId: gate.profileId, - policyId: gate.policyId, - purpose: gate.purpose ?? null, - verdict: gate.verdict, - outcome: gate.outcome ?? null, - ok: gate.ok, - complete: gate.complete, - partial: gate.partial, - missingRequiredCount: gate.missingRequiredCount, - blockingCount: gate.blockingCount, - warningCount: gate.warningCount, - infoCount: gate.infoCount, - subsystemCount: gate.subsystems?.length ?? 0, - fixerSummaryCount: gate.fixerSummaries?.length ?? 0, - baselineRegressionCount: gate.baseline?.regressionCount ?? null, - missingBaselineCount: gate.baseline?.missingBaselineCount ?? null - }; -} - -function summarizePerformanceReceipt(performance, baseline) { - if (!performance) { - return null; - } - return { - schemaVersion: performance.schemaVersion, - repeat: performance.repeat, - groupCount: performance.groupCount, - unstableGroupCount: performance.unstableGroupCount, - profiledRunCount: performance.profiledRunCount ?? 0, - baselineRegressionCount: baseline?.comparison?.regressionCount ?? null, - missingBaselineCount: baseline?.comparison?.missingBaselineCount ?? null, - baselineReviewOk: baseline?.review?.ok ?? null, - baselineReviewBlockerCount: baseline?.review?.blockerCount ?? null, - savedBaselinePath: baseline?.saved?.path ?? null - }; -} - -async function runMatrixEntries(entries, runEntry, controls) { - if (controls.parallel <= 1) { - const records = []; - for (const entry of entries) { - const entryRecords = await runEntry(entry); - records.push(...entryRecords); - if (controls.failFast && entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED")) { - break; - } - } - return records; - } - - const records = new Array(entries.length); - let nextIndex = 0; - async function worker() { - while (nextIndex < entries.length) { - const index = nextIndex; - nextIndex += 1; - records[index] = await runEntry(entries[index]); - } - } - - await Promise.all(Array.from({ length: controls.parallel }, () => worker())); - return records.filter(Boolean).flat(); -} - async function cleanupCommand(flags) { const [subcommand] = flags._; if (subcommand === "envs") { @@ -850,91 +548,3 @@ function resolveRunTimeout(scenarios, flags) { .filter((timeout) => typeof timeout === "number"); return scenarioTimeouts.length === 0 ? 120000 : Math.max(...scenarioTimeouts); } - -function validateBaselineExecutionFlags(flags) { - if ((flags.baseline || flags.save_baseline) && flags.execute !== true) { - throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs"); - } - if (flags.save_baseline && flags.reviewed_good !== true) { - throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report"); - } -} - -async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) { - if (targetPlan.kind !== "local-build") { - return null; - } - - const command = ocmRuntimeRemoveJson(targetPlan.runtimeName); - if (!options.execute) { - return { - status: "planned", - runtimeName: targetPlan.runtimeName, - command - }; - } - - if (records.some((record) => record.cleanup === "retained")) { - return { - status: "retained", - runtimeName: targetPlan.runtimeName, - command, - reason: "one or more envs were retained" - }; - } - - const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs }); - const cleanupStatus = classifyTargetRuntimeCleanup(result); - return { - status: cleanupStatus.status, - runtimeName: targetPlan.runtimeName, - command, - reason: cleanupStatus.reason, - result: { - status: result.status, - durationMs: result.durationMs, - timedOut: result.timedOut, - stdout: result.stdout, - stderr: result.stderr, - attempts: result.attempts ?? [] - } - }; -} - -function classifyTargetRuntimeCleanup(result) { - if (result.status === 0) { - return { status: "removed" }; - } - - const output = `${result.stdout}\n${result.stderr}`; - if (/\bruntime\b[\s\S]*\bdoes not exist\b/i.test(output) || /\bnot found\b/i.test(output)) { - return { - status: "already-absent", - reason: "target runtime was not present when cleanup ran" - }; - } - - return { status: "remove-failed" }; -} - -function positiveIntegerFlag(flags, key, defaultValue) { - if (flags[key] === undefined) { - return defaultValue; - } - return positiveIntegerValue(flags[key], `--${key.replaceAll("_", "-")}`); -} - -function profileIntegerFlag(flags, key, defaultValue) { - return positiveIntegerFlag(flags, key, defaultValue); -} - -function positiveIntegerValue(raw, label) { - if (raw === true) { - throw new Error(`${label} requires a positive integer value`); - } - const value = Number(raw); - if (!Number.isInteger(value) || value < 1) { - throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`); - } - return value; -}