refactor: extract matrix run command
This commit is contained in:
parent
fd575c146b
commit
8bf308bc32
316
src/commands/matrix-run.mjs
Normal file
316
src/commands/matrix-run.mjs
Normal file
@ -0,0 +1,316 @@
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join, relative } from "node:path";
|
||||
import { authReportSummary, resolveRunAuthContext } from "../auth.mjs";
|
||||
import { required, resolveFromCwd } from "../cli.mjs";
|
||||
import {
|
||||
cleanupTargetRuntimeIfNeeded,
|
||||
loadRegressionThresholds,
|
||||
positiveIntegerFlag,
|
||||
positiveIntegerValue,
|
||||
profileIntegerFlag,
|
||||
summarizePerformanceReceipt,
|
||||
validateBaselineExecutionFlags
|
||||
} from "./run-support.mjs";
|
||||
import { applyMatrixControls, expandProfile } from "../matrix/expand.mjs";
|
||||
import { evaluateGate, preflightGateRun } from "../matrix/gate.mjs";
|
||||
import { matrixControlSummary } from "../matrix/controls.mjs";
|
||||
import { profileSummary, validateProfileTarget } from "../matrix/profile.mjs";
|
||||
import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "../matrix/resolver.mjs";
|
||||
import {
|
||||
comparePerformanceToBaseline,
|
||||
loadBaselineStore,
|
||||
resolveBaselinePath,
|
||||
reviewBaselineUpdate,
|
||||
saveBaselineStore,
|
||||
updateBaselineStore
|
||||
} from "../performance/baselines.mjs";
|
||||
import { buildPerformanceSummary } from "../performance/stats.mjs";
|
||||
import { platformInfo } from "../platform.mjs";
|
||||
import { reportsDir } from "../paths.mjs";
|
||||
import { loadRegistryContext } from "../registries/context.mjs";
|
||||
import { loadProfile } from "../registries/profiles.mjs";
|
||||
import { validateScenarioRun } from "../registries/scenarios.mjs";
|
||||
import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
|
||||
import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs";
|
||||
import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs";
|
||||
import { resolveTarget } from "../targets.mjs";
|
||||
|
||||
// Schema identifier written as `schemaVersion` on every report built by runMatrixRun.
const reportSchemaVersion = "kova.report.v1";
|
||||
|
||||
/**
 * Implements the `matrix run` command: expands a profile into matrix entries,
 * executes them (or builds dry-run records), and writes the Markdown report,
 * the JSON report and the report bundle.
 *
 * Steps, in order:
 *   1. Load the registry and profile, resolve the target (and optional
 *      `--from` target) and validate the flag combination.
 *   2. Expand the profile into entries, resolve coverage obligations and run
 *      the gate preflight plus per-scenario validation.
 *   3. Produce records: real execution goes through runMatrixEntries, a dry
 *      run builds every entry's records concurrently.
 *   4. Clean up the target runtime if needed, summarize performance, compare
 *      against a baseline store when one was loaded, and evaluate the gate
 *      when `--gate` is set.
 *   5. Optionally update the baseline store, write the report files, bundle
 *      them, and retain artifacts when the gate verdict is not "SHIP".
 *   6. Print either a JSON receipt (`--json`) or human-readable lines.
 *
 * @param {object} flags - Parsed CLI flags (e.g. `profile`, `target`, `from`,
 *   `execute`, `gate`, `json`, `report_dir`, `baseline`, `save_baseline`,
 *   `reviewed_good`, `deep_profile`, `timeout_ms`).
 * @returns {Promise<void>}
 * @throws {Error} When a required flag is missing or invalid, or - after all
 *   output has been written - when a gate was evaluated and its verdict is
 *   not "SHIP".
 */
export async function runMatrixRun(flags) {
  // Resolve inputs and validate flags before anything is executed.
  const registry = await loadRegistryContext();
  const profile = await loadProfile(required(flags.profile, "--profile"));
  validateProfileExecutionFlags(profile, flags);
  const target = required(flags.target, "--target");
  validateBaselineExecutionFlags(flags);
  const targetPlan = resolveTarget(target, "target");
  validateProfileTarget(profile, targetPlan);
  const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;

  const expandedEntries = await expandProfile(profile);
  const entries = applyMatrixControls(expandedEntries, flags, platformInfo());
  const resolvedCoverage = resolveCoverageObligations({
    profile,
    entries,
    surfaces: registry.surfaces,
    targetPlan
  });
  assertResolvedCoverageIsRunnable(resolvedCoverage);

  const controls = matrixControlSummary(flags, targetPlan);
  const auth = await resolveRunAuthContext(flags);
  const regressionThresholds = await loadRegressionThresholds(flags);
  const baselinePath = resolveBaselinePath(flags.baseline);
  const saveBaselinePath = resolveBaselinePath(flags.save_baseline);
  const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null;

  preflightGateRun({ entries, flags });
  for (const entry of entries) {
    // Skipped entries never run, so their scenarios are not validated.
    if (!entry.skipReason) {
      validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan });
    }
  }

  const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir;
  const runId = createRunId();
  const reportStem = `${runId}-${profile.id}`;
  const reportPath = join(reportRoot, `${reportStem}.md`);
  const jsonPath = join(reportRoot, `${reportStem}.json`);

  const executing = flags.execute === true;
  const deepProfile = flags.deep_profile === true;
  const mode = executing ? "execution" : "dry-run";
  // One object shared by every entry context of this run.
  const targetSetup = { completed: false };

  // Builds the per-entry context and produces one record per repeat iteration.
  const runEntry = async (entry) => {
    const context = {
      target,
      targetPlan,
      profile,
      from: flags.from,
      fromPlan,
      state: entry.state,
      sourceEnv: flags.source_env,
      runId,
      controls,
      execute: executing,
      keepEnv: flags.keep_env === true,
      retainOnFailure: flags.retain_on_failure === true,
      timeoutMs: resolveEntryTimeout(entry, flags),
      healthSamples: profileIntegerFlag(flags, "health_samples", deepProfile ? 10 : 3),
      healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250),
      readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", deepProfile ? 100 : 250),
      heapSnapshot: flags.heap_snapshot === true || deepProfile,
      diagnosticReport: deepProfile,
      nodeProfile: flags.node_profile === true || deepProfile,
      deepProfile,
      profileOnFailure: flags.profile_on_failure === true,
      resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", deepProfile ? 250 : 1000),
      processRoles: registry.processRoles,
      surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])),
      targetSetup,
      auth
    };

    const produceRecord = entry.skipReason
      ? (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason)
      : async (iterationContext) =>
        iterationContext.execute
          ? executeScenario(entry.scenario, iterationContext)
          : buildDryRunRecord(entry.scenario, iterationContext);
    return buildRepeatRecords(entry, context, produceRecord);
  };

  let records;
  if (executing) {
    records = await runMatrixEntries(entries, runEntry, controls);
  } else {
    const recordsPerEntry = await Promise.all(entries.map((entry) => runEntry(entry)));
    records = recordsPerEntry.flat();
  }

  const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, {
    execute: executing,
    timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000)
  });
  const performance = buildPerformanceSummary(records, {
    repeat: controls.repeat,
    regressionThresholds
  });
  const platform = platformInfo();

  const reportBase = {
    schemaVersion: reportSchemaVersion,
    generatedAt: new Date().toISOString(),
    runId,
    outputPaths: {
      markdown: reportPath,
      json: jsonPath
    },
    mode,
    profile: profileSummary(profile),
    target,
    from: flags.from ?? null,
    controls,
    auth: authReportSummary(auth),
    state: null,
    platform,
    targetCleanup,
    performance,
    baseline: null,
    gate: null,
    summary: summarizeRecords(records),
    records
  };

  const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds });
  if (baselineComparison) {
    reportBase.baseline = {
      path: baselinePath,
      comparison: baselineComparison
    };
  }

  let gate = null;
  if (flags.gate === true) {
    const gateInput = {
      mode,
      controls,
      performance,
      baseline: reportBase.baseline,
      platform,
      records
    };
    gate = evaluateGate(gateInput, profile, { resolvedCoverage });
  }

  await mkdir(reportRoot, { recursive: true });
  const report = {
    ...reportBase,
    gate
  };

  if (saveBaselinePath) {
    const reviewedGood = flags.reviewed_good === true;
    const existingStore = await loadBaselineStore(saveBaselinePath);
    const review = reviewBaselineUpdate(report, { reviewedGood });
    const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood });
    report.baseline = {
      ...(report.baseline ?? {}),
      review,
      saved: await saveBaselineStore(saveBaselinePath, updatedStore)
    };
  }

  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
  const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });

  let retainedGateArtifacts = null;
  if (gate && gate.verdict !== "SHIP") {
    retainedGateArtifacts = await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle);
  }

  if (flags.json) {
    const receipt = {
      schemaVersion: "kova.matrix.run.receipt.v1",
      generatedAt: new Date().toISOString(),
      mode: report.mode,
      runId,
      profile: profileSummary(profile),
      reportPath,
      jsonPath,
      bundlePath: bundle.outputPath,
      checksumPath: bundle.checksumPath,
      retainedGateArtifacts,
      gate: summarizeGateReceipt(gate),
      performance: summarizePerformanceReceipt(report.performance, report.baseline),
      summary: report.summary
    };
    console.log(JSON.stringify(receipt, null, 2));
  } else {
    const shown = (path) => relative(process.cwd(), path);
    console.log(`Kova matrix ${report.mode} report written: ${shown(reportPath)}`);
    console.log(`Kova matrix ${report.mode} data written: ${shown(jsonPath)}`);
    console.log(`Kova matrix bundle written: ${shown(bundle.outputPath)}`);
    if (retainedGateArtifacts) {
      console.log(`Kova failed gate artifacts retained: ${shown(retainedGateArtifacts.outputDir)}`);
    }
    if (gate) {
      console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`);
    }
  }

  // Fail only after every output has been written and announced.
  failGateIfNeeded(gate);
}
|
||||
|
||||
/**
 * Refuses to execute the "exhaustive" profile unless it was explicitly allowed.
 *
 * @param {object} profile - Loaded profile; only `id` is read, and only when
 *   `flags.execute` is true.
 * @param {object} flags - Parsed CLI flags (`execute`, `allow_exhaustive`).
 * @throws {Error} When executing profile "exhaustive" without `--allow-exhaustive`.
 */
function validateProfileExecutionFlags(profile, flags) {
  if (flags.execute !== true) {
    return;
  }
  if (profile.id !== "exhaustive") {
    return;
  }
  if (flags.allow_exhaustive === true) {
    return;
  }
  throw new Error("executing profile 'exhaustive' requires --allow-exhaustive");
}
|
||||
|
||||
/**
 * Retains the artifacts of a run whose gate did not ship and records the
 * outcome in the report.
 *
 * The report files are rewritten twice: first with a "pending" marker before
 * retention runs, then with the retention result. Retention is then invoked a
 * second time with the same output directory, after the final report files
 * are on disk.
 *
 * @param {object} report - Report object; `retainedGateArtifacts` is mutated.
 * @param {string} reportPath - Path of the Markdown report file.
 * @param {string} jsonPath - Path of the JSON report file.
 * @param {object} bundle - Result of bundleReport for this run.
 * @returns {Promise<object>} The result of the first retainGateArtifacts call.
 */
async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
  // Rewrites both report files from the current state of `report`.
  const persistReport = async () => {
    await writeFile(reportPath, renderMarkdownReport(report), "utf8");
    await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
  };

  report.retainedGateArtifacts = { status: "pending" };
  await persistReport();

  const retained = await retainGateArtifacts(jsonPath, bundle);
  report.retainedGateArtifacts = retained;
  await persistReport();

  await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
  return retained;
}
|
||||
|
||||
/**
 * Picks the timeout for one matrix entry.
 *
 * Precedence: `--timeout-ms` flag, then the entry's own `timeoutMs`, then the
 * scenario's `timeoutMs`, then 120000.
 *
 * @param {object} entry - Matrix entry with `timeoutMs` and `scenario.timeoutMs`.
 * @param {object} flags - Parsed CLI flags (`timeout_ms`).
 * @returns {number} The timeout as a positive integer.
 * @throws {Error} When the selected value is not a positive integer.
 */
function resolveEntryTimeout(entry, flags) {
  const candidate = flags.timeout_ms
    ?? entry.timeoutMs
    ?? entry.scenario.timeoutMs
    ?? 120000;
  return positiveIntegerValue(candidate, "--timeout-ms");
}
|
||||
|
||||
/**
 * Invokes `callback` once per repeat iteration, one after another, and
 * collects the results.
 *
 * Each call receives a copy of `context` extended with
 * `repeat: { index, total }`, where `index` starts at 1.
 *
 * @param {object} entry - Matrix entry (not read here).
 * @param {object} context - Base context; `controls.repeat` sets the iteration
 *   count and defaults to 1.
 * @param {Function} callback - Sync or async producer of one record.
 * @returns {Promise<Array>} Records in iteration order.
 * @throws {Error} When the repeat count is not a positive integer.
 */
async function buildRepeatRecords(entry, context, callback) {
  const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat");
  const collected = [];
  let index = 0;
  while (index < total) {
    index += 1;
    const iterationContext = { ...context, repeat: { index, total } };
    collected.push(await callback(iterationContext));
  }
  return collected;
}
|
||||
|
||||
/**
 * Throws when a gate was evaluated and its verdict is anything but "SHIP".
 *
 * @param {object|null} gate - Gate result, or a falsy value when no gate ran.
 * @throws {Error} With the gate's `outcome`, falling back to its `verdict`.
 */
function failGateIfNeeded(gate) {
  if (!gate || gate.verdict === "SHIP") {
    return;
  }
  throw new Error(`gate outcome: ${gate.outcome ?? gate.verdict}`);
}
|
||||
|
||||
/**
 * Reduces a gate result to the compact form used in the JSON receipt.
 *
 * Collections are replaced by their lengths (0 when absent); optional fields
 * that are missing are reported as `null`.
 *
 * @param {object|null} gate - Gate result, or a falsy value when no gate ran.
 * @returns {object|null} The receipt summary, or `null` when `gate` is falsy.
 */
function summarizeGateReceipt(gate) {
  if (!gate) {
    return null;
  }

  const {
    schemaVersion,
    enabled,
    profileId,
    policyId,
    purpose,
    verdict,
    outcome,
    ok,
    complete,
    partial,
    missingRequiredCount,
    blockingCount,
    warningCount,
    infoCount,
    subsystems,
    fixerSummaries,
    baseline
  } = gate;

  return {
    schemaVersion,
    enabled,
    profileId,
    policyId,
    purpose: purpose ?? null,
    verdict,
    outcome: outcome ?? null,
    ok,
    complete,
    partial,
    missingRequiredCount,
    blockingCount,
    warningCount,
    infoCount,
    subsystemCount: subsystems?.length ?? 0,
    fixerSummaryCount: fixerSummaries?.length ?? 0,
    baselineRegressionCount: baseline?.regressionCount ?? null,
    missingBaselineCount: baseline?.missingBaselineCount ?? null
  };
}
|
||||
|
||||
/**
 * Runs every matrix entry through `runEntry` and returns the flattened
 * records in entry order.
 *
 * With a single worker the entries run strictly one after another. With
 * `controls.parallel > 1` a pool of workers pulls entries from a shared
 * cursor; results are stored by entry index so the output order does not
 * depend on completion order.
 *
 * `controls.failFast` is honoured in both modes: once an entry produces a
 * record with status "FAIL" or "BLOCKED", no further entries are started.
 * Entries already in flight in parallel mode are allowed to finish and their
 * records are kept.
 *
 * @param {Array} entries - Matrix entries to run.
 * @param {Function} runEntry - Async function mapping one entry to an array
 *   of records.
 * @param {object} controls - Run controls; reads `parallel` and `failFast`.
 *   A missing or non-numeric `parallel` is treated as 1.
 * @returns {Promise<Array>} Records of all entries that were run.
 */
async function runMatrixEntries(entries, runEntry, controls) {
  const hasFailure = (entryRecords) =>
    entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED");

  // Normalise the worker count: a missing/NaN value must not fall into the
  // parallel branch with zero workers (which would silently run nothing), and
  // there is no point in starting more workers than there are entries.
  const requestedWorkers = Math.floor(Number(controls.parallel));
  const workerCount = requestedWorkers > 1 ? Math.min(requestedWorkers, entries.length) : 1;

  if (workerCount <= 1) {
    const records = [];
    for (const entry of entries) {
      const entryRecords = await runEntry(entry);
      records.push(...entryRecords);
      if (controls.failFast && hasFailure(entryRecords)) {
        break;
      }
    }
    return records;
  }

  const records = new Array(entries.length);
  let nextIndex = 0;
  let stopDispatch = false;

  async function worker() {
    while (!stopDispatch && nextIndex < entries.length) {
      // Claim the index synchronously so no two workers run the same entry.
      const index = nextIndex;
      nextIndex += 1;
      const entryRecords = await runEntry(entries[index]);
      records[index] = entryRecords;
      if (controls.failFast && hasFailure(entryRecords)) {
        stopDispatch = true;
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  // Entries never started because of fail-fast leave holes; drop them.
  return records.filter(Boolean).flat();
}
|
||||
120
src/commands/run-support.mjs
Normal file
120
src/commands/run-support.mjs
Normal file
@ -0,0 +1,120 @@
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { runCleanupCommand } from "../cleanup.mjs";
|
||||
import { resolveFromCwd } from "../cli.mjs";
|
||||
import { ocmRuntimeRemoveJson } from "../ocm/commands.mjs";
|
||||
|
||||
/**
 * Loads regression thresholds from the JSON file named by
 * `--regression-thresholds`.
 *
 * @param {object} flags - Parsed CLI flags (`regression_thresholds`).
 * @returns {Promise<*>} The parsed JSON content, or `null` when the flag is
 *   not set.
 * @throws {Error} When the flag was given without a value (parsed as `true`),
 *   or when the file cannot be read or parsed.
 */
export async function loadRegressionThresholds(flags) {
  const { regression_thresholds: thresholdsFlag } = flags;
  if (!thresholdsFlag) {
    return null;
  }
  if (thresholdsFlag === true) {
    throw new Error("--regression-thresholds requires a JSON file path");
  }
  const thresholdsPath = resolveFromCwd(String(thresholdsFlag));
  const contents = await readFile(thresholdsPath, "utf8");
  return JSON.parse(contents);
}
|
||||
|
||||
/**
 * Validates the combination of baseline-related flags.
 *
 * - `--baseline` and `--save-baseline` are only accepted together with
 *   `--execute`.
 * - `--save-baseline` additionally requires `--reviewed-good`.
 *
 * @param {object} flags - Parsed CLI flags (`baseline`, `save_baseline`,
 *   `execute`, `reviewed_good`).
 * @throws {Error} When one of the rules above is violated.
 */
export function validateBaselineExecutionFlags(flags) {
  const usesBaseline = Boolean(flags.baseline || flags.save_baseline);
  if (usesBaseline && flags.execute !== true) {
    throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs");
  }

  const savesUnreviewed = Boolean(flags.save_baseline) && flags.reviewed_good !== true;
  if (savesUnreviewed) {
    throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report");
  }
}
|
||||
|
||||
/**
 * Removes the runtime of a "local-build" target after a run, or reports why
 * it was not removed.
 *
 * Outcomes:
 * - `null` when the target kind is not "local-build";
 * - `status: "planned"` when not executing (the command is only reported);
 * - `status: "retained"` when any record has `cleanup === "retained"`;
 * - otherwise the removal command is run and its result is classified by
 *   classifyTargetRuntimeCleanup.
 *
 * @param {object} targetPlan - Resolved target; reads `kind` and `runtimeName`.
 * @param {Array<object>} records - Run records; reads `cleanup`.
 * @param {object} options - `execute` (truthy to really run the command) and
 *   `timeoutMs` (passed to the cleanup command).
 * @returns {Promise<object|null>} Cleanup summary, or `null`.
 */
export async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) {
  if (targetPlan.kind !== "local-build") {
    return null;
  }

  const { runtimeName } = targetPlan;
  const command = ocmRuntimeRemoveJson(runtimeName);

  if (!options.execute) {
    return { status: "planned", runtimeName, command };
  }

  // Leave the runtime in place while any env that may depend on it was kept.
  const anyEnvRetained = records.some((record) => record.cleanup === "retained");
  if (anyEnvRetained) {
    return {
      status: "retained",
      runtimeName,
      command,
      reason: "one or more envs were retained"
    };
  }

  const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs });
  const classification = classifyTargetRuntimeCleanup(result);
  const { status: exitStatus, durationMs, timedOut, stdout, stderr } = result;

  return {
    status: classification.status,
    runtimeName,
    command,
    reason: classification.reason,
    result: {
      status: exitStatus,
      durationMs,
      timedOut,
      stdout,
      stderr,
      attempts: result.attempts ?? []
    }
  };
}
|
||||
|
||||
/**
 * Classifies the result of the target-runtime removal command.
 *
 * - Exit status 0 -> `{ status: "removed" }`.
 * - Non-zero exit whose output says the runtime does not exist / was not
 *   found -> `{ status: "already-absent", reason }`.
 * - Anything else -> `{ status: "remove-failed" }`.
 *
 * A shell "command not found" message means the cleanup tool itself could
 * not be run, which says nothing about the runtime, so it is classified as a
 * failure even though it contains the words "not found".
 *
 * @param {object} result - Command result; reads `status`, `stdout`, `stderr`.
 * @returns {{status: string, reason?: string}} The classification.
 */
function classifyTargetRuntimeCleanup(result) {
  if (result.status === 0) {
    return { status: "removed" };
  }

  const output = `${result.stdout ?? ""}\n${result.stderr ?? ""}`;

  // The tool never ran: do not mistake this for "runtime not found".
  if (/\bcommand not found\b/i.test(output)) {
    return { status: "remove-failed" };
  }

  const runtimeMissing =
    /\bruntime\b[\s\S]*\bdoes not exist\b/i.test(output) || /\bnot found\b/i.test(output);
  if (runtimeMissing) {
    return {
      status: "already-absent",
      reason: "target runtime was not present when cleanup ran"
    };
  }

  return { status: "remove-failed" };
}
|
||||
|
||||
/**
 * Reads a positive-integer flag, falling back to a default when it is unset.
 *
 * @param {object} flags - Parsed CLI flags.
 * @param {string} key - Flag key in snake_case; for error messages it is
 *   shown as `--` followed by the key with underscores turned into hyphens.
 * @param {*} defaultValue - Returned as-is (not validated) when `flags[key]`
 *   is `undefined`.
 * @returns {*} The validated integer, or `defaultValue`.
 * @throws {Error} When the flag is set but is not a positive integer.
 */
export function positiveIntegerFlag(flags, key, defaultValue) {
  const raw = flags[key];
  return raw === undefined
    ? defaultValue
    : positiveIntegerValue(raw, `--${key.replaceAll("_", "-")}`);
}
|
||||
|
||||
/**
 * Reads a profiling-related integer flag.
 *
 * Currently identical to positiveIntegerFlag: same arguments, same result,
 * same errors.
 *
 * @param {object} flags - Parsed CLI flags.
 * @param {string} key - Flag key in snake_case.
 * @param {*} defaultValue - Value used when the flag is unset.
 * @returns {*} The validated integer, or `defaultValue`.
 */
export function profileIntegerFlag(flags, key, defaultValue) {
  const resolved = positiveIntegerFlag(flags, key, defaultValue);
  return resolved;
}
|
||||
|
||||
/**
 * Converts a raw flag value to a positive integer.
 *
 * @param {*} raw - Raw value; converted with `Number(raw)`. The boolean
 *   `true` (a flag given without a value) is rejected up front.
 * @param {string} label - Name used in error messages.
 * @returns {number} The value as an integer >= 1.
 * @throws {Error} When `raw` is `true`, or does not convert to an integer
 *   that is at least 1.
 */
export function positiveIntegerValue(raw, label) {
  if (raw === true) {
    throw new Error(`${label} requires a positive integer value`);
  }

  const value = Number(raw);
  const isPositiveInteger = Number.isInteger(value) && value >= 1;
  if (isPositiveInteger) {
    return value;
  }
  throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`);
}
|
||||
|
||||
/**
 * Reduces the performance summary and baseline section of a report to the
 * compact form used in the JSON receipt.
 *
 * @param {object|null} performance - Performance summary of the report.
 * @param {object|null} baseline - Baseline section of the report; may carry
 *   `comparison`, `review` and `saved`.
 * @returns {object|null} The receipt summary, or `null` when `performance` is
 *   falsy. Baseline-derived fields are `null` when the data is absent.
 */
export function summarizePerformanceReceipt(performance, baseline) {
  if (!performance) {
    return null;
  }

  const comparison = baseline?.comparison;
  const review = baseline?.review;
  const saved = baseline?.saved;

  return {
    schemaVersion: performance.schemaVersion,
    repeat: performance.repeat,
    groupCount: performance.groupCount,
    unstableGroupCount: performance.unstableGroupCount,
    profiledRunCount: performance.profiledRunCount ?? 0,
    baselineRegressionCount: comparison?.regressionCount ?? null,
    missingBaselineCount: comparison?.missingBaselineCount ?? null,
    baselineReviewOk: review?.ok ?? null,
    baselineReviewBlockerCount: review?.blockerCount ?? null,
    savedBaselinePath: saved?.path ?? null
  };
}
|
||||
418
src/main.mjs
418
src/main.mjs
@ -1,18 +1,23 @@
|
||||
import { mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
||||
import { join, relative } from "node:path";
|
||||
import { bundleReport, retainGateArtifacts } from "./reporting/artifacts.mjs";
|
||||
import { bundleReport } from "./reporting/artifacts.mjs";
|
||||
import { authReportSummary, resolveRunAuthContext } from "./auth.mjs";
|
||||
import { runCleanupCommand } from "./cleanup.mjs";
|
||||
import { runCommand } from "./commands.mjs";
|
||||
import { runMatrixPlan } from "./commands/matrix-plan.mjs";
|
||||
import { runMatrixRun } from "./commands/matrix-run.mjs";
|
||||
import {
|
||||
cleanupTargetRuntimeIfNeeded,
|
||||
loadRegressionThresholds,
|
||||
positiveIntegerFlag,
|
||||
profileIntegerFlag,
|
||||
summarizePerformanceReceipt,
|
||||
validateBaselineExecutionFlags
|
||||
} from "./commands/run-support.mjs";
|
||||
import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "./reporting/compare.mjs";
|
||||
import { parseFlags, printHelp, required, resolveFromCwd } from "./cli.mjs";
|
||||
import { matrixControlSummary } from "./matrix/controls.mjs";
|
||||
import { applyMatrixControls, expandProfile } from "./matrix/expand.mjs";
|
||||
import { evaluateGate, preflightGateRun } from "./matrix/gate.mjs";
|
||||
import { profileSummary, validateProfileTarget } from "./matrix/profile.mjs";
|
||||
import { profileSummary } from "./matrix/profile.mjs";
|
||||
import { buildCoverage } from "./matrix/coverage.mjs";
|
||||
import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "./matrix/resolver.mjs";
|
||||
import {
|
||||
comparePerformanceToBaseline,
|
||||
loadBaselineStore,
|
||||
@ -25,15 +30,14 @@ import { buildPerformanceSummary } from "./performance/stats.mjs";
|
||||
import { platformInfo } from "./platform.mjs";
|
||||
import { artifactsDir, repoRoot, reportsDir } from "./paths.mjs";
|
||||
import { loadRegistryContext } from "./registries/context.mjs";
|
||||
import { loadProfile } from "./registries/profiles.mjs";
|
||||
import { loadScenarios, validateScenarioRun } from "./registries/scenarios.mjs";
|
||||
import { loadState } from "./registries/states.mjs";
|
||||
import { renderMarkdownReport, renderPasteSummary, renderReportSummary, summarizeRecords } from "./reporting/report.mjs";
|
||||
import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "./runner.mjs";
|
||||
import { buildDryRunRecord, createRunId, executeScenario } from "./runner.mjs";
|
||||
import { runSelfCheck } from "./selfcheck.mjs";
|
||||
import { runSetup } from "./setup.mjs";
|
||||
import { resolveTarget } from "./targets.mjs";
|
||||
import { ocmEnvDestroy, ocmEnvListJson, ocmRuntimeRemoveJson } from "./ocm/commands.mjs";
|
||||
import { ocmEnvDestroy, ocmEnvListJson } from "./ocm/commands.mjs";
|
||||
|
||||
const reportSchemaVersion = "kova.report.v1";
|
||||
|
||||
@ -160,7 +164,7 @@ async function matrixCommand(flags) {
|
||||
}
|
||||
|
||||
if (subcommand === "run") {
|
||||
await matrixRun(flags);
|
||||
await runMatrixRun(flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -235,312 +239,6 @@ async function readReport(path) {
|
||||
return JSON.parse(await readFile(resolveFromCwd(path), "utf8"));
|
||||
}
|
||||
|
||||
async function loadRegressionThresholds(flags) {
|
||||
if (!flags.regression_thresholds) {
|
||||
return null;
|
||||
}
|
||||
if (flags.regression_thresholds === true) {
|
||||
throw new Error("--regression-thresholds requires a JSON file path");
|
||||
}
|
||||
return JSON.parse(await readFile(resolveFromCwd(String(flags.regression_thresholds)), "utf8"));
|
||||
}
|
||||
|
||||
async function matrixRun(flags) {
|
||||
const registry = await loadRegistryContext();
|
||||
const profile = await loadProfile(required(flags.profile, "--profile"));
|
||||
validateProfileExecutionFlags(profile, flags);
|
||||
const target = required(flags.target, "--target");
|
||||
validateBaselineExecutionFlags(flags);
|
||||
const targetPlan = resolveTarget(target, "target");
|
||||
validateProfileTarget(profile, targetPlan);
|
||||
const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;
|
||||
const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo());
|
||||
const resolvedCoverage = resolveCoverageObligations({
|
||||
profile,
|
||||
entries,
|
||||
surfaces: registry.surfaces,
|
||||
targetPlan
|
||||
});
|
||||
assertResolvedCoverageIsRunnable(resolvedCoverage);
|
||||
const controls = matrixControlSummary(flags, targetPlan);
|
||||
const auth = await resolveRunAuthContext(flags);
|
||||
const regressionThresholds = await loadRegressionThresholds(flags);
|
||||
const baselinePath = resolveBaselinePath(flags.baseline);
|
||||
const saveBaselinePath = resolveBaselinePath(flags.save_baseline);
|
||||
const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null;
|
||||
preflightGateRun({ entries, flags });
|
||||
for (const entry of entries.filter((item) => !item.skipReason)) {
|
||||
validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan });
|
||||
}
|
||||
const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir;
|
||||
const runId = createRunId();
|
||||
const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
|
||||
const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
|
||||
const targetSetup = { completed: false };
|
||||
const runEntry = async (entry) => {
|
||||
const context = {
|
||||
target,
|
||||
targetPlan,
|
||||
profile,
|
||||
from: flags.from,
|
||||
fromPlan,
|
||||
state: entry.state,
|
||||
sourceEnv: flags.source_env,
|
||||
runId,
|
||||
controls,
|
||||
execute: flags.execute === true,
|
||||
keepEnv: flags.keep_env === true,
|
||||
retainOnFailure: flags.retain_on_failure === true,
|
||||
timeoutMs: resolveEntryTimeout(entry, flags),
|
||||
healthSamples: profileIntegerFlag(flags, "health_samples", flags.deep_profile === true ? 10 : 3),
|
||||
healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250),
|
||||
readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", flags.deep_profile === true ? 100 : 250),
|
||||
heapSnapshot: flags.heap_snapshot === true || flags.deep_profile === true,
|
||||
diagnosticReport: flags.deep_profile === true,
|
||||
nodeProfile: flags.node_profile === true || flags.deep_profile === true,
|
||||
deepProfile: flags.deep_profile === true,
|
||||
profileOnFailure: flags.profile_on_failure === true,
|
||||
resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", flags.deep_profile === true ? 250 : 1000),
|
||||
processRoles: registry.processRoles,
|
||||
surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])),
|
||||
targetSetup,
|
||||
auth
|
||||
};
|
||||
|
||||
if (entry.skipReason) {
|
||||
return buildRepeatRecords(entry, context, (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason));
|
||||
}
|
||||
|
||||
return buildRepeatRecords(entry, context, async (iterationContext) =>
|
||||
iterationContext.execute
|
||||
? executeScenario(entry.scenario, iterationContext)
|
||||
: buildDryRunRecord(entry.scenario, iterationContext)
|
||||
);
|
||||
};
|
||||
|
||||
const records = flags.execute === true
|
||||
? await runMatrixEntries(entries, runEntry, controls)
|
||||
: (await Promise.all(entries.map((entry) => runEntry(entry)))).flat();
|
||||
const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, {
|
||||
execute: flags.execute === true,
|
||||
timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000)
|
||||
});
|
||||
const performance = buildPerformanceSummary(records, {
|
||||
repeat: controls.repeat,
|
||||
regressionThresholds
|
||||
});
|
||||
const platform = platformInfo();
|
||||
const reportBase = {
|
||||
schemaVersion: reportSchemaVersion,
|
||||
generatedAt: new Date().toISOString(),
|
||||
runId,
|
||||
outputPaths: {
|
||||
markdown: reportPath,
|
||||
json: jsonPath
|
||||
},
|
||||
mode: flags.execute === true ? "execution" : "dry-run",
|
||||
profile: profileSummary(profile),
|
||||
target,
|
||||
from: flags.from ?? null,
|
||||
controls,
|
||||
auth: authReportSummary(auth),
|
||||
state: null,
|
||||
platform,
|
||||
targetCleanup,
|
||||
performance,
|
||||
baseline: null,
|
||||
gate: null,
|
||||
summary: summarizeRecords(records),
|
||||
records
|
||||
};
|
||||
const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds });
|
||||
if (baselineComparison) {
|
||||
reportBase.baseline = {
|
||||
path: baselinePath,
|
||||
comparison: baselineComparison
|
||||
};
|
||||
}
|
||||
const gate = flags.gate === true
|
||||
? evaluateGate({
|
||||
mode: flags.execute === true ? "execution" : "dry-run",
|
||||
controls,
|
||||
performance,
|
||||
baseline: reportBase.baseline,
|
||||
platform: reportBase.platform,
|
||||
records
|
||||
}, profile, { resolvedCoverage })
|
||||
: null;
|
||||
|
||||
await mkdir(reportRoot, { recursive: true });
|
||||
const report = {
|
||||
...reportBase,
|
||||
gate
|
||||
};
|
||||
if (saveBaselinePath) {
|
||||
const existingStore = await loadBaselineStore(saveBaselinePath);
|
||||
const review = reviewBaselineUpdate(report, { reviewedGood: flags.reviewed_good === true });
|
||||
const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood: flags.reviewed_good === true });
|
||||
report.baseline = {
|
||||
...(report.baseline ?? {}),
|
||||
review,
|
||||
saved: await saveBaselineStore(saveBaselinePath, updatedStore)
|
||||
};
|
||||
}
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
|
||||
const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
|
||||
? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
|
||||
: null;
|
||||
|
||||
if (flags.json) {
|
||||
console.log(JSON.stringify({
|
||||
schemaVersion: "kova.matrix.run.receipt.v1",
|
||||
generatedAt: new Date().toISOString(),
|
||||
mode: report.mode,
|
||||
runId,
|
||||
profile: profileSummary(profile),
|
||||
reportPath,
|
||||
jsonPath,
|
||||
bundlePath: bundle.outputPath,
|
||||
checksumPath: bundle.checksumPath,
|
||||
retainedGateArtifacts,
|
||||
gate: summarizeGateReceipt(gate),
|
||||
performance: summarizePerformanceReceipt(report.performance, report.baseline),
|
||||
summary: report.summary
|
||||
}, null, 2));
|
||||
failGateIfNeeded(gate);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Kova matrix ${report.mode} report written: ${relative(process.cwd(), reportPath)}`);
|
||||
console.log(`Kova matrix ${report.mode} data written: ${relative(process.cwd(), jsonPath)}`);
|
||||
console.log(`Kova matrix bundle written: ${relative(process.cwd(), bundle.outputPath)}`);
|
||||
if (retainedGateArtifacts) {
|
||||
console.log(`Kova failed gate artifacts retained: ${relative(process.cwd(), retainedGateArtifacts.outputDir)}`);
|
||||
}
|
||||
if (gate) {
|
||||
console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`);
|
||||
}
|
||||
failGateIfNeeded(gate);
|
||||
}
|
||||
|
||||
function validateProfileExecutionFlags(profile, flags) {
|
||||
if (flags.execute === true && profile.id === "exhaustive" && flags.allow_exhaustive !== true) {
|
||||
throw new Error("executing profile 'exhaustive' requires --allow-exhaustive");
|
||||
}
|
||||
}
|
||||
|
||||
async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
|
||||
report.retainedGateArtifacts = {
|
||||
status: "pending"
|
||||
};
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
const retained = await retainGateArtifacts(jsonPath, bundle);
|
||||
report.retainedGateArtifacts = retained;
|
||||
await writeFile(reportPath, renderMarkdownReport(report), "utf8");
|
||||
await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
|
||||
return retained;
|
||||
}
|
||||
|
||||
function resolveEntryTimeout(entry, flags) {
|
||||
return positiveIntegerValue(flags.timeout_ms ?? entry.timeoutMs ?? entry.scenario.timeoutMs ?? 120000, "--timeout-ms");
|
||||
}
|
||||
|
||||
async function buildRepeatRecords(entry, context, callback) {
|
||||
const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat");
|
||||
const records = [];
|
||||
for (let index = 1; index <= total; index += 1) {
|
||||
records.push(await callback({
|
||||
...context,
|
||||
repeat: {
|
||||
index,
|
||||
total
|
||||
}
|
||||
}));
|
||||
}
|
||||
return records;
|
||||
}
|
||||
|
||||
function failGateIfNeeded(gate) {
|
||||
if (gate && gate.verdict !== "SHIP") {
|
||||
throw new Error(`gate outcome: ${gate.outcome ?? gate.verdict}`);
|
||||
}
|
||||
}
|
||||
|
||||
function summarizeGateReceipt(gate) {
|
||||
if (!gate) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
schemaVersion: gate.schemaVersion,
|
||||
enabled: gate.enabled,
|
||||
profileId: gate.profileId,
|
||||
policyId: gate.policyId,
|
||||
purpose: gate.purpose ?? null,
|
||||
verdict: gate.verdict,
|
||||
outcome: gate.outcome ?? null,
|
||||
ok: gate.ok,
|
||||
complete: gate.complete,
|
||||
partial: gate.partial,
|
||||
missingRequiredCount: gate.missingRequiredCount,
|
||||
blockingCount: gate.blockingCount,
|
||||
warningCount: gate.warningCount,
|
||||
infoCount: gate.infoCount,
|
||||
subsystemCount: gate.subsystems?.length ?? 0,
|
||||
fixerSummaryCount: gate.fixerSummaries?.length ?? 0,
|
||||
baselineRegressionCount: gate.baseline?.regressionCount ?? null,
|
||||
missingBaselineCount: gate.baseline?.missingBaselineCount ?? null
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reduce a performance summary, plus optional baseline results, to a flat
 * summary object.
 *
 * @param {object|null|undefined} performance - Performance summary; a falsy
 *   value short-circuits to null.
 * @param {object|null|undefined} baseline - Optional object whose
 *   `comparison`, `review` and `saved` sections are read; every field derived
 *   from it is null when the section or value is missing.
 * @returns {object|null} The summary, or null when `performance` is falsy.
 */
function summarizePerformanceReceipt(performance, baseline) {
  if (!performance) {
    return null;
  }

  const comparison = baseline?.comparison;
  const review = baseline?.review;

  return {
    schemaVersion: performance.schemaVersion,
    repeat: performance.repeat,
    groupCount: performance.groupCount,
    unstableGroupCount: performance.unstableGroupCount,
    profiledRunCount: performance.profiledRunCount ?? 0,
    baselineRegressionCount: comparison?.regressionCount ?? null,
    missingBaselineCount: comparison?.missingBaselineCount ?? null,
    baselineReviewOk: review?.ok ?? null,
    baselineReviewBlockerCount: review?.blockerCount ?? null,
    savedBaselinePath: baseline?.saved?.path ?? null
  };
}
|
||||
|
||||
/**
 * Run every matrix entry, sequentially or with a bounded pool of workers.
 *
 * Sequential mode is used when `controls.parallel` is missing, not a number,
 * or at most 1. It honours `controls.failFast`: no further entries are started
 * once an entry yields a record whose status is "FAIL" or "BLOCKED".
 *
 * Parallel mode is used when `controls.parallel` is greater than 1. Workers
 * pull the next unclaimed entry until none remain, and results are returned in
 * entry order regardless of completion order. `failFast` is not consulted in
 * this mode.
 *
 * @param {Array} entries - Matrix entries to run.
 * @param {Function} runEntry - Async function mapping one entry to an array of records.
 * @param {object} controls - Run controls; `parallel` and `failFast` are read.
 * @returns {Promise<Array>} Flattened records for every entry that ran.
 */
async function runMatrixEntries(entries, runEntry, controls) {
  // Normalise first: an undefined or NaN `parallel` used to skip the sequential
  // branch and then spawn zero workers, silently running nothing.
  const requestedWorkers = Math.floor(Number(controls.parallel));

  if (!(requestedWorkers > 1)) {
    const records = [];
    for (const entry of entries) {
      const entryRecords = await runEntry(entry);
      records.push(...entryRecords);
      if (controls.failFast && entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED")) {
        break;
      }
    }
    return records;
  }

  // Never create more workers than entries; this also keeps an infinite
  // `parallel` value from allocating an unbounded worker array.
  const workerCount = Math.min(requestedWorkers, entries.length);
  const records = new Array(entries.length);
  let nextIndex = 0;

  async function worker() {
    // Claiming an index is synchronous, so two workers cannot take the same entry.
    while (nextIndex < entries.length) {
      const index = nextIndex;
      nextIndex += 1;
      records[index] = await runEntry(entries[index]);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return records.filter(Boolean).flat();
}
|
||||
|
||||
async function cleanupCommand(flags) {
|
||||
const [subcommand] = flags._;
|
||||
if (subcommand === "envs") {
|
||||
@ -850,91 +548,3 @@ function resolveRunTimeout(scenarios, flags) {
|
||||
.filter((timeout) => typeof timeout === "number");
|
||||
return scenarioTimeouts.length === 0 ? 120000 : Math.max(...scenarioTimeouts);
|
||||
}
|
||||
|
||||
/**
 * Reject baseline-related flag combinations that are not allowed.
 *
 * Two rules are enforced, in this order:
 *  1. `baseline` or `save_baseline` needs `execute === true`.
 *  2. `save_baseline` needs `reviewed_good === true`.
 *
 * @param {object} flags - Parsed CLI flags.
 * @throws {Error} When either rule is violated.
 */
function validateBaselineExecutionFlags(flags) {
  const usesBaseline = Boolean(flags.baseline || flags.save_baseline);
  const executing = flags.execute === true;
  if (usesBaseline && !executing) {
    throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs");
  }

  const reviewed = flags.reviewed_good === true;
  if (flags.save_baseline && !reviewed) {
    throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report");
  }
}
|
||||
|
||||
/**
 * Plan, skip, or perform removal of the runtime created for a "local-build" target.
 *
 * Outcomes, checked in order:
 *  - target kind is not "local-build": returns null.
 *  - `options.execute` is falsy: returns a "planned" descriptor with the command.
 *  - any record has `cleanup === "retained"`: returns a "retained" descriptor
 *    and leaves the runtime alone.
 *  - otherwise: runs the removal command and reports the classified status
 *    together with the raw command result.
 *
 * @param {object} targetPlan - Reads `kind` and `runtimeName`.
 * @param {Array<object>} records - Run records; only `cleanup` is inspected.
 * @param {object} options - Reads `execute` and `timeoutMs`.
 * @returns {Promise<object|null>} Cleanup descriptor, or null when not applicable.
 */
async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) {
  if (targetPlan.kind !== "local-build") {
    return null;
  }

  const { runtimeName } = targetPlan;
  const command = ocmRuntimeRemoveJson(runtimeName);

  if (!options.execute) {
    return { status: "planned", runtimeName, command };
  }

  const envRetained = records.some((record) => record.cleanup === "retained");
  if (envRetained) {
    return {
      status: "retained",
      runtimeName,
      command,
      reason: "one or more envs were retained"
    };
  }

  const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs });
  const { status, reason } = classifyTargetRuntimeCleanup(result);
  const { durationMs, timedOut, stdout, stderr } = result;

  return {
    status,
    runtimeName,
    command,
    reason,
    result: {
      status: result.status,
      durationMs,
      timedOut,
      stdout,
      stderr,
      attempts: result.attempts ?? []
    }
  };
}
|
||||
|
||||
/**
 * Classify the outcome of a runtime-removal command.
 *
 * @param {object} result - Command result; reads `status`, `stdout` and `stderr`.
 * @returns {{status: string, reason?: string}} "removed" when `status` is 0;
 *   "already-absent" when the combined output mentions a runtime that
 *   "does not exist" or contains "not found" (case-insensitive);
 *   "remove-failed" otherwise.
 */
function classifyTargetRuntimeCleanup(result) {
  if (result.status === 0) {
    return { status: "removed" };
  }

  const output = `${result.stdout}\n${result.stderr}`;
  const runtimeMissing = /\bruntime\b[\s\S]*\bdoes not exist\b/i;
  const notFound = /\bnot found\b/i;
  const alreadyAbsent = runtimeMissing.test(output) || notFound.test(output);

  return alreadyAbsent
    ? { status: "already-absent", reason: "target runtime was not present when cleanup ran" }
    : { status: "remove-failed" };
}
|
||||
|
||||
/**
 * Read a positive-integer flag, falling back to a default when it is unset.
 *
 * @param {object} flags - Parsed CLI flags, keyed with underscores.
 * @param {string} key - Flag key; underscores become dashes in the label
 *   passed to the validator (e.g. `timeout_ms` -> `--timeout-ms`).
 * @param {*} defaultValue - Returned as-is, without validation, when
 *   `flags[key]` is undefined.
 * @returns {*} `defaultValue`, or the result of `positiveIntegerValue`.
 */
function positiveIntegerFlag(flags, key, defaultValue) {
  const raw = flags[key];
  if (raw === undefined) {
    return defaultValue;
  }
  const label = `--${key.replaceAll("_", "-")}`;
  return positiveIntegerValue(raw, label);
}
|
||||
|
||||
/**
 * Read a profile-related integer flag by delegating to `positiveIntegerFlag`
 * with the same arguments.
 *
 * @param {object} flags - Parsed CLI flags.
 * @param {string} key - Flag key to read.
 * @param {*} defaultValue - Fallback forwarded to `positiveIntegerFlag`.
 * @returns {*} Whatever `positiveIntegerFlag` returns.
 */
function profileIntegerFlag(flags, key, defaultValue) {
  const resolved = positiveIntegerFlag(flags, key, defaultValue);
  return resolved;
}
|
||||
|
||||
/**
 * Coerce a raw value to a positive integer or throw.
 *
 * @param {*} raw - Value to convert with `Number(raw)`. A literal `true` is
 *   rejected up front with a "requires a positive integer value" message.
 * @param {string} label - Name used in error messages (e.g. "--timeout-ms").
 * @returns {number} The converted integer, always >= 1.
 * @throws {Error} When `raw` is `true`, or does not convert to an integer >= 1.
 */
function positiveIntegerValue(raw, label) {
  if (raw === true) {
    throw new Error(`${label} requires a positive integer value`);
  }

  const value = Number(raw);
  const isPositiveInteger = Number.isInteger(value) && value >= 1;
  if (isPositiveInteger) {
    return value;
  }

  throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`);
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user