refactor: extract matrix run command

2026-05-04 23:55:05 +01:00 · 2026-05-04 23:55:05 +01:00 · 8bf308bc32
commit 8bf308bc32
parent fd575c146b
3 changed files with 450 additions and 404 deletions
--- a/src/commands/matrix-run.mjs
+++ b/src/commands/matrix-run.mjs
@ -0,0 +1,316 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { join, relative } from "node:path";
+import { authReportSummary, resolveRunAuthContext } from "../auth.mjs";
+import { required, resolveFromCwd } from "../cli.mjs";
+import {
+  cleanupTargetRuntimeIfNeeded,
+  loadRegressionThresholds,
+  positiveIntegerFlag,
+  positiveIntegerValue,
+  profileIntegerFlag,
+  summarizePerformanceReceipt,
+  validateBaselineExecutionFlags
+} from "./run-support.mjs";
+import { applyMatrixControls, expandProfile } from "../matrix/expand.mjs";
+import { evaluateGate, preflightGateRun } from "../matrix/gate.mjs";
+import { matrixControlSummary } from "../matrix/controls.mjs";
+import { profileSummary, validateProfileTarget } from "../matrix/profile.mjs";
+import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "../matrix/resolver.mjs";
+import {
+  comparePerformanceToBaseline,
+  loadBaselineStore,
+  resolveBaselinePath,
+  reviewBaselineUpdate,
+  saveBaselineStore,
+  updateBaselineStore
+} from "../performance/baselines.mjs";
+import { buildPerformanceSummary } from "../performance/stats.mjs";
+import { platformInfo } from "../platform.mjs";
+import { reportsDir } from "../paths.mjs";
+import { loadRegistryContext } from "../registries/context.mjs";
+import { loadProfile } from "../registries/profiles.mjs";
+import { validateScenarioRun } from "../registries/scenarios.mjs";
+import { renderMarkdownReport, summarizeRecords } from "../reporting/report.mjs";
+import { bundleReport, retainGateArtifacts } from "../reporting/artifacts.mjs";
+import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "../runner.mjs";
+import { resolveTarget } from "../targets.mjs";
+
+// Schema identifier stamped into the `schemaVersion` field of the report built by runMatrixRun.
+const reportSchemaVersion = "kova.report.v1";
+
+/**
+ * Implements the `matrix run` command: expands a profile into matrix entries, runs (or
+ * dry-runs) every entry, writes the Markdown and JSON reports plus a bundle, and finally
+ * fails if an evaluated gate did not return the "SHIP" verdict.
+ *
+ * @param {object} flags - Parsed CLI flags. `profile` and `target` are passed through
+ *   `required(...)`; the remaining flags read here (`execute`, `gate`, `json`, `from`,
+ *   `baseline`, `save_baseline`, `report_dir`, `deep_profile`, ...) are optional.
+ * @returns {Promise<void>} Resolves after the reports are written and the outcome is printed.
+ * @throws {Error} From any of the validation helpers called below, and from
+ *   `failGateIfNeeded` when a gate was evaluated and its verdict is not "SHIP".
+ */
+export async function runMatrixRun(flags) {
+  const registry = await loadRegistryContext();
+  const profile = await loadProfile(required(flags.profile, "--profile"));
+  validateProfileExecutionFlags(profile, flags);
+  const target = required(flags.target, "--target");
+  validateBaselineExecutionFlags(flags);
+  const targetPlan = resolveTarget(target, "target");
+  validateProfileTarget(profile, targetPlan);
+  const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;
+  const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo());
+  const resolvedCoverage = resolveCoverageObligations({
+    profile,
+    entries,
+    surfaces: registry.surfaces,
+    targetPlan
+  });
+  assertResolvedCoverageIsRunnable(resolvedCoverage);
+  const controls = matrixControlSummary(flags, targetPlan);
+  const auth = await resolveRunAuthContext(flags);
+  const regressionThresholds = await loadRegressionThresholds(flags);
+  const baselinePath = resolveBaselinePath(flags.baseline);
+  const saveBaselinePath = resolveBaselinePath(flags.save_baseline);
+  const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null;
+  preflightGateRun({ entries, flags });
+  // Validate every runnable (non-skipped) entry up front, before any entry is run.
+  for (const entry of entries.filter((item) => !item.skipReason)) {
+    validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan });
+  }
+  const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir;
+  const runId = createRunId();
+  const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
+  const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
+  // One object shared by reference across every entry context.
+  // NOTE(review): presumably the runner flips `completed` after one-time target setup — confirm in runner.mjs.
+  const targetSetup = { completed: false };
+  // Builds the context for one matrix entry and returns that entry's records (one per repeat).
+  const runEntry = async (entry) => {
+    // `--deep-profile` raises the default health sample count, shortens the default readiness
+    // and resource sampling intervals, and switches on heap snapshots, diagnostic reports and
+    // node profiling.
+    const context = {
+      target,
+      targetPlan,
+      profile,
+      from: flags.from,
+      fromPlan,
+      state: entry.state,
+      sourceEnv: flags.source_env,
+      runId,
+      controls,
+      execute: flags.execute === true,
+      keepEnv: flags.keep_env === true,
+      retainOnFailure: flags.retain_on_failure === true,
+      timeoutMs: resolveEntryTimeout(entry, flags),
+      healthSamples: profileIntegerFlag(flags, "health_samples", flags.deep_profile === true ? 10 : 3),
+      healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250),
+      readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", flags.deep_profile === true ? 100 : 250),
+      heapSnapshot: flags.heap_snapshot === true || flags.deep_profile === true,
+      diagnosticReport: flags.deep_profile === true,
+      nodeProfile: flags.node_profile === true || flags.deep_profile === true,
+      deepProfile: flags.deep_profile === true,
+      profileOnFailure: flags.profile_on_failure === true,
+      resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", flags.deep_profile === true ? 250 : 1000),
+      processRoles: registry.processRoles,
+      surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])),
+      targetSetup,
+      auth
+    };
+
+    // Skipped entries still yield one record per repeat so they show up in the report.
+    if (entry.skipReason) {
+      return buildRepeatRecords(entry, context, (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason));
+    }
+
+    return buildRepeatRecords(entry, context, async (iterationContext) =>
+      iterationContext.execute
+        ? executeScenario(entry.scenario, iterationContext)
+        : buildDryRunRecord(entry.scenario, iterationContext)
+    );
+  };
+
+  // Executions go through runMatrixEntries (which reads controls.parallel / controls.failFast);
+  // dry runs build the records for all entries concurrently.
+  const records = flags.execute === true
+    ? await runMatrixEntries(entries, runEntry, controls)
+    : (await Promise.all(entries.map((entry) => runEntry(entry)))).flat();
+  const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, {
+    execute: flags.execute === true,
+    timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000)
+  });
+  const performance = buildPerformanceSummary(records, {
+    repeat: controls.repeat,
+    regressionThresholds
+  });
+  const platform = platformInfo();
+  const reportBase = {
+    schemaVersion: reportSchemaVersion,
+    generatedAt: new Date().toISOString(),
+    runId,
+    outputPaths: {
+      markdown: reportPath,
+      json: jsonPath
+    },
+    mode: flags.execute === true ? "execution" : "dry-run",
+    profile: profileSummary(profile),
+    target,
+    from: flags.from ?? null,
+    controls,
+    auth: authReportSummary(auth),
+    state: null,
+    platform,
+    targetCleanup,
+    performance,
+    baseline: null,
+    gate: null,
+    summary: summarizeRecords(records),
+    records
+  };
+  // `reportBase.baseline` stays null unless the comparison returns a truthy result.
+  const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds });
+  if (baselineComparison) {
+    reportBase.baseline = {
+      path: baselinePath,
+      comparison: baselineComparison
+    };
+  }
+  // The gate is only evaluated when --gate is passed; otherwise it stays null.
+  const gate = flags.gate === true
+    ? evaluateGate({
+      mode: flags.execute === true ? "execution" : "dry-run",
+      controls,
+      performance,
+      baseline: reportBase.baseline,
+      platform: reportBase.platform,
+      records
+    }, profile, { resolvedCoverage })
+    : null;
+
+  await mkdir(reportRoot, { recursive: true });
+  const report = {
+    ...reportBase,
+    gate
+  };
+  // The baseline store is updated and saved before the reports are written, so the written
+  // report already contains the `review` and `saved` details.
+  if (saveBaselinePath) {
+    const existingStore = await loadBaselineStore(saveBaselinePath);
+    const review = reviewBaselineUpdate(report, { reviewedGood: flags.reviewed_good === true });
+    const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood: flags.reviewed_good === true });
+    report.baseline = {
+      ...(report.baseline ?? {}),
+      review,
+      saved: await saveBaselineStore(saveBaselinePath, updatedStore)
+    };
+  }
+  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
+  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
+  // For a non-SHIP gate the helper retains artifacts and rewrites both report files.
+  const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
+    ? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
+    : null;
+
+  if (flags.json) {
+    // The receipt is printed before failGateIfNeeded so it is emitted even when the gate fails.
+    console.log(JSON.stringify({
+      schemaVersion: "kova.matrix.run.receipt.v1",
+      generatedAt: new Date().toISOString(),
+      mode: report.mode,
+      runId,
+      profile: profileSummary(profile),
+      reportPath,
+      jsonPath,
+      bundlePath: bundle.outputPath,
+      checksumPath: bundle.checksumPath,
+      retainedGateArtifacts,
+      gate: summarizeGateReceipt(gate),
+      performance: summarizePerformanceReceipt(report.performance, report.baseline),
+      summary: report.summary
+    }, null, 2));
+    failGateIfNeeded(gate);
+    return;
+  }
+
+  // Human-readable output: paths are printed relative to the current working directory.
+  console.log(`Kova matrix ${report.mode} report written: ${relative(process.cwd(), reportPath)}`);
+  console.log(`Kova matrix ${report.mode} data written: ${relative(process.cwd(), jsonPath)}`);
+  console.log(`Kova matrix bundle written: ${relative(process.cwd(), bundle.outputPath)}`);
+  if (retainedGateArtifacts) {
+    console.log(`Kova failed gate artifacts retained: ${relative(process.cwd(), retainedGateArtifacts.outputDir)}`);
+  }
+  if (gate) {
+    console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`);
+  }
+  failGateIfNeeded(gate);
+}
+
+/**
+ * Requires an explicit opt-in before the "exhaustive" profile is actually executed.
+ *
+ * @param {{ id: string }} profile - Loaded profile; only `id` is inspected.
+ * @param {object} flags - Parsed CLI flags; reads `execute` and `allow_exhaustive`.
+ * @throws {Error} When `execute` is true for the exhaustive profile and
+ *   `allow_exhaustive` is not true.
+ */
+function validateProfileExecutionFlags(profile, flags) {
+  const isExecuting = flags.execute === true;
+  if (!isExecuting || profile.id !== "exhaustive") {
+    return;
+  }
+  if (flags.allow_exhaustive === true) {
+    return;
+  }
+  throw new Error("executing profile 'exhaustive' requires --allow-exhaustive");
+}
+
+/**
+ * Retains the artifacts of a failed gate and records the outcome on the report.
+ *
+ * Sequence: mark the report as pending and rewrite both report files, retain the artifacts,
+ * store the result on `report.retainedGateArtifacts` and rewrite both files again, then call
+ * `retainGateArtifacts` a second time with the same output directory.
+ * NOTE(review): the second call presumably refreshes the retained copy with the final report
+ * — confirm against reporting/artifacts.mjs.
+ *
+ * @param {object} report - Report object; mutated (`retainedGateArtifacts` is set).
+ * @param {string} reportPath - Path of the Markdown report.
+ * @param {string} jsonPath - Path of the JSON report.
+ * @param {object} bundle - Value returned by `bundleReport`.
+ * @returns {Promise<object>} The result of the first `retainGateArtifacts` call.
+ */
+async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
+  // Writes the current state of `report` to both output files, Markdown first.
+  const persistReport = async () => {
+    await writeFile(reportPath, renderMarkdownReport(report), "utf8");
+    await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  };
+
+  report.retainedGateArtifacts = { status: "pending" };
+  await persistReport();
+
+  const retained = await retainGateArtifacts(jsonPath, bundle);
+  report.retainedGateArtifacts = retained;
+  await persistReport();
+
+  await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
+  return retained;
+}
+
+/**
+ * Picks the timeout for one matrix entry. The first non-nullish value wins, in this order:
+ * `flags.timeout_ms`, `entry.timeoutMs`, `entry.scenario.timeoutMs`, then 120000.
+ *
+ * @param {object} entry - Matrix entry; reads `timeoutMs` and `scenario.timeoutMs`.
+ * @param {object} flags - Parsed CLI flags; reads `timeout_ms`.
+ * @returns {number} The value returned by `positiveIntegerValue` for the chosen timeout.
+ */
+function resolveEntryTimeout(entry, flags) {
+  const fallbackMs = 120000;
+  const rawTimeout = flags.timeout_ms ?? entry.timeoutMs ?? entry.scenario.timeoutMs ?? fallbackMs;
+  return positiveIntegerValue(rawTimeout, "--timeout-ms");
+}
+
+/**
+ * Invokes `callback` once per repeat, strictly one after another, and collects the results.
+ *
+ * @param {object} entry - Matrix entry; not read by this function.
+ * @param {object} context - Base context; `controls.repeat` sets the repeat count (default 1).
+ * @param {(iterationContext: object) => any} callback - Receives a copy of `context` with
+ *   `repeat: { index, total }` added, where `index` counts from 1. Its result is awaited.
+ * @returns {Promise<Array>} One result per repeat, in repeat order.
+ */
+async function buildRepeatRecords(entry, context, callback) {
+  const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat");
+  const collected = [];
+  let index = 0;
+  while (index < total) {
+    index += 1;
+    const iterationContext = { ...context, repeat: { index, total } };
+    collected.push(await callback(iterationContext));
+  }
+  return collected;
+}
+
+/**
+ * Throws when a gate was evaluated and its verdict is anything other than "SHIP".
+ *
+ * @param {object|null} gate - Gate result, or a falsy value when no gate was evaluated.
+ * @throws {Error} With message `gate outcome: <outcome or verdict>`.
+ */
+function failGateIfNeeded(gate) {
+  if (!gate || gate.verdict === "SHIP") {
+    return;
+  }
+  const label = gate.outcome ?? gate.verdict;
+  throw new Error(`gate outcome: ${label}`);
+}
+
+/**
+ * Reduces a gate result to the flat summary embedded in the JSON run receipt.
+ *
+ * @param {object|null} gate - Gate result, or a falsy value when no gate was evaluated.
+ * @returns {object|null} `null` for a falsy gate; otherwise a summary in which `purpose`,
+ *   `outcome` and the two baseline counts default to `null`, and the subsystem and fixer
+ *   summary counts default to 0.
+ */
+function summarizeGateReceipt(gate) {
+  if (!gate) {
+    return null;
+  }
+  const {
+    schemaVersion,
+    enabled,
+    profileId,
+    policyId,
+    purpose,
+    verdict,
+    outcome,
+    ok,
+    complete,
+    partial,
+    missingRequiredCount,
+    blockingCount,
+    warningCount,
+    infoCount,
+    subsystems,
+    fixerSummaries,
+    baseline
+  } = gate;
+  return {
+    schemaVersion,
+    enabled,
+    profileId,
+    policyId,
+    purpose: purpose ?? null,
+    verdict,
+    outcome: outcome ?? null,
+    ok,
+    complete,
+    partial,
+    missingRequiredCount,
+    blockingCount,
+    warningCount,
+    infoCount,
+    subsystemCount: subsystems?.length ?? 0,
+    fixerSummaryCount: fixerSummaries?.length ?? 0,
+    baselineRegressionCount: baseline?.regressionCount ?? null,
+    missingBaselineCount: baseline?.missingBaselineCount ?? null
+  };
+}
+
+/**
+ * Runs every matrix entry through `runEntry`, serially or with a bounded worker pool, and
+ * returns all resulting records flattened in entry order.
+ *
+ * @param {Array} entries - Matrix entries to run.
+ * @param {(entry: any) => Promise<Array>} runEntry - Produces the records for one entry.
+ * @param {{ parallel?: number, failFast?: boolean }} controls - `parallel` greater than 1
+ *   selects the worker pool; any other value (including undefined or NaN) runs serially.
+ *   `failFast` stops the serial run after the first entry with a "FAIL" or "BLOCKED" record.
+ * @returns {Promise<Array>} Records of all entries that ran, in entry order.
+ */
+async function runMatrixEntries(entries, runEntry, controls) {
+  // Only a value that really compares greater than 1 selects the worker pool. Testing
+  // `parallel <= 1` instead lets undefined/NaN fall through to the pool, where it yields
+  // zero workers and an empty result without running anything.
+  if (!(controls.parallel > 1)) {
+    const records = [];
+    for (const entry of entries) {
+      const entryRecords = await runEntry(entry);
+      records.push(...entryRecords);
+      if (controls.failFast && entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED")) {
+        break;
+      }
+    }
+    return records;
+  }
+
+  // NOTE(review): the parallel path does not consult controls.failFast — confirm that is intended.
+  // Results are stored by entry index so the output order does not depend on completion order.
+  const records = new Array(entries.length);
+  let nextIndex = 0;
+  async function worker() {
+    while (nextIndex < entries.length) {
+      // Claiming the index and advancing the cursor happen without an intervening await,
+      // so two workers never pick up the same entry.
+      const index = nextIndex;
+      nextIndex += 1;
+      records[index] = await runEntry(entries[index]);
+    }
+  }
+
+  // Never start more workers than there are entries to hand out.
+  const workerCount = Math.min(controls.parallel, entries.length);
+  await Promise.all(Array.from({ length: workerCount }, () => worker()));
+  return records.filter(Boolean).flat();
+}
--- a/src/commands/run-support.mjs
+++ b/src/commands/run-support.mjs
@ -0,0 +1,120 @@
+import { readFile } from "node:fs/promises";
+import { runCleanupCommand } from "../cleanup.mjs";
+import { resolveFromCwd } from "../cli.mjs";
+import { ocmRuntimeRemoveJson } from "../ocm/commands.mjs";
+
+/**
+ * Loads regression thresholds from the JSON file named by `flags.regression_thresholds`.
+ *
+ * @param {object} flags - Parsed CLI flags; reads `regression_thresholds`.
+ * @returns {Promise<any|null>} The parsed JSON, or `null` when the flag is absent or falsy.
+ * @throws {Error} When the flag is `true`, i.e. it was given without a file path. Read and
+ *   JSON parse errors propagate unchanged.
+ */
+export async function loadRegressionThresholds(flags) {
+  const source = flags.regression_thresholds;
+  if (!source) {
+    return null;
+  }
+  if (source === true) {
+    throw new Error("--regression-thresholds requires a JSON file path");
+  }
+  const thresholdsPath = resolveFromCwd(String(source));
+  const contents = await readFile(thresholdsPath, "utf8");
+  return JSON.parse(contents);
+}
+
+/**
+ * Checks that the baseline flags are combined with the flags they depend on.
+ *
+ * @param {object} flags - Parsed CLI flags; reads `baseline`, `save_baseline`, `execute`
+ *   and `reviewed_good`.
+ * @throws {Error} When `baseline` or `save_baseline` is set without `execute === true`, or
+ *   when `save_baseline` is set without `reviewed_good === true`.
+ */
+export function validateBaselineExecutionFlags(flags) {
+  const {
+    baseline,
+    save_baseline: saveBaseline,
+    execute,
+    reviewed_good: reviewedGood
+  } = flags;
+
+  const usesBaseline = Boolean(baseline || saveBaseline);
+  if (usesBaseline && execute !== true) {
+    throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs");
+  }
+  if (saveBaseline && reviewedGood !== true) {
+    throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report");
+  }
+}
+
+/**
+ * Plans or performs removal of the target runtime after a run. Only targets whose plan has
+ * `kind === "local-build"` are handled.
+ *
+ * @param {object} targetPlan - Resolved target; reads `kind` and `runtimeName`.
+ * @param {Array<object>} records - Run records; a record with `cleanup === "retained"`
+ *   prevents the removal.
+ * @param {{ execute: boolean, timeoutMs: number }} options - `execute` false only reports
+ *   the planned command; `timeoutMs` is forwarded to `runCleanupCommand`.
+ * @returns {Promise<object|null>} `null` for other target kinds; otherwise an object with
+ *   `status` ("planned", "retained", or the classification of the removal command),
+ *   `runtimeName`, `command` and, once the command ran, `reason` and `result`.
+ */
+export async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) {
+  if (targetPlan.kind !== "local-build") {
+    return null;
+  }
+
+  const { runtimeName } = targetPlan;
+  const command = ocmRuntimeRemoveJson(runtimeName);
+
+  // Dry run: report the command without running it.
+  if (!options.execute) {
+    return { status: "planned", runtimeName, command };
+  }
+
+  // Leave the runtime in place while any record reports a retained env.
+  const hasRetainedEnv = records.some((record) => record.cleanup === "retained");
+  if (hasRetainedEnv) {
+    return {
+      status: "retained",
+      runtimeName,
+      command,
+      reason: "one or more envs were retained"
+    };
+  }
+
+  const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs });
+  const { status, reason } = classifyTargetRuntimeCleanup(result);
+  const { status: exitStatus, durationMs, timedOut, stdout, stderr, attempts } = result;
+  return {
+    status,
+    runtimeName,
+    command,
+    reason,
+    result: {
+      status: exitStatus,
+      durationMs,
+      timedOut,
+      stdout,
+      stderr,
+      attempts: attempts ?? []
+    }
+  };
+}
+
+/**
+ * Classifies the outcome of the runtime removal command.
+ *
+ * @param {{ status: number, stdout: string, stderr: string }} result - Command result.
+ * @returns {{ status: string, reason?: string }} "removed" for exit status 0;
+ *   "already-absent" (with a reason) when the combined output matches one of the absence
+ *   patterns; "remove-failed" otherwise.
+ */
+function classifyTargetRuntimeCleanup(result) {
+  if (result.status === 0) {
+    return { status: "removed" };
+  }
+
+  const combinedOutput = `${result.stdout}\n${result.stderr}`;
+  // NOTE(review): the second pattern matches any "not found" in the output, not only a
+  // missing runtime — confirm against the real ocm messages that unrelated failures
+  // cannot be reported as "already-absent".
+  const absencePatterns = [
+    /\bruntime\b[\s\S]*\bdoes not exist\b/i,
+    /\bnot found\b/i
+  ];
+  const looksAbsent = absencePatterns.some((pattern) => pattern.test(combinedOutput));
+  if (!looksAbsent) {
+    return { status: "remove-failed" };
+  }
+  return {
+    status: "already-absent",
+    reason: "target runtime was not present when cleanup ran"
+  };
+}
+
+/**
+ * Reads a positive-integer flag, falling back to a default when the flag is not set.
+ *
+ * @param {object} flags - Parsed CLI flags.
+ * @param {string} key - Flag key in snake_case (for example "timeout_ms").
+ * @param {any} defaultValue - Returned as-is, without validation, when `flags[key]` is undefined.
+ * @returns {any} `defaultValue`, or the result of `positiveIntegerValue` for the flag,
+ *   labelled with the dashed flag name (for example "--timeout-ms").
+ */
+export function positiveIntegerFlag(flags, key, defaultValue) {
+  const raw = flags[key];
+  if (raw !== undefined) {
+    const label = `--${key.replaceAll("_", "-")}`;
+    return positiveIntegerValue(raw, label);
+  }
+  return defaultValue;
+}
+
+/**
+ * Reads a positive-integer flag. This is a plain pass-through to `positiveIntegerFlag`:
+ * same arguments, same return value, same errors.
+ *
+ * @param {object} flags - Parsed CLI flags.
+ * @param {string} key - Flag key in snake_case.
+ * @param {any} defaultValue - Returned when the flag is not set.
+ * @returns {any} Whatever `positiveIntegerFlag` returns.
+ */
+export function profileIntegerFlag(flags, key, defaultValue) {
+  return positiveIntegerFlag(flags, key, defaultValue);
+}
+
+/**
+ * Converts a raw flag value to a positive integer.
+ *
+ * @param {any} raw - Raw value; it is converted with `Number(raw)`.
+ * @param {string} label - Flag name used in error messages.
+ * @returns {number} The converted value, an integer of at least 1.
+ * @throws {Error} When `raw` is `true` (flag given without a value), or when the
+ *   converted value is not an integer or is less than 1.
+ */
+export function positiveIntegerValue(raw, label) {
+  if (raw === true) {
+    throw new Error(`${label} requires a positive integer value`);
+  }
+  const parsed = Number(raw);
+  const isPositiveInteger = Number.isInteger(parsed) && parsed >= 1;
+  if (isPositiveInteger) {
+    return parsed;
+  }
+  throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`);
+}
+
+/**
+ * Reduces a performance summary and its baseline details to the flat summary embedded in
+ * the JSON run receipt.
+ *
+ * @param {object|null} performance - Performance summary, or a falsy value when there is none.
+ * @param {object|null} baseline - Baseline section of the report; reads `comparison`,
+ *   `review` and `saved` when present.
+ * @returns {object|null} `null` for a falsy `performance`; otherwise the summary, in which
+ *   `profiledRunCount` defaults to 0 and every baseline-derived field defaults to `null`.
+ */
+export function summarizePerformanceReceipt(performance, baseline) {
+  if (!performance) {
+    return null;
+  }
+  const { schemaVersion, repeat, groupCount, unstableGroupCount, profiledRunCount } = performance;
+  const comparison = baseline?.comparison;
+  const review = baseline?.review;
+  return {
+    schemaVersion,
+    repeat,
+    groupCount,
+    unstableGroupCount,
+    profiledRunCount: profiledRunCount ?? 0,
+    baselineRegressionCount: comparison?.regressionCount ?? null,
+    missingBaselineCount: comparison?.missingBaselineCount ?? null,
+    baselineReviewOk: review?.ok ?? null,
+    baselineReviewBlockerCount: review?.blockerCount ?? null,
+    savedBaselinePath: baseline?.saved?.path ?? null
+  };
+}
--- a/src/main.mjs
+++ b/src/main.mjs
@ -1,18 +1,23 @@
 import { mkdir, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
 import { join, relative } from "node:path";
-import { bundleReport, retainGateArtifacts } from "./reporting/artifacts.mjs";
+import { bundleReport } from "./reporting/artifacts.mjs";
 import { authReportSummary, resolveRunAuthContext } from "./auth.mjs";
 import { runCleanupCommand } from "./cleanup.mjs";
 import { runCommand } from "./commands.mjs";
 import { runMatrixPlan } from "./commands/matrix-plan.mjs";
+import { runMatrixRun } from "./commands/matrix-run.mjs";
+import {
+  cleanupTargetRuntimeIfNeeded,
+  loadRegressionThresholds,
+  positiveIntegerFlag,
+  profileIntegerFlag,
+  summarizePerformanceReceipt,
+  validateBaselineExecutionFlags
+} from "./commands/run-support.mjs";
 import { compareReports, renderCompareFixerSummary, renderCompareSummary } from "./reporting/compare.mjs";
 import { parseFlags, printHelp, required, resolveFromCwd } from "./cli.mjs";
-import { matrixControlSummary } from "./matrix/controls.mjs";
-import { applyMatrixControls, expandProfile } from "./matrix/expand.mjs";
-import { evaluateGate, preflightGateRun } from "./matrix/gate.mjs";
-import { profileSummary, validateProfileTarget } from "./matrix/profile.mjs";
+import { profileSummary } from "./matrix/profile.mjs";
 import { buildCoverage } from "./matrix/coverage.mjs";
-import { assertResolvedCoverageIsRunnable, resolveCoverageObligations } from "./matrix/resolver.mjs";
 import {
  comparePerformanceToBaseline,
  loadBaselineStore,
@ -25,15 +30,14 @@ import { buildPerformanceSummary } from "./performance/stats.mjs";
 import { platformInfo } from "./platform.mjs";
 import { artifactsDir, repoRoot, reportsDir } from "./paths.mjs";
 import { loadRegistryContext } from "./registries/context.mjs";
-import { loadProfile } from "./registries/profiles.mjs";
 import { loadScenarios, validateScenarioRun } from "./registries/scenarios.mjs";
 import { loadState } from "./registries/states.mjs";
 import { renderMarkdownReport, renderPasteSummary, renderReportSummary, summarizeRecords } from "./reporting/report.mjs";
-import { buildDryRunRecord, buildSkippedRecord, createRunId, executeScenario } from "./runner.mjs";
+import { buildDryRunRecord, createRunId, executeScenario } from "./runner.mjs";
 import { runSelfCheck } from "./selfcheck.mjs";
 import { runSetup } from "./setup.mjs";
 import { resolveTarget } from "./targets.mjs";
-import { ocmEnvDestroy, ocmEnvListJson, ocmRuntimeRemoveJson } from "./ocm/commands.mjs";
+import { ocmEnvDestroy, ocmEnvListJson } from "./ocm/commands.mjs";

 const reportSchemaVersion = "kova.report.v1";

@ -160,7 +164,7 @@ async function matrixCommand(flags) {
  }

  if (subcommand === "run") {
-    await matrixRun(flags);
+    await runMatrixRun(flags);
    return;
  }

@ -235,312 +239,6 @@ async function readReport(path) {
  return JSON.parse(await readFile(resolveFromCwd(path), "utf8"));
 }

-async function loadRegressionThresholds(flags) {
-  if (!flags.regression_thresholds) {
-    return null;
-  }
-  if (flags.regression_thresholds === true) {
-    throw new Error("--regression-thresholds requires a JSON file path");
-  }
-  return JSON.parse(await readFile(resolveFromCwd(String(flags.regression_thresholds)), "utf8"));
-}
-
-async function matrixRun(flags) {
-  const registry = await loadRegistryContext();
-  const profile = await loadProfile(required(flags.profile, "--profile"));
-  validateProfileExecutionFlags(profile, flags);
-  const target = required(flags.target, "--target");
-  validateBaselineExecutionFlags(flags);
-  const targetPlan = resolveTarget(target, "target");
-  validateProfileTarget(profile, targetPlan);
-  const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;
-  const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo());
-  const resolvedCoverage = resolveCoverageObligations({
-    profile,
-    entries,
-    surfaces: registry.surfaces,
-    targetPlan
-  });
-  assertResolvedCoverageIsRunnable(resolvedCoverage);
-  const controls = matrixControlSummary(flags, targetPlan);
-  const auth = await resolveRunAuthContext(flags);
-  const regressionThresholds = await loadRegressionThresholds(flags);
-  const baselinePath = resolveBaselinePath(flags.baseline);
-  const saveBaselinePath = resolveBaselinePath(flags.save_baseline);
-  const baselineStore = baselinePath ? await loadBaselineStore(baselinePath) : null;
-  preflightGateRun({ entries, flags });
-  for (const entry of entries.filter((item) => !item.skipReason)) {
-    validateScenarioRun(entry.scenario, flags, { targetPlan, fromPlan });
-  }
-  const reportRoot = flags.report_dir ? resolveFromCwd(flags.report_dir) : reportsDir;
-  const runId = createRunId();
-  const reportPath = join(reportRoot, `${runId}-${profile.id}.md`);
-  const jsonPath = join(reportRoot, `${runId}-${profile.id}.json`);
-  const targetSetup = { completed: false };
-  const runEntry = async (entry) => {
-    const context = {
-      target,
-      targetPlan,
-      profile,
-      from: flags.from,
-      fromPlan,
-      state: entry.state,
-      sourceEnv: flags.source_env,
-      runId,
-      controls,
-      execute: flags.execute === true,
-      keepEnv: flags.keep_env === true,
-      retainOnFailure: flags.retain_on_failure === true,
-      timeoutMs: resolveEntryTimeout(entry, flags),
-      healthSamples: profileIntegerFlag(flags, "health_samples", flags.deep_profile === true ? 10 : 3),
-      healthIntervalMs: positiveIntegerFlag(flags, "health_interval_ms", 250),
-      readinessIntervalMs: profileIntegerFlag(flags, "readiness_interval_ms", flags.deep_profile === true ? 100 : 250),
-      heapSnapshot: flags.heap_snapshot === true || flags.deep_profile === true,
-      diagnosticReport: flags.deep_profile === true,
-      nodeProfile: flags.node_profile === true || flags.deep_profile === true,
-      deepProfile: flags.deep_profile === true,
-      profileOnFailure: flags.profile_on_failure === true,
-      resourceSampleIntervalMs: profileIntegerFlag(flags, "resource_sample_interval_ms", flags.deep_profile === true ? 250 : 1000),
-      processRoles: registry.processRoles,
-      surfacesById: Object.fromEntries(registry.surfaces.map((surface) => [surface.id, surface])),
-      targetSetup,
-      auth
-    };
-
-    if (entry.skipReason) {
-      return buildRepeatRecords(entry, context, (iterationContext) => buildSkippedRecord(entry.scenario, iterationContext, entry.skipReason));
-    }
-
-    return buildRepeatRecords(entry, context, async (iterationContext) =>
-      iterationContext.execute
-        ? executeScenario(entry.scenario, iterationContext)
-        : buildDryRunRecord(entry.scenario, iterationContext)
-    );
-  };
-
-  const records = flags.execute === true
-    ? await runMatrixEntries(entries, runEntry, controls)
-    : (await Promise.all(entries.map((entry) => runEntry(entry)))).flat();
-  const targetCleanup = await cleanupTargetRuntimeIfNeeded(targetPlan, records, {
-    execute: flags.execute === true,
-    timeoutMs: positiveIntegerFlag(flags, "timeout_ms", 120000)
-  });
-  const performance = buildPerformanceSummary(records, {
-    repeat: controls.repeat,
-    regressionThresholds
-  });
-  const platform = platformInfo();
-  const reportBase = {
-    schemaVersion: reportSchemaVersion,
-    generatedAt: new Date().toISOString(),
-    runId,
-    outputPaths: {
-      markdown: reportPath,
-      json: jsonPath
-    },
-    mode: flags.execute === true ? "execution" : "dry-run",
-    profile: profileSummary(profile),
-    target,
-    from: flags.from ?? null,
-    controls,
-    auth: authReportSummary(auth),
-    state: null,
-    platform,
-    targetCleanup,
-    performance,
-    baseline: null,
-    gate: null,
-    summary: summarizeRecords(records),
-    records
-  };
-  const baselineComparison = comparePerformanceToBaseline(reportBase, baselineStore, { targetPlan, regressionThresholds });
-  if (baselineComparison) {
-    reportBase.baseline = {
-      path: baselinePath,
-      comparison: baselineComparison
-    };
-  }
-  const gate = flags.gate === true
-    ? evaluateGate({
-      mode: flags.execute === true ? "execution" : "dry-run",
-      controls,
-      performance,
-      baseline: reportBase.baseline,
-      platform: reportBase.platform,
-      records
-    }, profile, { resolvedCoverage })
-    : null;
-
-  await mkdir(reportRoot, { recursive: true });
-  const report = {
-    ...reportBase,
-    gate
-  };
-  if (saveBaselinePath) {
-    const existingStore = await loadBaselineStore(saveBaselinePath);
-    const review = reviewBaselineUpdate(report, { reviewedGood: flags.reviewed_good === true });
-    const updatedStore = updateBaselineStore(existingStore, report, { targetPlan, reviewedGood: flags.reviewed_good === true });
-    report.baseline = {
-      ...(report.baseline ?? {}),
-      review,
-      saved: await saveBaselineStore(saveBaselinePath, updatedStore)
-    };
-  }
-  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
-  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
-  const bundle = await bundleReport(jsonPath, { outputDir: reportRoot });
-  const retainedGateArtifacts = gate && gate.verdict !== "SHIP"
-    ? await retainFailedGateArtifacts(report, reportPath, jsonPath, bundle)
-    : null;
-
-  if (flags.json) {
-    console.log(JSON.stringify({
-      schemaVersion: "kova.matrix.run.receipt.v1",
-      generatedAt: new Date().toISOString(),
-      mode: report.mode,
-      runId,
-      profile: profileSummary(profile),
-      reportPath,
-      jsonPath,
-      bundlePath: bundle.outputPath,
-      checksumPath: bundle.checksumPath,
-      retainedGateArtifacts,
-      gate: summarizeGateReceipt(gate),
-      performance: summarizePerformanceReceipt(report.performance, report.baseline),
-      summary: report.summary
-    }, null, 2));
-    failGateIfNeeded(gate);
-    return;
-  }
-
-  console.log(`Kova matrix ${report.mode} report written: ${relative(process.cwd(), reportPath)}`);
-  console.log(`Kova matrix ${report.mode} data written: ${relative(process.cwd(), jsonPath)}`);
-  console.log(`Kova matrix bundle written: ${relative(process.cwd(), bundle.outputPath)}`);
-  if (retainedGateArtifacts) {
-    console.log(`Kova failed gate artifacts retained: ${relative(process.cwd(), retainedGateArtifacts.outputDir)}`);
-  }
-  if (gate) {
-    console.log(`Kova gate outcome: ${gate.outcome ?? gate.verdict}`);
-  }
-  failGateIfNeeded(gate);
-}
-
-function validateProfileExecutionFlags(profile, flags) {
-  if (flags.execute === true && profile.id === "exhaustive" && flags.allow_exhaustive !== true) {
-    throw new Error("executing profile 'exhaustive' requires --allow-exhaustive");
-  }
-}
-
-async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
-  report.retainedGateArtifacts = {
-    status: "pending"
-  };
-  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
-  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
-  const retained = await retainGateArtifacts(jsonPath, bundle);
-  report.retainedGateArtifacts = retained;
-  await writeFile(reportPath, renderMarkdownReport(report), "utf8");
-  await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
-  await retainGateArtifacts(jsonPath, bundle, { outputDir: retained.outputDir });
-  return retained;
-}
-
-function resolveEntryTimeout(entry, flags) {
-  return positiveIntegerValue(flags.timeout_ms ?? entry.timeoutMs ?? entry.scenario.timeoutMs ?? 120000, "--timeout-ms");
-}
-
-async function buildRepeatRecords(entry, context, callback) {
-  const total = positiveIntegerValue(context.controls?.repeat ?? 1, "repeat");
-  const records = [];
-  for (let index = 1; index <= total; index += 1) {
-    records.push(await callback({
-      ...context,
-      repeat: {
-        index,
-        total
-      }
-    }));
-  }
-  return records;
-}
-
-function failGateIfNeeded(gate) {
-  if (gate && gate.verdict !== "SHIP") {
-    throw new Error(`gate outcome: ${gate.outcome ?? gate.verdict}`);
-  }
-}
-
-function summarizeGateReceipt(gate) {
-  if (!gate) {
-    return null;
-  }
-  return {
-    schemaVersion: gate.schemaVersion,
-    enabled: gate.enabled,
-    profileId: gate.profileId,
-    policyId: gate.policyId,
-    purpose: gate.purpose ?? null,
-    verdict: gate.verdict,
-    outcome: gate.outcome ?? null,
-    ok: gate.ok,
-    complete: gate.complete,
-    partial: gate.partial,
-    missingRequiredCount: gate.missingRequiredCount,
-    blockingCount: gate.blockingCount,
-    warningCount: gate.warningCount,
-    infoCount: gate.infoCount,
-    subsystemCount: gate.subsystems?.length ?? 0,
-    fixerSummaryCount: gate.fixerSummaries?.length ?? 0,
-    baselineRegressionCount: gate.baseline?.regressionCount ?? null,
-    missingBaselineCount: gate.baseline?.missingBaselineCount ?? null
-  };
-}
-
-function summarizePerformanceReceipt(performance, baseline) {
-  if (!performance) {
-    return null;
-  }
-  return {
-    schemaVersion: performance.schemaVersion,
-    repeat: performance.repeat,
-    groupCount: performance.groupCount,
-    unstableGroupCount: performance.unstableGroupCount,
-    profiledRunCount: performance.profiledRunCount ?? 0,
-    baselineRegressionCount: baseline?.comparison?.regressionCount ?? null,
-    missingBaselineCount: baseline?.comparison?.missingBaselineCount ?? null,
-    baselineReviewOk: baseline?.review?.ok ?? null,
-    baselineReviewBlockerCount: baseline?.review?.blockerCount ?? null,
-    savedBaselinePath: baseline?.saved?.path ?? null
-  };
-}
-
-async function runMatrixEntries(entries, runEntry, controls) {
-  if (controls.parallel <= 1) {
-    const records = [];
-    for (const entry of entries) {
-      const entryRecords = await runEntry(entry);
-      records.push(...entryRecords);
-      if (controls.failFast && entryRecords.some((record) => record.status === "FAIL" || record.status === "BLOCKED")) {
-        break;
-      }
-    }
-    return records;
-  }
-
-  const records = new Array(entries.length);
-  let nextIndex = 0;
-  async function worker() {
-    while (nextIndex < entries.length) {
-      const index = nextIndex;
-      nextIndex += 1;
-      records[index] = await runEntry(entries[index]);
-    }
-  }
-
-  await Promise.all(Array.from({ length: controls.parallel }, () => worker()));
-  return records.filter(Boolean).flat();
-}
-
 async function cleanupCommand(flags) {
  const [subcommand] = flags._;
  if (subcommand === "envs") {
@ -850,91 +548,3 @@ function resolveRunTimeout(scenarios, flags) {
    .filter((timeout) => typeof timeout === "number");
  return scenarioTimeouts.length === 0 ? 120000 : Math.max(...scenarioTimeouts);
 }
-
-function validateBaselineExecutionFlags(flags) {
-  if ((flags.baseline || flags.save_baseline) && flags.execute !== true) {
-    throw new Error("--baseline and --save-baseline require --execute so baseline evidence comes from real OpenClaw runs");
-  }
-  if (flags.save_baseline && flags.reviewed_good !== true) {
-    throw new Error("--save-baseline requires --reviewed-good after reviewing a passing, stable execution report");
-  }
-}
-
-async function cleanupTargetRuntimeIfNeeded(targetPlan, records, options) {
-  if (targetPlan.kind !== "local-build") {
-    return null;
-  }
-
-  const command = ocmRuntimeRemoveJson(targetPlan.runtimeName);
-  if (!options.execute) {
-    return {
-      status: "planned",
-      runtimeName: targetPlan.runtimeName,
-      command
-    };
-  }
-
-  if (records.some((record) => record.cleanup === "retained")) {
-    return {
-      status: "retained",
-      runtimeName: targetPlan.runtimeName,
-      command,
-      reason: "one or more envs were retained"
-    };
-  }
-
-  const result = await runCleanupCommand(command, { timeoutMs: options.timeoutMs });
-  const cleanupStatus = classifyTargetRuntimeCleanup(result);
-  return {
-    status: cleanupStatus.status,
-    runtimeName: targetPlan.runtimeName,
-    command,
-    reason: cleanupStatus.reason,
-    result: {
-      status: result.status,
-      durationMs: result.durationMs,
-      timedOut: result.timedOut,
-      stdout: result.stdout,
-      stderr: result.stderr,
-      attempts: result.attempts ?? []
-    }
-  };
-}
-
-function classifyTargetRuntimeCleanup(result) {
-  if (result.status === 0) {
-    return { status: "removed" };
-  }
-
-  const output = `${result.stdout}\n${result.stderr}`;
-  if (/\bruntime\b[\s\S]*\bdoes not exist\b/i.test(output) || /\bnot found\b/i.test(output)) {
-    return {
-      status: "already-absent",
-      reason: "target runtime was not present when cleanup ran"
-    };
-  }
-
-  return { status: "remove-failed" };
-}
-
-function positiveIntegerFlag(flags, key, defaultValue) {
-  if (flags[key] === undefined) {
-    return defaultValue;
-  }
-  return positiveIntegerValue(flags[key], `--${key.replaceAll("_", "-")}`);
-}
-
-function profileIntegerFlag(flags, key, defaultValue) {
-  return positiveIntegerFlag(flags, key, defaultValue);
-}
-
-function positiveIntegerValue(raw, label) {
-  if (raw === true) {
-    throw new Error(`${label} requires a positive integer value`);
-  }
-  const value = Number(raw);
-  if (!Number.isInteger(value) || value < 1) {
-    throw new Error(`${label} must be a positive integer, got ${JSON.stringify(raw)}`);
-  }
-  return value;
-}