feat: guard exhaustive execution

2026-05-01 10:49:10 +01:00 · 2026-05-01 10:49:10 +01:00 · 6dc9e93505
commit 6dc9e93505
parent a5ad016202
4 changed files with 19 additions and 1 deletions
--- a/docs/AGENT_USAGE.md
+++ b/docs/AGENT_USAGE.md
@ -94,6 +94,11 @@ node bin/kova.mjs matrix run --profile release --target runtime:stable --include
 node bin/kova.mjs matrix run --profile release --target local-build:/path/to/openclaw --include scenario:release-runtime-startup --execute --gate --json
 ```

+The `exhaustive` profile cannot execute unless the command includes
+`--allow-exhaustive`. Use plan or filtered dry-runs first; only use the explicit
+flag when the machine is intentionally dedicated to a broad OpenClaw validation
+run.
+
 Matrix runs automatically produce a bundle path in the JSON receipt. Bundles
 include `artifact-index.json`, which lists evidence files with byte sizes and
 SHA-256 hashes.
--- a/src/cli.mjs
+++ b/src/cli.mjs
@ -41,7 +41,7 @@ Usage:
  kova plan [--scenario <id>] [--json]
  kova run --target <selector> [--from <selector>] [--scenario <id>] [--state <id>] [--auth <mock|live|skip>] [--repeat <n>] [--baseline [path]] [--save-baseline [path] --reviewed-good] [--regression-thresholds <json>] [--report-dir <path>] [--health-samples <n>] [--readiness-interval-ms <n>] [--resource-sample-interval-ms <n>] [--deep-profile] [--node-profile] [--heap-snapshot] [--profile-on-failure] [--execute] [--keep-env] [--retain-on-failure] [--json]
  kova matrix plan --profile <id> --target <selector> [--from <selector>] [--include <filter>] [--exclude <filter>] [--parallel <n>] [--json]
-  kova matrix run --profile <id> --target <selector> [--from <selector>] [--include <filter>] [--exclude <filter>] [--auth <mock|live|skip>] [--parallel <n>] [--repeat <n>] [--baseline [path]] [--save-baseline [path] --reviewed-good] [--regression-thresholds <json>] [--fail-fast] [--gate] [--report-dir <path>] [--health-samples <n>] [--readiness-interval-ms <n>] [--resource-sample-interval-ms <n>] [--deep-profile] [--node-profile] [--heap-snapshot] [--profile-on-failure] [--execute] [--keep-env] [--retain-on-failure] [--json]
+  kova matrix run --profile <id> --target <selector> [--from <selector>] [--include <filter>] [--exclude <filter>] [--auth <mock|live|skip>] [--parallel <n>] [--repeat <n>] [--baseline [path]] [--save-baseline [path] --reviewed-good] [--regression-thresholds <json>] [--fail-fast] [--gate] [--report-dir <path>] [--health-samples <n>] [--readiness-interval-ms <n>] [--resource-sample-interval-ms <n>] [--deep-profile] [--node-profile] [--heap-snapshot] [--profile-on-failure] [--execute] [--allow-exhaustive] [--keep-env] [--retain-on-failure] [--json]
  kova report summarize <report.json> [--json]
  kova report paste <report.json>
  kova report compare <baseline.json> <current.json> [--thresholds <json>] [--fixer] [--json]
@ -61,6 +61,7 @@ Notes:
  Kova uses OCM to create isolated OpenClaw envs and runtimes.
  Kova reports on OpenClaw behavior, not OCM behavior.
  run is dry-run/report-only unless --execute is passed.
+  Executed exhaustive matrix runs require --allow-exhaustive.
  --repeat records independent samples and computes aggregate performance stats.
  --auth defaults to mock so every disposable env has deliberate model auth unless a scenario opts out.
  setup provider/auth choices accept either numbers from the prompt or names such as openai, anthropic, env-only, api-key.
--- a/src/main.mjs
+++ b/src/main.mjs
@ -292,6 +292,7 @@ async function loadRegressionThresholds(flags) {
 async function matrixRun(flags) {
  const registry = await loadRegistryContext();
  const profile = await loadProfile(required(flags.profile, "--profile"));
+  validateProfileExecutionFlags(profile, flags);
  const target = required(flags.target, "--target");
  validateBaselineExecutionFlags(flags);
  const targetPlan = resolveTarget(target, "target");
@ -461,6 +462,12 @@ async function matrixRun(flags) {
  failGateIfNeeded(gate);
 }

+function validateProfileExecutionFlags(profile, flags) { // Guard: refuse to execute the "exhaustive" profile unless the caller explicitly opted in.
+  if (flags.execute === true && profile.id === "exhaustive" && flags.allow_exhaustive !== true) { // strict `=== true` checks on both flags; NOTE(review): assumes the flag parser stores --allow-exhaustive as flags.allow_exhaustive — confirm
+    throw new Error("executing profile 'exhaustive' requires --allow-exhaustive"); // same text is passed to failingCommandCheck in src/selfcheck.mjs; keep the two in sync
+  }
+}
+
 async function retainFailedGateArtifacts(report, reportPath, jsonPath, bundle) {
  report.retainedGateArtifacts = {
    status: "pending"
--- a/src/selfcheck.mjs
+++ b/src/selfcheck.mjs
@ -195,6 +195,11 @@ export async function runSelfCheck(flags = {}) {
      "node bin/kova.mjs run --target runtime:stable --scenario fresh-install --execute --save-baseline --json",
      "--save-baseline requires --reviewed-good"
    ));
+    checks.push(await failingCommandCheck(
+      "exhaustive-execute-requires-explicit-flag",
+      "node bin/kova.mjs matrix run --profile exhaustive --target runtime:stable --execute --json",
+      "executing profile 'exhaustive' requires --allow-exhaustive"
+    ));
    checks.push(await jsonCommandCheck("cleanup-json", "node bin/kova.mjs cleanup envs --json", (data) => {
      assertEqual(data.schemaVersion, "kova.cleanup.envs.v1", "cleanup schema");
      assertEqual(data.execute, false, "cleanup execute flag");