From fe3d40552f2977de07698cfa0896b570a5f2ae8c Mon Sep 17 00:00:00 2001 From: Shakker Date: Thu, 7 May 2026 08:29:54 +0100 Subject: [PATCH] fix: narrow dashboard health probe --- ...shboard-session-send-turn-existing-user.json | 4 ++-- scenarios/dashboard-session-send-turn.json | 4 ++-- src/evaluator.mjs | 17 ++++++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/scenarios/dashboard-session-send-turn-existing-user.json b/scenarios/dashboard-session-send-turn-existing-user.json index c737f68..b7be6eb 100644 --- a/scenarios/dashboard-session-send-turn-existing-user.json +++ b/scenarios/dashboard-session-send-turn-existing-user.json @@ -96,11 +96,11 @@ "title": "Post-Dashboard Gateway Health", "intent": "Verify the cloned gateway remains responsive after the dashboard-style turn and collect logs for embedded-run/liveness evidence.", "commands": [ - "ocm @{env} -- status", + "ocm @{env} -- gateway status --json --require-rpc", "ocm logs {env} --tail 300 --raw" ], "evidence": [ - "gateway status", + "gateway status probe", "embedded-run traces", "liveness warnings", "plugin errors", diff --git a/scenarios/dashboard-session-send-turn.json b/scenarios/dashboard-session-send-turn.json index 1163cac..456420c 100644 --- a/scenarios/dashboard-session-send-turn.json +++ b/scenarios/dashboard-session-send-turn.json @@ -104,11 +104,11 @@ "title": "Post-Dashboard Gateway Health", "intent": "Verify the gateway remains responsive after dashboard-style cold and warm message turns.", "commands": [ - "ocm @{env} -- status", + "ocm @{env} -- gateway status --json --require-rpc", "ocm logs {env} --tail 300 --raw" ], "evidence": [ - "gateway status", + "gateway status probe", "provider logs", "plugin errors", "memory after dashboard turn" diff --git a/src/evaluator.mjs b/src/evaluator.mjs index a16323b..794171e 100644 --- a/src/evaluator.mjs +++ b/src/evaluator.mjs @@ -127,14 +127,14 @@ export function evaluateRecord(record, scenario, options = {}) { const coldReadyMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm start ")); const warmReadyMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm service restart ")); const upgradeMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm upgrade ")); - const statusMs = maxDurationWhere(allResults, (command) => command.includes(" -- status")); + const statusMs = maxDurationWhere(allResults, isPostAgentStatusCommand); const pluginsListMs = maxDurationWhere(allResults, (command) => command.includes(" -- plugins list")); const pluginInstallMs = maxDurationWhere(allResults, (command) => command.includes("run-official-plugin-install.mjs") || command.includes(" -- plugins install")); const modelsListMs = maxDurationWhere(allResults, (command) => command.includes(" -- models list")); const rssGrowthMb = maxNullable(resourceSummary.maxTotalRssGrowthMb); const gatewayRssGrowthMb = maxNullable(resourceSummary.maxGatewayRssGrowthMb); - checkDuration(violations, allResults, "statusMs", thresholds.statusMs, (command) => command.includes(" -- status")); + checkDuration(violations, allResults, "statusMs", thresholds.statusMs, isPostAgentStatusCommand); checkDuration(violations, allResults, "pluginsListMs", thresholds.pluginsListMs, (command) => command.includes(" -- plugins list")); checkDuration(violations, allResults, "pluginUpdateDryRunMs", thresholds.pluginUpdateDryRunMs, (command) => command.includes(" -- plugins update") && command.includes("--dry-run") @@ -1263,7 +1263,9 @@ function evaluateAgentFailureContainment({ turns, record, thresholds, gatewayExp ? thresholds.agentContainmentHealthFailures : (typeof thresholds.providerFailureHealthFailures === "number" ? thresholds.providerFailureHealthFailures : 0); const finalGatewayState = record.finalMetrics?.service?.gatewayState ?? null; - const statusCommands = collectResults(record).filter((result) => /\s--\sstatus\b|@\S+\s+--\s+status\b/.test(result.command) || result.command.includes(" -- status")); + const statusCommands = collectResults(record).filter((result) => + isPostAgentStatusCommand(result.command) + ); const statusWorks = statusCommands.length === 0 ? null : statusCommands.some((result) => result.status === 0 && result.timedOut !== true); return { @@ -1284,6 +1286,15 @@ function evaluateAgentFailureContainment({ turns, record, thresholds, gatewayExp }; } +function isPostAgentStatusCommand(command) { + return ( + /\s--\sstatus\b|@\S+\s+--\s+status\b/.test(command) || + command.includes(" -- status") || + /\s--\s+gateway\s+status\b/.test(command) || + /@\S+\s+--\s+gateway\s+status\b/.test(command) + ); +} + function checkAgentFailureContainment(violations, containment) { if (containment.processLeaksOk !== true) { const first = containment.leakedProcesses[0];