feat: add scoped health readiness model

This commit is contained in:
Shakker 2026-05-05 03:49:39 +01:00
parent ed3384487a
commit b8b7c023eb
No known key found for this signature in database
61 changed files with 1100 additions and 198 deletions

View File

@ -224,6 +224,87 @@ Current metrics include:
- runtime dependency staging grouped by bundled plugin when OpenClaw emits
`runtimeDeps.stage` spans with `pluginId` attributes
## Health And Readiness
Records keep existing compatibility fields such as `timeToListeningMs`,
`timeToHealthReadyMs`, `readinessClassification`, `healthFailures`, and
`healthP95Ms`. New readers should use `records[*].measurements.health`:
```json
{
"schemaVersion": "kova.health.v1",
"readiness": {
"phaseId": "cold-start",
"listeningReadyAtMs": 2536,
"healthReadyAtMs": 3005,
"classification": "ready",
"severity": "pass",
"reason": "gateway became healthy within the readiness threshold",
"thresholdMs": 30000,
"deadlineMs": 120000,
"attempts": 4
},
"startupSamples": {
"scope": "startup-sample",
"count": 4,
"okCount": 1,
"failureCount": 3,
"p95Ms": 120,
"maxMs": 120,
"slowestPhaseId": "cold-start"
},
"postReadySamples": {
"scope": "post-ready",
"count": 9,
"okCount": 9,
"failureCount": 0,
"p95Ms": 469,
"maxMs": 652,
"slowestPhaseId": "api-latency"
},
"unknownSamples": {
"scope": "unknown",
"count": 0,
"okCount": 0,
"failureCount": 0,
"p95Ms": null,
"maxMs": null,
"slowestPhaseId": null
},
"final": {
"scope": "final",
"gatewayState": "running",
"ok": true,
"healthOk": true,
"failureCount": 0,
"p95Ms": 90,
"maxMs": 90,
"slowestPhaseId": "final"
},
"slowestSample": {
"scope": "post-ready",
"phaseId": "api-latency",
"durationMs": 652
}
}
```
Scenario phases declare `healthScope` so the evaluator does not infer meaning
from phase ids. Allowed values are `readiness`, `startup-sample`, `post-ready`,
`final`, and `none`. Old or externally produced reports without phase scope are
treated as `unknown` when summarized for compatibility.
Compatibility derivation:
- `timeToListeningMs`: `measurements.health.readiness.listeningReadyAtMs`
- `timeToHealthReadyMs`: `measurements.health.readiness.healthReadyAtMs`
- `readinessClassification`: `measurements.health.readiness.classification`
- `healthFailures`: startup + post-ready + unknown + final health failures
- `healthP95Ms`: the larger of the startup and post-ready p95 values, falling
back to the legacy aggregate p95 for old reports
- `startupHealthP95Ms`: `measurements.health.startupSamples.p95Ms`
- `postReadyHealthP95Ms`: `measurements.health.postReadySamples.p95Ms`
Role-specific thresholds can fail a scenario separately from total process-tree
thresholds. For example, a report can show that `gateway` exceeded memory while
`package-manager` stayed normal, or that `package-manager` spiked during local
@ -276,8 +357,8 @@ Aggregate metric fields include:
- `samples`
Current aggregate metrics include startup readiness, TCP listening, RSS, CPU,
event-loop delay, agent turn latency, health p95, and runtime dependency
staging.
event-loop delay, agent turn latency, compatibility health p95, startup health
p95, post-ready health p95, and runtime dependency staging.
Baseline stores use schema `kova.baselines.v1`. Baseline read/write requires
`--execute` so stored evidence comes from real OpenClaw runs, not dry-run plans.

View File

@ -30,6 +30,7 @@
"diagnosticPresent",
"doctorFixMs",
"eventLoopMaxMs",
"finalHealthFailures",
"gatewayReadyHardTimeoutMs",
"gatewayReadyMs",
"gatewayResponsive",
@ -64,6 +65,8 @@
"pluginIndexPresent",
"pluginInstallMs",
"pluginLoadFailures",
"postReadyHealthFailures",
"postReadyHealthP95Ms",
"pluginUpdateDryRunMs",
"pluginsListMs",
"preProviderDominanceRatio",
@ -89,6 +92,8 @@
"statusAfterFailureMs",
"statusAfterModelsMs",
"statusMs",
"startupHealthFailures",
"startupHealthP95Ms",
"syncFsStallDetected",
"tuiSmokeMs",
"upgradeMs",

View File

@ -42,7 +42,8 @@
"runtime binding",
"startup readiness",
"no Kova auth setup phase"
]
],
"healthScope": "readiness"
},
{
"id": "missing-auth-agent-turn",
@ -57,7 +58,8 @@
"no provider request",
"process leak snapshot",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-auth-failure-health",
@ -72,7 +74,8 @@
"auth failure logs",
"plugin errors",
"memory after auth failure"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -43,7 +43,8 @@
"gateway port",
"runtime binding",
"env created without service"
]
],
"healthScope": "none"
},
{
"id": "cold-agent-turn",
@ -58,7 +59,8 @@
"mock provider request timing",
"gateway health after cold turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "warm-agent-turn",
@ -73,7 +75,8 @@
"mock provider request timing",
"cold/warm delta",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-agent-health",
@ -86,7 +89,8 @@
"env status",
"plugin errors",
"memory after agent turns"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -37,7 +37,8 @@
"gateway port",
"runtime binding",
"env created without service"
]
],
"healthScope": "none"
},
{
"id": "gateway-start",
@ -51,7 +52,8 @@
"gateway service installed",
"gateway service started",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "gateway-agent-turn",
@ -66,7 +68,8 @@
"mock provider request timing",
"gateway health after turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-agent-health",
@ -81,7 +84,8 @@
"provider logs",
"plugin errors",
"memory after agent turn"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -46,7 +46,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "cold-session-turn",
@ -60,7 +61,8 @@
"assistant text",
"provider request timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "warm-session-turn",
@ -74,7 +76,8 @@
"assistant text",
"provider request timing",
"cold/warm delta"
]
],
"healthScope": "post-ready"
},
{
"id": "session-turn-3",
@ -88,7 +91,8 @@
"assistant text",
"provider request timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "session-turn-4",
@ -102,7 +106,8 @@
"assistant text",
"provider request timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "session-turn-5",
@ -116,7 +121,8 @@
"assistant text",
"provider request timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "session-turn-6",
@ -130,7 +136,8 @@
"assistant text",
"provider request timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-session-health",
@ -146,7 +153,8 @@
"plugin errors",
"memory after repeated turns",
"process leak summary"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -36,7 +36,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "network-offline-turn",
@ -49,7 +50,8 @@
"bounded network failure",
"gateway status after failure",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-network-health",
@ -64,7 +66,8 @@
"network/provider failure logs",
"plugin errors",
"memory after network failure"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -47,7 +47,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "concurrent-provider-turns",
@ -63,7 +64,8 @@
"pre-provider timing",
"role resource samples",
"process leak snapshot"
]
],
"healthScope": "post-ready"
},
{
"id": "post-concurrency-health",
@ -78,7 +80,8 @@
"provider logs",
"plugin errors",
"memory after concurrent turns"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -38,7 +38,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "malformed-provider-turn",
@ -53,7 +54,8 @@
"malformed provider evidence",
"gateway remains supervised",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -68,7 +70,8 @@
"provider logs",
"plugin errors",
"memory after malformed response"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -42,7 +42,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "transient-provider-failure-turn",
@ -57,7 +58,8 @@
"provider 200 recovery evidence",
"gateway remains supervised",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "recovery-provider-turn",
@ -71,7 +73,8 @@
"provider recovery timing",
"gateway remains healthy",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -86,7 +89,8 @@
"provider logs",
"plugin errors",
"memory after recovery"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -44,7 +44,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "slow-provider-turn",
@ -58,7 +59,8 @@
"provider delay timing",
"pre-provider timing",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -73,7 +75,8 @@
"provider logs",
"plugin errors",
"memory after provider delay"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -41,7 +41,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "streaming-stall-provider-turn",
@ -57,7 +58,8 @@
"process leak snapshot",
"gateway remains supervised",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -72,7 +74,8 @@
"provider logs",
"plugin errors",
"memory after streaming stall"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -39,7 +39,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "timeout-provider-turn",
@ -54,7 +55,8 @@
"provider timeout/abort timing",
"gateway remains supervised",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -69,7 +71,8 @@
"provider logs",
"plugin errors",
"memory after timeout"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -39,7 +39,8 @@
"gateway status",
"gateway port",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "browser-smoke",
@ -54,7 +55,8 @@
"opened tab count",
"snapshot timing",
"browser stop timing"
]
],
"healthScope": "post-ready"
},
{
"id": "post-browser-health",
@ -68,7 +70,8 @@
"status after browser automation",
"browser plugin errors",
"gateway errors"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -30,7 +30,8 @@
"bundled plugin count",
"readiness classification",
"dependency staging"
]
],
"healthScope": "readiness"
},
{
"id": "inspect",
@ -46,7 +47,8 @@
"registry refresh",
"missing package/module errors",
"plugin service failures"
]
],
"healthScope": "post-ready"
},
{
"id": "restart",
@ -61,7 +63,8 @@
"warm readiness",
"bundled plugin reload",
"runtime dependency reuse"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -29,7 +29,8 @@
"dependency staging duration",
"installed dependency list",
"missing dependency errors"
]
],
"healthScope": "readiness"
},
{
"id": "warm-restart",
@ -44,7 +45,8 @@
"warm ready time",
"dependency staging reuse",
"missing dependency errors"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -37,7 +37,8 @@
"Node version",
"runtime version",
"gateway port"
]
],
"healthScope": "readiness"
},
{
"id": "core-smoke",
@ -53,7 +54,8 @@
"plugin list",
"filesystem stall logs",
"health latency"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -31,7 +31,8 @@
"gateway status",
"gateway port",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "dashboard",
@ -44,7 +45,8 @@
"dashboard URL",
"token handling",
"command latency"
]
],
"healthScope": "post-ready"
},
{
"id": "post-dashboard-health",
@ -58,7 +60,8 @@
"status after dashboard command",
"websocket disconnect logs",
"gateway errors"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -41,7 +41,8 @@
"source env",
"clone root",
"cloned OpenClaw config"
]
],
"healthScope": "none"
},
{
"id": "upgrade",
@ -55,7 +56,8 @@
"upgrade JSON",
"runtime binding",
"post-upgrade service state"
]
],
"healthScope": "readiness"
},
{
"id": "gateway-start",
@ -69,7 +71,8 @@
"gateway service installed",
"gateway service started",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "dashboard-session-turn",
@ -85,7 +88,8 @@
"provider timing",
"gateway health after turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-dashboard-health",
@ -101,7 +105,8 @@
"liveness warnings",
"plugin errors",
"memory after dashboard turn"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -38,7 +38,8 @@
"gateway port",
"runtime binding",
"env created without service"
]
],
"healthScope": "none"
},
{
"id": "gateway-start",
@ -52,7 +53,8 @@
"gateway service installed",
"gateway service started",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "dashboard-session-turn",
@ -67,7 +69,8 @@
"mock provider request timing",
"gateway health after turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-dashboard-health",
@ -82,7 +85,8 @@
"provider logs",
"plugin errors",
"memory after dashboard turn"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -27,7 +27,8 @@
"evidence": [
"baseline status",
"gateway PID"
]
],
"healthScope": "readiness"
},
{
"id": "diagnostics",
@ -41,7 +42,8 @@
"error classification",
"gateway survival",
"recovery guidance"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -30,7 +30,8 @@
"env name",
"runtime binding",
"gateway port"
]
],
"healthScope": "readiness"
},
{
"id": "readiness",
@ -45,7 +46,8 @@
"gateway state",
"gateway PID",
"health/status result"
]
],
"healthScope": "post-ready"
},
{
"id": "plugins",
@ -59,7 +61,8 @@
"plugins list output",
"plugin update dry-run output",
"missing dependency log scan"
]
],
"healthScope": "post-ready"
},
{
"id": "models",
@ -72,7 +75,8 @@
"models list duration",
"timeout behavior",
"gateway health after model list"
]
],
"healthScope": "post-ready"
},
{
"id": "logs",
@ -85,7 +89,8 @@
"startup logs",
"missing dependency errors",
"plugin metadata scan warnings"
]
],
"healthScope": "post-ready"
},
{
"id": "cleanup",
@ -96,7 +101,8 @@
],
"evidence": [
"destroy result"
]
],
"healthScope": "none"
}
],
"proves": [

View File

@ -13,9 +13,9 @@
"thresholds": {
"coldReadyMs": 30000,
"warmReadyMs": 15000,
"healthP95Ms": 1000,
"peakRssMb": 900,
"eventLoopMaxMs": 500
"eventLoopMaxMs": 500,
"postReadyHealthP95Ms": 1000
},
"phases": [
{
@ -32,7 +32,8 @@
"RSS",
"CPU",
"startup logs"
]
],
"healthScope": "readiness"
},
{
"id": "api-latency",
@ -47,7 +48,8 @@
"command durations",
"health after each command",
"logs"
]
],
"healthScope": "post-ready"
},
{
"id": "warm-restart",
@ -61,7 +63,8 @@
"warm ready time",
"RSS delta",
"startup log delta"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -37,7 +37,8 @@
"gateway status",
"gateway port",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "mcp-bridge",
@ -51,7 +52,8 @@
"tools/list timing",
"tool count",
"bridge process exit"
]
],
"healthScope": "post-ready"
},
{
"id": "post-mcp-health",
@ -65,7 +67,8 @@
"status after MCP bridge",
"MCP bridge errors",
"gateway errors"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -39,7 +39,8 @@
"gateway port",
"runtime binding",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "media-timeout",
@ -53,7 +54,8 @@
"provider timeout observed",
"gateway status after timeout",
"mock provider request log"
]
],
"healthScope": "post-ready"
},
{
"id": "post-media-health",
@ -68,7 +70,8 @@
"provider timeout logs",
"plugin errors",
"memory after media timeout"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -36,7 +36,8 @@
"evidence": [
"fresh env started",
"baseline plugin list captured"
]
],
"healthScope": "readiness"
},
{
"id": "install",
@ -50,7 +51,8 @@
"security scanner results",
"plugins appear in list",
"registry refresh succeeds"
]
],
"healthScope": "post-ready"
},
{
"id": "restart",
@ -67,7 +69,8 @@
"official plugin remains installed",
"plugin load logs",
"missing dependency scan"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -37,7 +37,8 @@
"gateway port",
"runtime binding",
"env created without service"
]
],
"healthScope": "none"
},
{
"id": "gateway-start",
@ -51,7 +52,8 @@
"gateway service installed",
"gateway service started",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "openai-compatible-turn",
@ -66,7 +68,8 @@
"mock provider request timing",
"gateway health after turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-http-health",
@ -81,7 +84,8 @@
"provider logs",
"plugin errors",
"memory after HTTP turn"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -28,7 +28,8 @@
"evidence": [
"baseline gateway status",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "reject-invalid-plugin",
@ -41,7 +42,8 @@
"install command rejected",
"validation error",
"no install record committed"
]
],
"healthScope": "post-ready"
},
{
"id": "post-failure-health",
@ -56,7 +58,8 @@
"gateway status",
"plugin list",
"logs after invalid install"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -29,7 +29,8 @@
"evidence": [
"baseline plugin list",
"gateway readiness"
]
],
"healthScope": "readiness"
},
{
"id": "install",
@ -45,7 +46,8 @@
"plugin index update",
"registry refresh",
"plugin appears in list"
]
],
"healthScope": "post-ready"
},
{
"id": "restart",
@ -60,7 +62,8 @@
"restart readiness",
"plugin load logs",
"missing dependency scan"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -28,7 +28,8 @@
"plugin list",
"update dry-run",
"runtime dependency errors"
]
],
"healthScope": "readiness"
},
{
"id": "restart",
@ -43,7 +44,8 @@
"restart status",
"logs",
"missing dependency scan"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -31,7 +31,8 @@
"install result",
"plugin entry registered",
"gateway readiness before load"
]
],
"healthScope": "readiness"
},
{
"id": "restart",
@ -46,7 +47,8 @@
"missing dependency diagnostic",
"plugin load failure",
"gateway remains supervised"
]
],
"healthScope": "readiness"
},
{
"id": "survival",
@ -59,7 +61,8 @@
"evidence": [
"status after plugin failure",
"plugin list after failure"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -31,7 +31,8 @@
"evidence": [
"install record",
"plugin appears before uninstall"
]
],
"healthScope": "readiness"
},
{
"id": "remove",
@ -46,7 +47,8 @@
"uninstall output",
"install index cleanup",
"registry after removal"
]
],
"healthScope": "post-ready"
},
{
"id": "restart",
@ -61,7 +63,8 @@
"restart readiness",
"removed plugin not loaded",
"missing dependency scan"
]
],
"healthScope": "readiness"
}
],
"proves": [

View File

@ -31,7 +31,8 @@
"evidence": [
"plugin install record",
"plugin appears in list"
]
],
"healthScope": "readiness"
},
{
"id": "update",
@ -46,7 +47,8 @@
"plugin update dry-run output",
"tracked plugin metadata",
"registry refresh"
]
],
"healthScope": "post-ready"
},
{
"id": "post-update-health",
@ -60,7 +62,8 @@
"status after update",
"plugin lifecycle logs",
"dependency errors"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -28,7 +28,8 @@
"models list duration",
"provider timeout warnings",
"gateway status after model discovery"
]
],
"healthScope": "readiness"
},
{
"id": "logs",
@ -41,7 +42,8 @@
"timeout logs",
"auth skip logs",
"gateway stall logs"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -36,7 +36,8 @@
"time to listening",
"time to health ready",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "post-start",
@ -52,7 +53,8 @@
"status command latency",
"plugin list",
"plugin startup health"
]
],
"healthScope": "post-ready"
},
{
"id": "startup-logs",
@ -66,7 +68,8 @@
"missing dependency errors",
"plugin service failures",
"startup phase logs"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -16,7 +16,7 @@
"soakCommandFailures": 0,
"soakHealthFailures": 0,
"rssGrowthMb": 300,
"healthP95Ms": 1000
"postReadyHealthP95Ms": 1000
},
"phases": [
{
@ -31,7 +31,8 @@
"baseline PID",
"baseline RSS",
"baseline health"
]
],
"healthScope": "readiness"
},
{
"id": "loop",
@ -44,7 +45,8 @@
"latency trend",
"RSS trend",
"logs during loop"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -37,7 +37,8 @@
"gateway port",
"runtime binding",
"env created without service"
]
],
"healthScope": "none"
},
{
"id": "gateway-start",
@ -51,7 +52,8 @@
"gateway service installed",
"gateway service started",
"startup readiness"
]
],
"healthScope": "readiness"
},
{
"id": "tui-message-turn",
@ -66,7 +68,8 @@
"mock provider request timing",
"gateway health after turn",
"role resource samples"
]
],
"healthScope": "post-ready"
},
{
"id": "post-tui-health",
@ -81,7 +84,8 @@
"provider logs",
"plugin errors",
"memory after TUI turn"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -30,7 +30,8 @@
"evidence": [
"gateway status",
"readiness classification"
]
],
"healthScope": "readiness"
},
{
"id": "tui-smoke",
@ -43,7 +44,8 @@
"TUI render time",
"connected screen",
"clean interrupt"
]
],
"healthScope": "post-ready"
},
{
"id": "post-tui-health",
@ -57,7 +59,8 @@
"status after TUI",
"TUI disconnect logs",
"gateway errors"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -41,7 +41,8 @@
"source env",
"clone root",
"pre-upgrade service status"
]
],
"healthScope": "none"
},
{
"id": "upgrade",
@ -55,7 +56,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -74,7 +76,8 @@
"plugins install index",
"doctor output",
"gateway logs without missing dependency/plugin load failures"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -28,7 +28,8 @@
"clone result",
"source env",
"clone root"
]
],
"healthScope": "none"
},
{
"id": "source-runtime",
@ -41,7 +42,8 @@
"evidence": [
"pre-upgrade runtime",
"pre-upgrade gateway status"
]
],
"healthScope": "readiness"
},
{
"id": "upgrade",
@ -55,7 +57,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -72,7 +75,8 @@
"plugins folder/index presence",
"doctor output",
"gateway logs"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -32,7 +32,8 @@
"clone result",
"source env",
"clone root"
]
],
"healthScope": "none"
},
{
"id": "source-runtime",
@ -47,7 +48,8 @@
"2026.4.20 upgrade output",
"pre-upgrade service status",
"pre-upgrade OpenClaw status"
]
],
"healthScope": "readiness"
},
{
"id": "upgrade",
@ -61,7 +63,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -78,7 +81,8 @@
"plugins folder/index presence",
"doctor output",
"gateway logs"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -35,7 +35,8 @@
"clone result",
"source env",
"clone root"
]
],
"healthScope": "none"
},
{
"id": "source-runtime",
@ -52,7 +53,8 @@
"pre-upgrade service status",
"pre-upgrade OpenClaw status",
"known 2026.4.24 plugin/runtime-deps logs"
]
],
"healthScope": "readiness"
},
{
"id": "upgrade",
@ -66,7 +68,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -83,7 +86,8 @@
"plugins install index",
"doctor output",
"gateway logs without missing dependency/plugin load failures"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -42,7 +42,8 @@
"stable channel start output",
"pre-upgrade gateway status",
"pre-upgrade OpenClaw status"
]
],
"healthScope": "readiness"
},
{
"id": "upgrade",
@ -56,7 +57,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -75,7 +77,8 @@
"plugins install index",
"doctor output",
"gateway logs without missing dependency/plugin load failures"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -39,7 +39,8 @@
"stable channel start output",
"pre-upgrade gateway status",
"pre-upgrade OpenClaw status"
]
],
"healthScope": "readiness"
},
{
"id": "upgrade",
@ -53,7 +54,8 @@
"snapshot id",
"doctor/update output",
"rollback status"
]
],
"healthScope": "readiness"
},
{
"id": "post-upgrade",
@ -72,7 +74,8 @@
"plugins install index",
"doctor output",
"gateway logs without missing dependency/plugin load failures"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -25,8 +25,8 @@
"soakCommandFailures": 0,
"soakHealthFailures": 0,
"soakHealthP95Ms": 1000,
"healthP95Ms": 1000,
"peakRssMb": 1000
"peakRssMb": 1000,
"postReadyHealthP95Ms": 1000
},
"phases": [
{
@ -41,7 +41,8 @@
"startup readiness",
"gateway PID",
"baseline RSS and CPU"
]
],
"healthScope": "readiness"
},
{
"id": "restart-after-workspace",
@ -55,7 +56,8 @@
"restart readiness",
"post-fixture gateway status",
"resource samples during restart"
]
],
"healthScope": "readiness"
},
{
"id": "user-facing-commands",
@ -74,7 +76,8 @@
"short repeated command p95",
"health p95",
"RSS and CPU peaks"
]
],
"healthScope": "post-ready"
}
],
"proves": [

View File

@ -1,6 +1,7 @@
import { buildAgentTurnBreakdown } from "./collectors/agent-turns.mjs";
import { computeProviderTurnAttribution } from "./collectors/provider.mjs";
import { summarizeRuntimeDepsLogs } from "./collectors/logs.mjs";
import { buildHealthMeasurement, deriveHealthCompatibility } from "./health.mjs";
import { resolveThresholdPolicy } from "./evaluation/thresholds.mjs";
import {
checkAggregateThreshold,
@ -91,8 +92,15 @@ export function evaluateRecord(record, scenario, options = {}) {
providerSimulation: agentProviderSimulation
});
const finalGatewayState = record.finalMetrics?.service?.gatewayState ?? null;
const healthFailures = countHealthFailures(record);
const healthP95Ms = collectHealthP95(record);
const health = buildHealthMeasurement(record, scenario);
const healthCompatibility = deriveHealthCompatibility(health, record);
const healthFailures = healthCompatibility.healthFailures;
const healthP95Ms = healthCompatibility.healthP95Ms;
const startupHealthP95Ms = healthCompatibility.startupHealthP95Ms;
const postReadyHealthP95Ms = healthCompatibility.postReadyHealthP95Ms;
const startupHealthFailures = healthCompatibility.startupHealthFailures;
const postReadyHealthFailures = healthCompatibility.postReadyHealthFailures;
const finalHealthFailures = healthCompatibility.finalHealthFailures;
const soakEvidence = collectSoakEvidence(allResults);
const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults);
const browserAutomationEvidence = collectBrowserAutomationEvidence(allResults);
@ -101,10 +109,10 @@ export function evaluateRecord(record, scenario, options = {}) {
const officialPluginEvidence = collectOfficialPluginEvidence(allResults);
const listeningFailures = countListeningFailures(record);
const tcpConnectMaxMs = collectTcpConnectMax(record);
const timeToListeningMs = collectTimeToListening(record);
const timeToHealthReadyMs = collectTimeToHealthReady(record);
const timeToListeningMs = healthCompatibility.timeToListeningMs ?? collectTimeToListening(record);
const timeToHealthReadyMs = healthCompatibility.timeToHealthReadyMs ?? collectTimeToHealthReady(record);
const readinessFailures = countReadinessFailures(record);
const readinessClassification = collectWorstReadinessClassification(record);
const readinessClassification = healthCompatibility.readinessClassification ?? collectWorstReadinessClassification(record);
const coldReadyMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm start "));
const warmReadyMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm service restart "));
const upgradeMs = maxDurationWhere(allResults, (command) => command.startsWith("ocm upgrade "));
@ -203,6 +211,56 @@ export function evaluateRecord(record, scenario, options = {}) {
});
}
if (typeof thresholds.startupHealthFailures === "number" && startupHealthFailures > thresholds.startupHealthFailures) {
violations.push({
kind: "health",
metric: "startupHealthFailures",
expected: `<= ${thresholds.startupHealthFailures}`,
actual: startupHealthFailures,
message: `${startupHealthFailures} startup health check(s) failed, over threshold ${thresholds.startupHealthFailures}`
});
}
if (typeof thresholds.postReadyHealthFailures === "number" && postReadyHealthFailures > thresholds.postReadyHealthFailures) {
violations.push({
kind: "health",
metric: "postReadyHealthFailures",
expected: `<= ${thresholds.postReadyHealthFailures}`,
actual: postReadyHealthFailures,
message: `${postReadyHealthFailures} post-ready liveness check(s) failed, over threshold ${thresholds.postReadyHealthFailures}`
});
}
if (typeof thresholds.finalHealthFailures === "number" && finalHealthFailures > thresholds.finalHealthFailures) {
violations.push({
kind: "health",
metric: "finalHealthFailures",
expected: `<= ${thresholds.finalHealthFailures}`,
actual: finalHealthFailures,
message: `${finalHealthFailures} final health check(s) failed, over threshold ${thresholds.finalHealthFailures}`
});
}
if (typeof thresholds.startupHealthP95Ms === "number" && startupHealthP95Ms !== null && startupHealthP95Ms > thresholds.startupHealthP95Ms) {
violations.push({
kind: "health",
metric: "startupHealthP95Ms",
expected: `<= ${thresholds.startupHealthP95Ms}`,
actual: startupHealthP95Ms,
message: `startup health sample p95 ${startupHealthP95Ms}ms exceeded threshold ${thresholds.startupHealthP95Ms}ms`
});
}
if (typeof thresholds.postReadyHealthP95Ms === "number" && postReadyHealthP95Ms !== null && postReadyHealthP95Ms > thresholds.postReadyHealthP95Ms) {
violations.push({
kind: "health",
metric: "postReadyHealthP95Ms",
expected: `<= ${thresholds.postReadyHealthP95Ms}`,
actual: postReadyHealthP95Ms,
message: `post-ready liveness p95 ${postReadyHealthP95Ms}ms exceeded threshold ${thresholds.postReadyHealthP95Ms}ms`
});
}
if (typeof thresholds.soakMinDurationMs === "number" && soakEvidence.durationMs !== null && soakEvidence.durationMs < thresholds.soakMinDurationMs) {
violations.push({
kind: "soak",
@ -747,6 +805,7 @@ export function evaluateRecord(record, scenario, options = {}) {
agentProviderRequestCount: providerTurn?.requestCount ?? null,
agentProviderRequestMissing: providerTurn?.missingProviderRequest ?? null,
agentProviderAttribution: providerTurn,
health,
tcpConnectMaxMs,
timeToListeningMs,
timeToHealthReadyMs,
@ -758,6 +817,11 @@ export function evaluateRecord(record, scenario, options = {}) {
finalGatewayState,
healthFailures,
healthP95Ms,
startupHealthP95Ms,
postReadyHealthP95Ms,
startupHealthFailures,
postReadyHealthFailures,
finalHealthFailures,
soakEvidence,
mcpBridgeEvidence,
mcpInitializeMs: mcpBridgeEvidence.initializeMs,
@ -1882,26 +1946,6 @@ function countGatewayRestarts(record) {
return commandRestarts + countLogMetric(record, "gatewayRestartMentions");
}
/**
 * Returns the largest health p95 latency reported anywhere in a record.
 *
 * Looks at `metrics.healthSummary.p95Ms` on every phase and at
 * `finalMetrics.healthSummary.p95Ms`; values that are not of type "number"
 * are ignored.
 *
 * @param {object} record - run record with optional `phases` and `finalMetrics`
 * @returns {number|null} the maximum p95 found, or null when none is present
 */
function collectHealthP95(record) {
  // Gather every candidate first (phases in order, then the final metrics),
  // and only afterwards discard the ones that are missing or non-numeric.
  const candidates = [...(record.phases ?? [])].map(
    (phase) => phase.metrics?.healthSummary?.p95Ms
  );
  candidates.push(record.finalMetrics?.healthSummary?.p95Ms);

  const numeric = candidates.filter((value) => typeof value === "number");
  return numeric.length === 0 ? null : Math.max(...numeric);
}
function collectSoakEvidence(results) {
const loops = results
.filter((result) => result.command?.includes("run-soak-loop.mjs"))

319
src/health.mjs Normal file
View File

@ -0,0 +1,319 @@
// Schema identifier written as `schemaVersion` on every health measurement
// produced by buildHealthMeasurement.
export const HEALTH_SCHEMA = "kova.health.v1";
// Every health scope value this module names. NOTE(review): "unknown" appears
// to be reserved for phases that carry no usable scope rather than a value
// scenarios declare themselves - confirm against normalizeHealthScope.
export const HEALTH_SCOPES = ["readiness", "startup-sample", "post-ready", "final", "none", "unknown"];
// Scopes whose entries are summarized together into `startupSamples`.
const startupScopes = new Set(["readiness", "startup-sample"]);
/**
 * Builds the scoped health measurement (`kova.health.v1`) for one run record.
 *
 * Each recorded phase becomes an entry tagged with a health scope taken from
 * the phase itself or, when the phase has none, from the scenario phase with
 * the same id. The record's final metrics are appended as a synthetic entry
 * with scope "final". Entries are then summarized per scope.
 *
 * @param {object} record - run record; reads `phases` and `finalMetrics`
 * @param {object|null} [scenario=null] - scenario definition; reads `phases[*].id`
 *   and `phases[*].healthScope` as a fallback source of scope
 * @returns {object} measurement with `schemaVersion`, `readiness`,
 *   `startupSamples`, `postReadySamples`, `unknownSamples`, `final`, and
 *   `slowestSample`
 */
export function buildHealthMeasurement(record, scenario = null) {
  // Index declared scenario phases by id so a recorded phase without its own
  // healthScope can inherit the declared one.
  const declaredPhases = new Map(
    (scenario?.phases ?? []).map((declared) => [declared.id, declared])
  );

  const scopedEntries = [];
  for (const recorded of record.phases ?? []) {
    const rawScope = recorded.healthScope ?? declaredPhases.get(recorded.id)?.healthScope;
    scopedEntries.push({
      source: "phase",
      phaseId: recorded.id ?? null,
      // NOTE(review): normalizeHealthScope presumably maps missing or
      // unrecognized values to "unknown" - confirm in its definition.
      scope: normalizeHealthScope(rawScope),
      metrics: recorded.metrics ?? null
    });
  }

  // The final metrics always participate as their own "final"-scoped entry.
  const terminalEntry = {
    source: "final",
    phaseId: "final",
    scope: "final",
    metrics: record.finalMetrics ?? null
  };
  scopedEntries.push(terminalEntry);

  const entriesWhereScope = (accepts) =>
    scopedEntries.filter((entry) => accepts(entry.scope));

  const readiness = selectReadiness(scopedEntries);
  const startupSamples = summarizeScopedSamples(
    entriesWhereScope((scope) => startupScopes.has(scope)),
    "startup-sample",
    startupSamplesForEntry
  );
  const postReadySamples = summarizeScopedSamples(
    entriesWhereScope((scope) => scope === "post-ready"),
    "post-ready",
    postReadySamplesForEntry
  );
  // Unknown-scope entries are sampled with the same extractor as post-ready ones.
  const unknownSamples = summarizeScopedSamples(
    entriesWhereScope((scope) => scope === "unknown"),
    "unknown",
    postReadySamplesForEntry
  );
  const final = summarizeFinalHealth(terminalEntry.metrics);

  return {
    schemaVersion: HEALTH_SCHEMA,
    readiness,
    startupSamples,
    postReadySamples,
    unknownSamples,
    final,
    // Only startup, post-ready, and final summaries compete for slowest sample;
    // the unknown-scope summary is not passed in.
    slowestSample: selectSlowestSample([startupSamples, postReadySamples, final])
  };
}
/**
 * Derives the flat compatibility fields from a scoped health measurement.
 *
 * `healthP95Ms` is the larger of the startup and post-ready p95 values; when
 * neither exists it falls back to the maximum p95 found in the record's phase
 * and final health summaries (only if `record` is given). `healthFailures`
 * adds up the startup, post-ready, unknown and final failure counts.
 *
 * @param {object|null|undefined} health Measurement shaped like buildHealthMeasurement's result.
 * @param {object|null} [record] Record used only for the p95 fallback.
 * @returns {object} Compatibility fields for the record measurements.
 */
export function deriveHealthCompatibility(health, record = null) {
  const p95Of = (summary) => summary?.p95Ms ?? null;
  const failuresOf = (summary) => summary?.failureCount ?? 0;

  const startupHealthP95Ms = p95Of(health?.startupSamples);
  const postReadyHealthP95Ms = p95Of(health?.postReadySamples);
  const scopedP95Ms = maxNullable(startupHealthP95Ms, postReadyHealthP95Ms);
  const legacyP95Ms = record ? collectOldHealthP95(record) : null;

  const startupHealthFailures = failuresOf(health?.startupSamples);
  const postReadyHealthFailures = failuresOf(health?.postReadySamples);
  const unknownHealthFailures = failuresOf(health?.unknownSamples);
  const finalHealthFailures = failuresOf(health?.final);

  const readiness = health?.readiness;
  let readinessClassification = null;
  if (readiness) {
    readinessClassification = {
      phaseId: readiness.phaseId,
      state: readiness.classification,
      severity: readiness.severity,
      reason: readiness.reason,
      thresholdMs: readiness.thresholdMs,
      deadlineMs: readiness.deadlineMs,
      listeningReadyAtMs: readiness.listeningReadyAtMs,
      healthReadyAtMs: readiness.healthReadyAtMs
    };
  }

  return {
    timeToListeningMs: readiness?.listeningReadyAtMs ?? null,
    timeToHealthReadyMs: readiness?.healthReadyAtMs ?? null,
    readinessClassification,
    healthFailures: startupHealthFailures + postReadyHealthFailures + unknownHealthFailures + finalHealthFailures,
    healthP95Ms: scopedP95Ms ?? legacyP95Ms,
    startupHealthP95Ms,
    postReadyHealthP95Ms,
    startupHealthFailures,
    postReadyHealthFailures,
    finalHealthFailures
  };
}
/**
 * Returns `scope` when it is one of HEALTH_SCOPES; every other value
 * (including non-strings and undefined) becomes "unknown".
 */
function normalizeHealthScope(scope) {
  if (typeof scope !== "string") {
    return "unknown";
  }
  return HEALTH_SCOPES.includes(scope) ? scope : "unknown";
}
/**
 * Picks the readiness result to report for a record.
 *
 * Entries in a startup scope are preferred; when none of them yields a usable
 * readiness value, every entry is considered. Among the candidates the one
 * with the highest readinessRank wins, and ties go to the larger
 * `healthReadyAtMs` (missing values count as 0).
 *
 * @param {Array<object>} entries Entries with `scope`, `phaseId` and `metrics`.
 * @returns {object|null} The selected readiness value, or null when there is none.
 */
function selectReadiness(entries) {
  const readinessOf = (pool) => pool
    .map((entry) => readinessValue(entry.metrics?.readiness, entry.phaseId))
    .filter(Boolean);

  let candidates = readinessOf(entries.filter((entry) => startupScopes.has(entry.scope)));
  if (candidates.length === 0) {
    candidates = readinessOf(entries);
  }
  if (candidates.length === 0) {
    return null;
  }

  const worstFirst = (left, right) =>
    (readinessRank(right.classification) - readinessRank(left.classification))
    || ((right.healthReadyAtMs ?? 0) - (left.healthReadyAtMs ?? 0));
  candidates.sort(worstFirst);
  return candidates[0];
}
/**
 * Flattens a phase's raw readiness metrics into the reported readiness shape.
 *
 * @param {object|null|undefined} readiness Raw readiness metrics of a phase.
 * @param {string|null} phaseId Id of the phase the metrics belong to.
 * @returns {object|null} Flattened readiness, or null when there is no
 *   classification or `deadlineMs` is not greater than zero.
 */
function readinessValue(readiness, phaseId) {
  if (!readiness?.classification) {
    return null;
  }
  if (!(readiness.deadlineMs > 0)) {
    return null;
  }
  const { state, severity, reason } = readiness.classification;
  const { listeningReadyAtMs, healthReadyAtMs, thresholdMs, deadlineMs } = readiness;
  return {
    phaseId,
    listeningReadyAtMs,
    healthReadyAtMs,
    classification: state,
    severity,
    reason,
    thresholdMs,
    deadlineMs,
    attempts: readiness.attempts ?? null
  };
}
/**
 * Ranks a readiness state; a higher rank wins when selecting which readiness
 * result to report. States not listed here rank 0.
 */
function readinessRank(state) {
  switch (state) {
    case "hard-failure":
      return 4;
    case "unhealthy":
      return 3;
    case "slow-startup":
      return 2;
    case "ready":
      return 1;
    default:
      return 0;
  }
}
/**
 * Selects the health samples that describe startup for an entry.
 *
 * Readiness probe attempts are preferred when at least one was recorded;
 * otherwise the entry's generic `healthSamples` are used.
 *
 * @param {object} entry Entry with optional `metrics`.
 * @returns {Array<object>} Always an array; a missing or non-array
 *   `healthSamples` yields an empty array so callers can iterate safely.
 */
function startupSamplesForEntry(entry) {
  const attempts = entry.metrics?.readiness?.healthAttempts;
  if (Array.isArray(attempts) && attempts.length > 0) {
    return attempts;
  }
  // Guard against malformed reports the same way summarizeFinalHealth does:
  // a non-array value must not reach the caller's for...of loop.
  const samples = entry.metrics?.healthSamples;
  return Array.isArray(samples) ? samples : [];
}
/**
 * Selects an entry's generic health samples.
 *
 * @param {object} entry Entry with optional `metrics`.
 * @returns {Array<object>} Always an array; a missing or non-array
 *   `healthSamples` yields an empty array so callers can iterate safely.
 */
function postReadySamplesForEntry(entry) {
  // Guard against malformed reports the same way summarizeFinalHealth does:
  // a non-array value must not reach the caller's for...of loop.
  const samples = entry.metrics?.healthSamples;
  return Array.isArray(samples) ? samples : [];
}
/**
 * Summarizes the health samples of all entries belonging to one scope.
 *
 * Raw samples (as returned by `sampleSelector`) are preferred. When no entry
 * has any, the entries' precomputed `healthSummary` objects are merged
 * instead: counts are added up, `minMs` takes the minimum, and `p50Ms`,
 * `p95Ms` and `maxMs` take the maximum across summaries. With neither
 * samples nor summaries an empty summary is returned.
 *
 * @param {Array<object>} entries Entries already filtered to the scope.
 * @param {string} scope Scope label written to the result.
 * @param {(entry: object) => Iterable<object>} sampleSelector Picks an entry's samples.
 * @returns {object} Summary for the scope.
 */
function summarizeScopedSamples(entries, scope, sampleSelector) {
  // Tag each sample with its phase so the slowest phase can be attributed later.
  const tagged = entries.flatMap((entry) =>
    [...sampleSelector(entry)].map((sample) => ({ ...sample, phaseId: entry.phaseId }))
  );
  if (tagged.length > 0) {
    return summarizeSamples(tagged, scope);
  }

  const summaries = entries
    .map((entry) => ({ phaseId: entry.phaseId, summary: entry.metrics?.healthSummary }))
    .filter((candidate) => candidate.summary);
  if (summaries.length === 0) {
    return emptyHealthSummary(scope);
  }

  const slowest = summaries.reduce(
    (best, { phaseId, summary }) => {
      const isSlower = typeof summary.maxMs === "number"
        && (best.maxMs === null || summary.maxMs > best.maxMs);
      return isSlower ? { phaseId, maxMs: summary.maxMs } : best;
    },
    { phaseId: null, maxMs: null }
  );
  const fieldValues = (key) => summaries.map(({ summary }) => summary[key]);

  return {
    scope,
    count: sum(summaries, "count"),
    okCount: sum(summaries, "okCount"),
    failureCount: sum(summaries, "failureCount"),
    minMs: minNullable(...fieldValues("minMs")),
    p50Ms: maxNullable(...fieldValues("p50Ms")),
    p95Ms: maxNullable(...fieldValues("p95Ms")),
    maxMs: slowest.maxMs,
    slowestPhaseId: slowest.phaseId
  };
}
/**
 * Computes counts and latency statistics for a list of health samples.
 *
 * Only samples with a numeric `durationMs` contribute to the latency figures;
 * every sample whose `ok` is not exactly `true` counts as a failure.
 *
 * @param {Array<object>} samples Samples with `ok`, `durationMs` and `phaseId`.
 * @param {string} scope Scope label written to the result.
 * @returns {object} Summary with counts, min/p50/p95/max and the slowest phase id.
 */
function summarizeSamples(samples, scope) {
  const timed = samples.filter((sample) => typeof sample.durationMs === "number");
  const ascending = timed.map((sample) => sample.durationMs).sort((left, right) => left - right);

  // The first sample reaching the largest duration names the slowest phase.
  const slowest = timed.reduce(
    (best, sample) => (best === null || sample.durationMs > best.durationMs ? sample : best),
    null
  );

  const okCount = samples.filter((sample) => sample.ok === true).length;
  const hasDurations = ascending.length > 0;

  return {
    scope,
    count: samples.length,
    okCount,
    failureCount: samples.length - okCount,
    minMs: hasDurations ? ascending[0] : null,
    p50Ms: percentile(ascending, 0.5),
    p95Ms: percentile(ascending, 0.95),
    maxMs: hasDurations ? ascending[ascending.length - 1] : null,
    slowestPhaseId: slowest === null ? null : slowest.phaseId ?? null
  };
}
/**
 * Returns the summary used for a scope that has neither samples nor
 * precomputed summaries: zero counts and null latency figures.
 */
function emptyHealthSummary(scope) {
  const counts = { count: 0, okCount: 0, failureCount: 0 };
  const latencies = { minMs: null, p50Ms: null, p95Ms: null, maxMs: null };
  return { scope, ...counts, ...latencies, slowestPhaseId: null };
}
/**
 * Summarizes the health state captured in a record's final metrics.
 *
 * Figures come from the raw `healthSamples` when present, then from the
 * precomputed `healthSummary`, then from the single `health` probe.
 * `ok` is null without metrics; otherwise it requires zero failures and,
 * when a gateway state was recorded, that state to be "running".
 *
 * @param {object|null|undefined} metrics The record's final metrics.
 * @returns {object} Final health summary.
 */
function summarizeFinalHealth(metrics) {
  const probe = metrics?.health;
  const recorded = metrics?.healthSummary;

  let sampled = null;
  if (Array.isArray(metrics?.healthSamples) && metrics.healthSamples.length > 0) {
    const tagged = metrics.healthSamples.map((sample) => ({ ...sample, phaseId: "final" }));
    sampled = summarizeSamples(tagged, "final");
  }

  const failureCount = sampled?.failureCount ?? recorded?.failureCount ?? healthFailureCount([probe]);
  const maxMs = sampled?.maxMs ?? recorded?.maxMs ?? probe?.durationMs ?? null;
  const p95Ms = sampled?.p95Ms ?? recorded?.p95Ms ?? null;
  const gatewayState = metrics?.service?.gatewayState ?? null;

  let ok = null;
  if (metrics) {
    const noFailures = failureCount === 0;
    ok = gatewayState === null ? noFailures : gatewayState === "running" && noFailures;
  }

  return {
    scope: "final",
    gatewayState,
    ok,
    healthOk: probe?.ok ?? null,
    failureCount,
    p95Ms,
    maxMs,
    slowestPhaseId: maxMs === null ? null : "final"
  };
}
/**
 * Finds the summary with the largest numeric `maxMs`.
 *
 * Summaries that are missing or have a non-numeric `maxMs` are skipped, and
 * on ties the earliest summary wins.
 *
 * @param {Array<object|null>} summaries Scope summaries to compare.
 * @returns {{scope: string, phaseId: (string|null), durationMs: number}|null}
 */
function selectSlowestSample(summaries) {
  return summaries.reduce((slowest, summary) => {
    const measurable = Boolean(summary) && typeof summary.maxMs === "number";
    if (!measurable) {
      return slowest;
    }
    if (slowest && !(summary.maxMs > slowest.durationMs)) {
      return slowest;
    }
    return {
      scope: summary.scope,
      phaseId: summary.slowestPhaseId ?? null,
      durationMs: summary.maxMs
    };
  }, null);
}
/**
 * Scope-agnostic p95 fallback: the largest numeric `healthSummary.p95Ms`
 * across all phases and the final metrics, or null when none is numeric.
 */
function collectOldHealthP95(record) {
  const readings = (record?.phases ?? []).map((phase) => phase.metrics?.healthSummary?.p95Ms);
  readings.push(record?.finalMetrics?.healthSummary?.p95Ms);

  const numeric = readings.filter((reading) => typeof reading === "number");
  if (numeric.length === 0) {
    return null;
  }
  return Math.max(...numeric);
}
/**
 * Counts the samples that explicitly report `ok === false`; missing samples
 * and samples without a boolean `ok` are not counted.
 */
function healthFailureCount(samples) {
  let failures = 0;
  for (const sample of samples) {
    if (sample && sample.ok === false) {
      failures += 1;
    }
  }
  return failures;
}
/**
 * Adds up `entry.summary[key]` over all entries, treating a missing summary
 * or a null/undefined field as 0.
 */
function sum(entries, key) {
  let total = 0;
  for (const entry of entries) {
    total = total + (entry.summary?.[key] ?? 0);
  }
  return total;
}
/**
 * Returns the largest of the numeric arguments, ignoring everything that is
 * not of type number; null when no numeric argument was passed.
 */
function maxNullable(...values) {
  const numbers = [];
  for (const value of values) {
    if (typeof value === "number") {
      numbers.push(value);
    }
  }
  if (numbers.length === 0) {
    return null;
  }
  return Math.max(...numbers);
}
/**
 * Returns the smallest of the numeric arguments, ignoring everything that is
 * not of type number; null when no numeric argument was passed.
 */
function minNullable(...values) {
  const numbers = [];
  for (const value of values) {
    if (typeof value === "number") {
      numbers.push(value);
    }
  }
  if (numbers.length === 0) {
    return null;
  }
  return Math.min(...numbers);
}
/**
 * Nearest-rank percentile over an ascending-sorted array.
 *
 * @param {number[]} values Durations sorted in ascending order.
 * @param {number} percentileValue Fraction in [0, 1], e.g. 0.95.
 * @returns {number|null} The selected element, or null for an empty array.
 */
function percentile(values, percentileValue) {
  const size = values.length;
  if (size === 0) {
    return null;
  }
  const rank = Math.ceil(size * percentileValue) - 1;
  // Clamp so that a fraction of 0 (rank -1) or above 1 still lands inside the array.
  const position = Math.max(0, Math.min(rank, size - 1));
  return values[position];
}

View File

@ -20,6 +20,8 @@ export const PERFORMANCE_METRICS = [
{ id: "coldPreProviderMs", title: "Cold Pre-Provider", unit: "ms", regressionKey: "agentLatencyRegressionPercent" },
{ id: "warmPreProviderMs", title: "Warm Pre-Provider", unit: "ms", regressionKey: "agentLatencyRegressionPercent" },
{ id: "healthP95Ms", title: "Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "startupHealthP95Ms", title: "Startup Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "postReadyHealthP95Ms", title: "Post-Ready Health p95", unit: "ms", regressionKey: "startupRegressionPercent" },
{ id: "runtimeDepsStagingMs", title: "Runtime Deps Staging", unit: "ms", regressionKey: "startupRegressionPercent" }
];

View File

@ -1,6 +1,8 @@
import { scenariosDir } from "../paths.mjs";
import { assertNoShapeErrors, loadJsonRegistry, requireArray, requireKebabId, requireObject, requireString } from "./validate.mjs";
// Values a scenario phase may declare as `healthScope`; validatePhases rejects any other string.
export const HEALTH_SCOPES = ["readiness", "startup-sample", "post-ready", "final", "none"];
export async function loadScenarios(selectedId) {
return loadJsonRegistry({
dir: scenariosDir,
@ -108,6 +110,7 @@ function validatePhases(phases, errors) {
requireKebabId(phase, "id", errors, prefix);
requireString(phase, "title", errors, prefix);
requireString(phase, "intent", errors, prefix);
requireString(phase, "healthScope", errors, prefix);
requireArray(phase, "commands", errors, prefix);
requireArray(phase, "evidence", errors, prefix);
@ -120,6 +123,9 @@ function validatePhases(phases, errors) {
validateStringArray(phase.commands, `${prefix}.commands`, errors);
validateStringArray(phase.evidence, `${prefix}.evidence`, errors);
if (typeof phase.healthScope === "string" && !HEALTH_SCOPES.includes(phase.healthScope)) {
errors.push(`${prefix}.healthScope must be one of ${HEALTH_SCOPES.join(", ")}`);
}
if (phase.expectedAgentFailure !== undefined && typeof phase.expectedAgentFailure !== "boolean") {
errors.push(`${prefix}.expectedAgentFailure must be a boolean when set`);
}

View File

@ -21,6 +21,11 @@ const defaultThresholds = {
timeToHealthReadyMs: 5000,
readinessFailures: 0,
healthP95Ms: 1000,
startupHealthFailures: 0,
postReadyHealthFailures: 0,
finalHealthFailures: 0,
startupHealthP95Ms: 1000,
postReadyHealthP95Ms: 1000,
gatewayRestartCount: 0,
providerTimeoutMentions: 0,
eventLoopDelayMentions: 0,
@ -317,6 +322,8 @@ function diagnosticRecordSummary(record) {
providerFinalMs: measurements.agentProviderFinalMs ?? measurements.coldProviderFinalMs ?? null,
runtimeDepsStagingMs: measurements.runtimeDepsStagingMs ?? null,
timeToHealthReadyMs: measurements.timeToHealthReadyMs ?? null,
startupHealthP95Ms: measurements.startupHealthP95Ms ?? null,
postReadyHealthP95Ms: measurements.postReadyHealthP95Ms ?? null,
peakRssMb: measurements.peakRssMb ?? null
};
}
@ -406,6 +413,11 @@ function metricDeltas(baseline, current) {
"timeToHealthReadyMs",
"healthP95Ms",
"healthFailures",
"startupHealthP95Ms",
"postReadyHealthP95Ms",
"startupHealthFailures",
"postReadyHealthFailures",
"finalHealthFailures",
"readinessFailures",
"missingDependencyErrors",
"pluginLoadFailures",

View File

@ -114,8 +114,7 @@ export function renderMarkdownReport(report) {
lines.push(`- TCP connect max: ${record.measurements.tcpConnectMaxMs ?? "unknown"} ms`);
lines.push(`- Missing dependency errors: ${record.measurements.missingDependencyErrors ?? "unknown"}`);
lines.push(`- Final gateway state: ${record.measurements.finalGatewayState ?? "unknown"}`);
lines.push(`- Health failures: ${record.measurements.healthFailures ?? "unknown"}`);
lines.push(`- Health p95: ${record.measurements.healthP95Ms ?? "unknown"} ms`);
lines.push(...formatHealthMeasurementLines(record.measurements));
if (record.measurements.soakEvidence?.available) {
lines.push(`- Soak trend: duration ${record.measurements.soakDurationMs ?? "unknown"} ms; iterations ${record.measurements.soakIterations ?? "unknown"}; command p95 ${record.measurements.soakCommandP95Ms ?? "unknown"} ms; health p95 ${record.measurements.soakHealthP95Ms ?? "unknown"} ms; RSS growth ${record.measurements.rssGrowthMb ?? "unknown"} MB; gateway RSS growth ${record.measurements.gatewayRssGrowthMb ?? "unknown"} MB`);
}
@ -499,6 +498,26 @@ function formatMetrics(metrics) {
return lines.length > 0 ? lines : ["- unavailable"];
}
/**
 * Renders the health section of a record's markdown measurement list.
 *
 * Flat compatibility fields are preferred and the scoped `measurements.health`
 * object is used as a fallback. Reports without a scoped health object get a
 * "Compatibility health p95" line instead of the slowest-sample line.
 *
 * @param {object} measurements The record's measurements.
 * @returns {string[]} Markdown bullet lines.
 */
function formatHealthMeasurementLines(measurements) {
  const health = measurements.health;
  const lines = [
    `- Health failures: ${measurements.healthFailures ?? "unknown"}`,
    `- Startup health p95: ${measurements.startupHealthP95Ms ?? health?.startupSamples?.p95Ms ?? "unknown"} ms`,
    `- Post-ready liveness p95: ${measurements.postReadyHealthP95Ms ?? health?.postReadySamples?.p95Ms ?? "unknown"} ms`,
    `- Final health failures: ${measurements.finalHealthFailures ?? health?.final?.failureCount ?? "unknown"}`
  ];
  if (health?.final) {
    const { healthOk } = health.final;
    // `== null` matches both null and undefined, so a report that omits
    // `healthOk` is shown as "unknown" rather than misreported as "not-ok".
    const healthState = healthOk == null ? "unknown" : healthOk ? "ok" : "not-ok";
    lines.push(`- Final health state: gateway ${health.final.gatewayState ?? "unknown"}; health ${healthState}`);
  }
  if (health?.slowestSample) {
    lines.push(`- Slowest health sample: ${health.slowestSample.scope} ${health.slowestSample.phaseId ?? "unknown"} ${health.slowestSample.durationMs} ms`);
  } else if (measurements.healthP95Ms !== null && measurements.healthP95Ms !== undefined) {
    lines.push(`- Compatibility health p95: ${measurements.healthP95Ms} ms`);
  }
  return lines;
}
function formatRecordFailureCards(records = []) {
const cards = records
.filter((record) => !["PASS", "DRY-RUN"].includes(record.status))
@ -670,7 +689,14 @@ function summarizeMeasurements(measurements) {
timeToHealthReadyMs: measurements.timeToHealthReadyMs ?? null,
readinessClassification: measurements.readinessClassification ?? null,
readinessClassificationReason: measurements.readinessClassificationReason ?? null,
health: measurements.health ?? null,
healthFailures: measurements.healthFailures ?? null,
healthP95Ms: measurements.healthP95Ms ?? null,
startupHealthP95Ms: measurements.startupHealthP95Ms ?? null,
postReadyHealthP95Ms: measurements.postReadyHealthP95Ms ?? null,
startupHealthFailures: measurements.startupHealthFailures ?? null,
postReadyHealthFailures: measurements.postReadyHealthFailures ?? null,
finalHealthFailures: measurements.finalHealthFailures ?? null,
missingDependencyErrors: measurements.missingDependencyErrors ?? null,
pluginLoadFailures: measurements.pluginLoadFailures ?? null,
officialPluginEvidence: measurements.officialPluginEvidence ?? null,
@ -1150,6 +1176,7 @@ function compactRolePeaks(measurements) {
function pushMeasurementBrief(lines, measurements, { compact }) {
lines.push("Measurements:");
lines.push(`- startup: listening ${valueMs(measurements.timeToListeningMs)}; health ${valueMs(measurements.timeToHealthReadyMs)}; readiness ${measurements.readinessClassification ?? "unknown"}; gateway ${measurements.finalGatewayState ?? "unknown"}; restarts ${measurements.gatewayRestartCount ?? "unknown"}`);
lines.push(`- health: startup p95 ${valueMs(measurements.startupHealthP95Ms)}; post-ready p95 ${valueMs(measurements.postReadyHealthP95Ms)}; failures ${measurements.healthFailures ?? "unknown"}; final failures ${measurements.finalHealthFailures ?? "unknown"}${healthSlowestText(measurements)}`);
lines.push(`- resources: peak RSS ${valueMb(measurements.peakRssMb)}; max CPU ${valuePercent(measurements.cpuPercentMax)}; samples ${measurements.resourceSampleCount ?? "unknown"}; roles ${rolePeakText(measurements)}`);
lines.push(`- agent: turn ${valueMs(measurements.agentTurnMs, "not-run")}; cold/warm ${valueMs(measurements.coldAgentTurnMs)}/${valueMs(measurements.warmAgentTurnMs)}; cold-warm delta ${valueMs(measurements.agentColdWarmDeltaMs)}; pre-provider ${valueMs(measurements.agentPreProviderMs)}; provider ${valueMs(measurements.agentProviderFinalMs)}; cleanup ${valueMs(measurements.agentCleanupMaxMs)}; diagnosis ${measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; leaks ${measurements.agentProcessLeakCount ?? "unknown"}`);
lines.push(`- plugins/runtime: missing deps ${measurements.missingDependencyErrors ?? "unknown"}; plugin failures ${measurements.pluginLoadFailures ?? "unknown"}; runtime deps ${valueMs(measurements.runtimeDepsStagingMs)}${runtimeDepsPluginText(measurements)}; warm restages ${measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${measurements.runtimeDepsWarmReuseOk ?? "unknown"}`);
@ -1223,6 +1250,14 @@ function valuePercent(value) {
return value === null || value === undefined ? "unknown" : `${value}%`;
}
/**
 * Returns the "; slowest <scope>/<phase> <duration>" suffix for the health
 * brief line, or an empty string when no slowest sample is available.
 */
function healthSlowestText(measurements) {
  const slowest = measurements.health?.slowestSample;
  if (slowest) {
    const phase = slowest.phaseId ?? "unknown";
    return `; slowest ${slowest.scope}/${phase} ${valueMs(slowest.durationMs)}`;
  }
  return "";
}
function buildFixerPrompt({ report, primaryBlocker, why, measurements, evidence, likelyOwner }) {
const parts = [
`Investigate OpenClaw release gate failure ${primaryBlocker}.`,

View File

@ -142,6 +142,7 @@ export async function executeScenario(scenario, context) {
id: phase.id,
title: phase.title,
intent: phase.intent,
healthScope: phase.healthScope,
expectedAgentFailure: phase.expectedAgentFailure === true,
commands,
evidence: phase.evidence ?? [],
@ -347,6 +348,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
id: phase.id,
title: phase.title,
intent: phase.intent,
healthScope: phase.healthScope,
expectedAgentFailure: phase.expectedAgentFailure === true,
commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir),
evidence: phase.evidence ?? []

View File

@ -356,6 +356,8 @@ export async function runSelfCheck(flags = {}) {
checks.push(markdownFailureCardsCheck());
checks.push(reportRecommendedNextScenarioCheck());
checks.push(readinessClassificationCheck());
checks.push(healthReadinessModelCheck());
checks.push(oldHealthReportCompatibilityCheck());
checks.push(await resourceRoleAttributionCheck(tmp));
checks.push(await resourceRootCommandRoleBoundaryCheck());
checks.push(await resourceRolePollutionCheck());
@ -365,6 +367,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(await cleanupRetryCheck(tmp));
checks.push(stateRegistryValidationCheck());
checks.push(scenarioCloneFirstValidationCheck());
checks.push(scenarioHealthScopeValidationCheck());
checks.push(scenarioStateCompatibilityCheck());
checks.push(await cpuProfileParserCheck());
checks.push(await heapProfileParserCheck());
@ -3880,6 +3883,7 @@ function readinessClassificationCheck() {
phases: [
{
id: "provision",
healthScope: "readiness",
results: [],
metrics: {
readiness: {
@ -3951,6 +3955,186 @@ function readinessClassificationCheck() {
}
}
/**
 * Self-check: evaluates a synthetic record with a readiness phase, a
 * post-ready phase and final metrics, and asserts that the scoped health
 * model derives the expected measurements and that only the post-ready
 * p95 threshold is violated.
 *
 * @returns {object} Check result with status "PASS" or "FAIL" (plus `message` on failure).
 */
function healthReadinessModelCheck() {
  const outcome = (status, extra = {}) => ({
    id: "health-readiness-model",
    status,
    command: "evaluate synthetic scoped health record",
    durationMs: 0,
    ...extra
  });
  // Health summary for a run of samples that all succeeded.
  const allOkSummary = (count, minMs, p50Ms, p95Ms, maxMs) => ({
    count,
    okCount: count,
    failureCount: 0,
    minMs,
    p50Ms,
    p95Ms,
    maxMs
  });

  try {
    const coldStartPhase = {
      id: "cold-start",
      healthScope: "readiness",
      results: [],
      metrics: {
        readiness: {
          deadlineMs: 90000,
          thresholdMs: 30000,
          ready: true,
          listeningReady: true,
          listeningReadyAtMs: 120,
          healthReadyAtMs: 200,
          attempts: 2,
          classification: {
            state: "ready",
            severity: "pass",
            reason: "gateway became healthy within the readiness threshold"
          },
          healthAttempts: [
            { ok: false, durationMs: 25 },
            { ok: true, durationMs: 30 }
          ]
        },
        healthSamples: [
          { ok: true, durationMs: 40 }
        ],
        healthSummary: allOkSummary(1, 40, 40, 40, 40)
      }
    };
    const apiLatencyPhase = {
      id: "api-latency",
      healthScope: "post-ready",
      results: [],
      metrics: {
        healthSamples: [
          { ok: true, durationMs: 10 },
          { ok: true, durationMs: 1500 }
        ],
        healthSummary: allOkSummary(2, 10, 10, 1500, 1500)
      }
    };
    const record = {
      status: "PASS",
      phases: [coldStartPhase, apiLatencyPhase],
      finalMetrics: {
        service: { gatewayState: "running" },
        healthSamples: [{ ok: true, durationMs: 50 }],
        healthSummary: allOkSummary(1, 50, 50, 50, 50),
        health: { ok: true, durationMs: 50 }
      }
    };
    const scenario = {
      phases: [
        { id: "cold-start", healthScope: "readiness" },
        { id: "api-latency", healthScope: "post-ready" }
      ],
      thresholds: {
        gatewayReadyMs: 30000,
        postReadyHealthP95Ms: 1000
      }
    };

    evaluateRecord(record, scenario);

    const measured = record.measurements;
    const hasViolation = (metric) => record.violations.some((violation) => violation.metric === metric);

    assertEqual(record.status, "FAIL", "post-ready health threshold fails");
    assertEqual(measured.health.schemaVersion, "kova.health.v1", "health schema");
    assertEqual(measured.timeToHealthReadyMs, 200, "readiness health ready derived");
    assertEqual(measured.startupHealthP95Ms, 30, "startup health p95 derived from readiness attempts");
    assertEqual(measured.postReadyHealthP95Ms, 1500, "post-ready health p95 derived from post-ready samples");
    assertEqual(measured.healthP95Ms, 1500, "compatibility health p95 derived");
    assertEqual(measured.health.slowestSample.scope, "post-ready", "slowest health scope");
    assertEqual(hasViolation("postReadyHealthP95Ms"), true, "post-ready health violation");
    assertEqual(hasViolation("timeToHealthReadyMs"), false, "post-ready liveness does not masquerade as readiness");

    return outcome("PASS");
  } catch (error) {
    return outcome("FAIL", { message: error.message });
  }
}
/**
 * Self-check: a report whose measurements carry only the flat health fields
 * (no scoped `health` object) must still summarize, render to markdown with
 * the compatibility p95 line, and compare cleanly against itself.
 *
 * @returns {object} Check result with status "PASS" or "FAIL" (plus `message` on failure).
 */
function oldHealthReportCompatibilityCheck() {
  const outcome = (status, extra = {}) => ({
    id: "old-health-report-compatibility",
    status,
    command: "summarize and compare legacy health report shape",
    durationMs: 0,
    ...extra
  });

  try {
    const legacyMeasurements = {
      peakRssMb: 100,
      cpuPercentMax: 10,
      timeToListeningMs: 100,
      timeToHealthReadyMs: 200,
      readinessClassification: "ready",
      healthFailures: 0,
      healthP95Ms: 900,
      finalGatewayState: "running"
    };
    const legacyRecord = {
      scenario: "fresh-install",
      title: "Fresh Install",
      status: "PASS",
      target: "runtime:stable",
      state: { id: "fresh", title: "Fresh" },
      envName: "kova-old-health",
      likelyOwner: "OpenClaw",
      objective: "Old report compatibility.",
      measurements: legacyMeasurements,
      phases: [],
      violations: []
    };
    const report = {
      schemaVersion: "kova.report.v1",
      generatedAt: "2026-05-05T00:00:00.000Z",
      runId: "old-health-report",
      mode: "execution",
      target: "runtime:stable",
      platform: { os: "darwin", release: "25.0.0", arch: "arm64", node: process.version },
      summary: { total: 1, statuses: { PASS: 1 } },
      records: [legacyRecord]
    };

    const summary = renderReportSummary(report, { structured: true });
    const summarized = summary.scenarios[0].measurements;
    assertEqual(summarized.health, null, "old report health object absent");
    assertEqual(summarized.healthP95Ms, 900, "old report health p95 summarized");

    const markdown = renderMarkdownReport(report);
    const hasCompatibilityLine = markdown.includes("Compatibility health p95: 900 ms");
    assertEqual(hasCompatibilityLine, true, "old report markdown compatibility p95");

    const comparison = compareReports(report, report, { thresholds: { healthP95Ms: 0 } });
    assertEqual(comparison.ok, true, "old report compare remains ok");

    return outcome("PASS");
  } catch (error) {
    return outcome("FAIL", { message: error.message });
  }
}
async function resourceRoleAttributionCheck(tmp) {
const command = "node -e 'setTimeout(() => {}, 650)'";
const artifactPath = join(tmp, "resource-role-attribution.jsonl");
@ -4739,6 +4923,7 @@ function scenarioCloneFirstValidationCheck() {
id: "status",
title: "Status",
intent: "Unsafe durable source access.",
healthScope: "post-ready",
commands: ["ocm service status {sourceEnv} --json"],
evidence: ["status"]
}]
@ -4762,6 +4947,7 @@ function scenarioCloneFirstValidationCheck() {
id: "clone",
title: "Clone",
intent: "Clone source.",
healthScope: "none",
commands: ["ocm env clone {sourceEnv} {env} --json", "ocm logs {sourceEnv} --tail 20"],
evidence: ["clone"]
}]
@ -4783,12 +4969,14 @@ function scenarioCloneFirstValidationCheck() {
id: "clone",
title: "Clone",
intent: "Clone source.",
healthScope: "none",
commands: ["ocm env clone {sourceEnv} {env} --json"],
evidence: ["clone"]
}, {
id: "upgrade",
title: "Upgrade",
intent: "Upgrade disposable clone.",
healthScope: "readiness",
commands: ["ocm upgrade {env} --channel beta --json"],
evidence: ["upgrade"]
}]
@ -4811,6 +4999,72 @@ function scenarioCloneFirstValidationCheck() {
}
}
/**
 * Self-check: scenario validation must reject a phase that omits
 * `healthScope` and a phase whose `healthScope` is not an allowed value.
 *
 * @returns {object} Check result with status "PASS" or "FAIL" (plus `message` on failure).
 */
function scenarioHealthScopeValidationCheck() {
  const outcome = (status, extra = {}) => ({
    id: "scenario-health-scope-validation",
    status,
    command: "validate scenario health scope contracts",
    durationMs: 0,
    ...extra
  });
  // Minimal single-phase scenario; `phaseExtras` slots in between intent and commands.
  const scenarioWith = ({ id, title, objective, phaseExtras = {} }) => ({
    id,
    surface: "fresh-install",
    title,
    objective,
    tags: ["fresh-user"],
    proves: ["baseline"],
    thresholds: {},
    phases: [{
      id: "start",
      title: "Start",
      intent: "Start gateway.",
      ...phaseExtras,
      commands: ["ocm start {env} {startSelector} --json"],
      evidence: ["start"]
    }]
  });
  // True only when validation throws an error whose message matches `pattern`.
  const rejectedWith = (scenario, fileName, pattern) => {
    try {
      validateScenarioShape(scenario, fileName);
    } catch (error) {
      return pattern.test(error.message);
    }
    return false;
  };

  try {
    const rejectedMissing = rejectedWith(
      scenarioWith({
        id: "missing-health-scope",
        title: "Missing Health Scope",
        objective: "Scenario phase without an explicit health scope."
      }),
      "missing-health-scope.json",
      /phases\[0\]\.healthScope must be a non-empty string/
    );
    assertEqual(rejectedMissing, true, "missing healthScope rejected");

    const rejectedInvalid = rejectedWith(
      scenarioWith({
        id: "invalid-health-scope",
        title: "Invalid Health Scope",
        objective: "Scenario phase with an invalid health scope.",
        phaseExtras: { healthScope: "startup" }
      }),
      "invalid-health-scope.json",
      /healthScope must be one of/
    );
    assertEqual(rejectedInvalid, true, "invalid healthScope rejected");

    return outcome("PASS");
  } catch (error) {
    return outcome("FAIL", { message: error.message });
  }
}
function scenarioStateCompatibilityCheck() {
try {
let rejected = false;

View File

@ -18,8 +18,8 @@
"preProviderMs": 10000,
"providerFinalMs": 3000,
"agentCleanupMs": 5000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -81,7 +81,7 @@
"warmPreProviderMs",
"agentPreProviderP95Ms",
"agentCleanupMaxMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"providerTimeoutMentions",
"pluginLoadFailures"

View File

@ -14,8 +14,8 @@
"agentTurnMs": 45000,
"preProviderMs": 10000,
"providerFinalMs": 3000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -63,7 +63,7 @@
"agentTurnP95Ms",
"agentTurnMaxMs",
"coldPreProviderMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"pluginLoadFailures"
]

View File

@ -14,8 +14,8 @@
"agentTurnMs": 45000,
"preProviderMs": 10000,
"providerFinalMs": 3000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -61,7 +61,7 @@
"agentTurnMs",
"agentTurnP95Ms",
"coldPreProviderMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"pluginLoadFailures"
]

View File

@ -12,8 +12,8 @@
"thresholds": {
"coldReadyMs": 30000,
"warmReadyMs": 15000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -60,7 +60,7 @@
"metrics": [
"coldReadyMs",
"warmReadyMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"eventLoopMaxMs"
]

View File

@ -14,8 +14,8 @@
"agentTurnMs": 45000,
"preProviderMs": 10000,
"providerFinalMs": 3000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -62,7 +62,7 @@
"agentTurnMs",
"agentTurnP95Ms",
"coldPreProviderMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"pluginLoadFailures"
]

View File

@ -19,7 +19,7 @@
"rssGrowthMb": 300,
"gatewayRssGrowthMb": 300,
"soakHealthP95Ms": 1000,
"healthP95Ms": 1000
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {

View File

@ -14,8 +14,8 @@
"agentTurnMs": 45000,
"preProviderMs": 10000,
"providerFinalMs": 3000,
"healthP95Ms": 1000,
"peakRssMb": 900
"peakRssMb": 900,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -61,7 +61,7 @@
"agentTurnMs",
"agentTurnP95Ms",
"coldPreProviderMs",
"healthP95Ms",
"postReadyHealthP95Ms",
"peakRssMb",
"pluginLoadFailures"
]

View File

@ -18,9 +18,9 @@
"modelsListMs": 20000,
"soakCommandP95Ms": 12000,
"soakHealthP95Ms": 1000,
"healthP95Ms": 1000,
"peakRssMb": 1000,
"eventLoopMaxMs": 500
"eventLoopMaxMs": 500,
"postReadyHealthP95Ms": 1000
},
"roleThresholds": {
"gateway": {
@ -74,7 +74,7 @@
"soakCommandP95Ms",
"soakHealthP95Ms",
"peakRssMb",
"healthP95Ms",
"postReadyHealthP95Ms",
"eventLoopMaxMs"
]
}