diff --git a/metrics/known.json b/metrics/known.json index 640c54a..f6e9c75 100644 --- a/metrics/known.json +++ b/metrics/known.json @@ -32,6 +32,11 @@ "maxCpuPercent", "missingDependencyErrors", "modelsListMs", + "mcpInitializeMs", + "mcpProcessLeaks", + "mcpShutdownMs", + "mcpToolCountMin", + "mcpToolsListMs", "openclawSlowestSpanMs", "openclawTimelineParseErrors", "peakRssMb", diff --git a/process-roles/mcp-runtime.json b/process-roles/mcp-runtime.json index b0303eb..2a2af36 100644 --- a/process-roles/mcp-runtime.json +++ b/process-roles/mcp-runtime.json @@ -2,6 +2,12 @@ "id": "mcp-runtime", "title": "MCP Runtime", "description": "MCP server and transport subprocesses started by OpenClaw or plugins.", - "commandPatterns": ["mcp"], - "processPatterns": ["mcp", "modelcontextprotocol"] + "commandPatterns": [ + "(^|\\s)mcp\\s+serve(\\s|$)", + "mcp-bridge-smoke\\.mjs" + ], + "processPatterns": [ + "(^|\\s)mcp\\s+serve(\\s|$)", + "mcp-bridge-smoke\\.mjs" + ] } diff --git a/profiles/exhaustive.json b/profiles/exhaustive.json index 717b3f9..522fa83 100644 --- a/profiles/exhaustive.json +++ b/profiles/exhaustive.json @@ -36,6 +36,7 @@ { "scenario": "agent-provider-recovery", "state": "mock-openai-provider", "timeoutMs": 240000 }, { "scenario": "dashboard-readiness", "state": "fresh" }, { "scenario": "tui-responsiveness", "state": "fresh" }, + { "scenario": "mcp-runtime-start-stop", "state": "fresh" }, { "scenario": "gateway-performance", "state": "many-bundled-plugins" }, { "scenario": "gateway-performance", "state": "gateway-already-running" }, { "scenario": "gateway-performance", "state": "stale-service-state" }, diff --git a/profiles/release.json b/profiles/release.json index c10e72b..5cfab2d 100644 --- a/profiles/release.json +++ b/profiles/release.json @@ -15,6 +15,7 @@ "doctor-cli": { "peakRssMb": 700, "maxCpuPercent": 300 }, "tui-cli": { "peakRssMb": 650, "maxCpuPercent": 250 }, "dashboard-cli": { "peakRssMb": 650, "maxCpuPercent": 250 }, + "mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 }, "browser-sidecar": { "peakRssMb": 500, "maxCpuPercent": 250 }, "mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 } }, @@ -78,6 +79,17 @@ "soakHealthP95Ms": 1000, "peakRssMb": 1000 } + }, + "mcp-runtime": { + "thresholds": { + "mcpInitializeMs": 10000, + "mcpToolsListMs": 10000, + "mcpShutdownMs": 5000, + "mcpToolCountMin": 1, + "mcpProcessLeaks": 0, + "statusMs": 10000, + "pluginLoadFailures": 0 + } } } }, @@ -124,6 +136,7 @@ "failure-containment:broken-plugin-deps", "soak:large-workspace", "workspace-scan:large-workspace", + "mcp-runtime:fresh", "cross-platform-smoke:slow-filesystem" ] }, @@ -146,7 +159,7 @@ "tui", "gateway-performance" ], - "warning": ["failure-containment", "soak", "workspace-scan", "cross-platform-smoke"] + "warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "cross-platform-smoke"] }, "scenarios": { "blocking": [ @@ -163,7 +176,8 @@ "gateway-performance" ], "warning": [ - "workspace-scan-pressure" + "workspace-scan-pressure", + "mcp-runtime-start-stop" ] } }, @@ -201,6 +215,7 @@ { "scenario": "failure-injection", "state": "broken-plugin-deps" }, { "scenario": "soak", "state": "large-workspace" }, { "scenario": "workspace-scan-pressure", "state": "large-workspace" }, + { "scenario": "mcp-runtime-start-stop", "state": "fresh" }, { "scenario": "cross-platform-smoke", "state": "slow-filesystem" } ] }, @@ -343,6 +358,10 @@ "state": "large-workspace", "timeoutMs": 240000 }, + { + "scenario": "mcp-runtime-start-stop", + "state": "fresh" + }, { "scenario": "cross-platform-smoke", "state": "slow-filesystem" diff --git a/scenarios/mcp-runtime-start-stop.json b/scenarios/mcp-runtime-start-stop.json new file mode 100644 index 0000000..6001c3b --- /dev/null +++ b/scenarios/mcp-runtime-start-stop.json @@ -0,0 +1,44 @@ +{ + "id": "mcp-runtime-start-stop", + "surface": "mcp-runtime", + "title": "MCP Runtime Start/Stop", + "objective": "Start OpenClaw's real MCP stdio bridge against the disposable gateway, perform a JSON-RPC initialize and tools/list smoke, then verify the bridge exits without leaking a runtime process.", + "tags": ["mcp", "stdio", "gateway", "runtime", "start-stop"], + "timeoutMs": 120000, + "thresholds": { + "gatewayReadyMs": 30000, + "gatewayReadyHardTimeoutMs": 120000, + "statusMs": 10000, + "mcpInitializeMs": 10000, + "mcpToolsListMs": 10000, + "mcpShutdownMs": 5000, + "mcpToolCountMin": 1, + "mcpProcessLeaks": 0, + "missingDependencyErrors": 0, + "pluginLoadFailures": 0, + "peakRssMb": 900 + }, + "phases": [ + { + "id": "gateway", + "title": "Gateway Start", + "intent": "Start the gateway and confirm it is healthy before opening the MCP stdio bridge.", + "commands": ["ocm start {env} {startSelector} --json", "ocm @{env} -- status"], + "evidence": ["gateway status", "gateway port", "readiness classification"] + }, + { + "id": "mcp-bridge", + "title": "MCP Bridge Smoke", + "intent": "Spawn the real OpenClaw MCP stdio bridge, initialize it, list tools, and close it cleanly.", + "commands": ["node {kovaRoot}/support/mcp-bridge-smoke.mjs --env {env} --artifact-dir {artifactDir} --timeout-ms 30000"], + "evidence": ["MCP initialize timing", "tools/list timing", "tool count", "bridge process exit"] + }, + { + "id": "post-mcp-health", + "title": "Post-MCP Gateway Health", + "intent": "Verify the gateway remains responsive after the MCP bridge starts and exits.", + "commands": ["ocm @{env} -- status", "ocm logs {env} --tail 300 --raw"], + "evidence": ["status after MCP bridge", "MCP bridge errors", "gateway errors"] + } + ] +} diff --git a/src/collectors/resources.mjs b/src/collectors/resources.mjs index 479e901..2637869 100644 --- a/src/collectors/resources.mjs +++ b/src/collectors/resources.mjs @@ -61,7 +61,7 @@ export function startResourceSampler(rootPid, options = {}) { roles.add("gateway-tree"); } if (roles.size > 0) { - for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers)) { + for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers, roles)) { roles.add(role); } } @@ -290,6 +290,12 @@ export function diffProcessSnapshots(before, after, options = {}) { }; } +export function classifyRegistryRolesForProcess(process, options = {}) { + const roleMatchers = compileRoleMatchers(options.processRoles ?? []); + const existingRoles = new Set(options.existingRoles ?? []); + return matchingRegistryRoles(process, options.rootCommand, roleMatchers, existingRoles); +} + function compileRoleMatchers(roles) { return roles.map((role) => ({ id: role.id, @@ -310,13 +316,14 @@ function compilePatterns(patterns) { }); } -function matchingRegistryRoles(process, rootCommand, roleMatchers) { +function matchingRegistryRoles(process, rootCommand, roleMatchers, existingRoles = new Set()) { const roles = []; + const isCommandTree = existingRoles.has("command-tree"); for (const role of roleMatchers) { if (role.id === "command-tree" || role.id === "gateway" || role.id === "gateway-tree") { continue; } - if (matchesAny(role.processPatterns, process.command) || matchesAny(role.commandPatterns, rootCommand) || + if (matchesAny(role.processPatterns, process.command) || (isCommandTree && matchesAny(role.commandPatterns, rootCommand)) || matchesAny(role.commandPatterns, process.command)) { roles.push(role.id); } diff --git a/src/evaluator.mjs b/src/evaluator.mjs index ea94a3e..ed63923 100644 --- a/src/evaluator.mjs +++ b/src/evaluator.mjs @@ -77,6 +77,7 @@ export function evaluateRecord(record, scenario, options = {}) { const healthFailures = countHealthFailures(record); const healthP95Ms = collectHealthP95(record); const soakEvidence = collectSoakEvidence(allResults); + const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults); const listeningFailures = countListeningFailures(record); const tcpConnectMaxMs = collectTcpConnectMax(record); const timeToListeningMs = collectTimeToListening(record); @@ -230,6 +231,69 @@ export function evaluateRecord(record, scenario, options = {}) { }); } + if (mcpBridgeEvidence.available) { + if (typeof thresholds.mcpInitializeMs === "number" && mcpBridgeEvidence.initializeMs !== null && mcpBridgeEvidence.initializeMs > thresholds.mcpInitializeMs) { + violations.push({ + kind: "mcp", + metric: "mcpInitializeMs", + expected: `<= ${thresholds.mcpInitializeMs}`, + actual: mcpBridgeEvidence.initializeMs, + message: `MCP bridge initialize took ${mcpBridgeEvidence.initializeMs}ms, over threshold ${thresholds.mcpInitializeMs}ms` + }); + } + + if (typeof thresholds.mcpToolsListMs === "number" && mcpBridgeEvidence.toolsListMs !== null && mcpBridgeEvidence.toolsListMs > thresholds.mcpToolsListMs) { + violations.push({ + kind: "mcp", + metric: "mcpToolsListMs", + expected: `<= ${thresholds.mcpToolsListMs}`, + actual: mcpBridgeEvidence.toolsListMs, + message: `MCP tools/list took ${mcpBridgeEvidence.toolsListMs}ms, over threshold ${thresholds.mcpToolsListMs}ms` + }); + } + + if (typeof thresholds.mcpShutdownMs === "number" && mcpBridgeEvidence.shutdownMs !== null && mcpBridgeEvidence.shutdownMs > thresholds.mcpShutdownMs) { + violations.push({ + kind: "mcp", + metric: "mcpShutdownMs", + expected: `<= ${thresholds.mcpShutdownMs}`, + actual: mcpBridgeEvidence.shutdownMs, + message: `MCP bridge shutdown took ${mcpBridgeEvidence.shutdownMs}ms, over threshold ${thresholds.mcpShutdownMs}ms` + }); + } + + if (typeof thresholds.mcpToolCountMin === "number" && mcpBridgeEvidence.toolCount !== null && mcpBridgeEvidence.toolCount < thresholds.mcpToolCountMin) { + violations.push({ + kind: "mcp", + metric: "mcpToolCountMin", + expected: `>= ${thresholds.mcpToolCountMin}`, + actual: mcpBridgeEvidence.toolCount, + message: `MCP bridge exposed ${mcpBridgeEvidence.toolCount} tool(s), below required ${thresholds.mcpToolCountMin}` + }); + } + + const leakCount = mcpBridgeEvidence.processExited === false ? 1 : 0; + if (typeof thresholds.mcpProcessLeaks === "number" && leakCount > thresholds.mcpProcessLeaks) { + violations.push({ + kind: "mcp", + metric: "mcpProcessLeaks", + expected: `<= ${thresholds.mcpProcessLeaks}`, + actual: leakCount, + message: "MCP bridge process did not exit cleanly after the smoke" + }); + } + + if (mcpBridgeEvidence.errors.length > 0) { + violations.push({ + kind: "mcp", + metric: "mcpBridgeErrors", + expected: "0", + actual: mcpBridgeEvidence.errors.length, + message: `MCP bridge smoke reported ${mcpBridgeEvidence.errors.length} error(s): ${mcpBridgeEvidence.errors[0]}` + }); + } + } + if (typeof thresholds.rssGrowthMb === "number" && rssGrowthMb !== null && rssGrowthMb > thresholds.rssGrowthMb) { violations.push({ kind: "soak", @@ -495,6 +559,15 @@ export function evaluateRecord(record, scenario, options = {}) { healthFailures, healthP95Ms, soakEvidence, + mcpBridgeEvidence, + mcpInitializeMs: mcpBridgeEvidence.initializeMs, + mcpToolsListMs: mcpBridgeEvidence.toolsListMs, + mcpShutdownMs: mcpBridgeEvidence.shutdownMs, + mcpToolCount: mcpBridgeEvidence.toolCount, + mcpToolNames: mcpBridgeEvidence.toolNames, + mcpProcessExited: mcpBridgeEvidence.processExited, + mcpProcessLeaks: mcpBridgeEvidence.available ? (mcpBridgeEvidence.processExited === false ? 1 : 0) : null, + mcpErrors: mcpBridgeEvidence.errors, soakDurationMs: soakEvidence.durationMs, soakIterations: soakEvidence.iterations, soakCommandP95Ms: soakEvidence.commandP95Ms, @@ -1602,6 +1675,65 @@ function parseSoakLoopOutput(result) { } } +function collectMcpBridgeEvidence(results) { + const smokes = results + .filter((result) => result.command?.includes("mcp-bridge-smoke.mjs")) + .map((result) => parseMcpBridgeSmokeOutput(result)) + .filter(Boolean); + + if (smokes.length === 0) { + return { + schemaVersion: "kova.mcpBridgeEvidence.v1", + available: false, + initializeMs: null, + toolsListMs: null, + shutdownMs: null, + toolCount: null, + toolNames: [], + processExited: null, + errors: [], + smokes: [] + }; + } + + return { + schemaVersion: "kova.mcpBridgeEvidence.v1", + available: true, + initializeMs: maxNullable(...smokes.map((smoke) => smoke.initializeMs)), + toolsListMs: maxNullable(...smokes.map((smoke) => smoke.toolsListMs)), + shutdownMs: maxNullable(...smokes.map((smoke) => smoke.shutdownMs)), + toolCount: maxNullable(...smokes.map((smoke) => smoke.toolCount)), + toolNames: [...new Set(smokes.flatMap((smoke) => smoke.toolNames ?? []))].sort(), + processExited: smokes.every((smoke) => smoke.processExited === true), + errors: smokes.flatMap((smoke) => smoke.errors ?? []), + smokes: smokes.map((smoke) => ({ + durationMs: smoke.durationMs ?? null, + initializeMs: smoke.initializeMs ?? null, + toolsListMs: smoke.toolsListMs ?? null, + shutdownMs: smoke.shutdownMs ?? null, + toolCount: smoke.toolCount ?? null, + processExited: smoke.processExited ?? null, + exitStatus: smoke.exitStatus ?? null, + exitSignal: smoke.exitSignal ?? null, + errors: smoke.errors ?? [] + })) + }; +} + +function parseMcpBridgeSmokeOutput(result) { + const text = result.stdout ?? ""; + const jsonStart = text.indexOf("{"); + if (jsonStart < 0) { + return null; + } + try { + const parsed = JSON.parse(text.slice(jsonStart)); + return parsed?.schemaVersion === "kova.mcpBridgeSmoke.v1" ? parsed : null; + } catch { + return null; + } +} + function healthFailureCount(samples) { return samples.filter((sample) => sample && !sample.ok).length; } diff --git a/src/report.mjs b/src/report.mjs index 0be8081..1ef5e83 100644 --- a/src/report.mjs +++ b/src/report.mjs @@ -146,6 +146,9 @@ export function renderMarkdownReport(report) { lines.push(`- Structured event-loop delay: ${record.measurements.eventLoopDelayMs ?? "unknown"} ms`); lines.push(`- Runtime deps staging: ${record.measurements.runtimeDepsStagingMs ?? "unknown"} ms`); lines.push(`- Runtime deps warm reuse: ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"} (cold installs ${record.measurements.coldRuntimeDepsInstallCount ?? "unknown"}; warm restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm max ${record.measurements.warmRuntimeDepsStagingMs ?? "unknown"} ms)`); + if (record.measurements.mcpBridgeEvidence?.available) { + lines.push(`- MCP bridge: initialize ${record.measurements.mcpInitializeMs ?? "unknown"} ms; tools/list ${record.measurements.mcpToolsListMs ?? "unknown"} ms; tools ${record.measurements.mcpToolCount ?? "unknown"}; shutdown ${record.measurements.mcpShutdownMs ?? "unknown"} ms; exited ${record.measurements.mcpProcessExited ?? "unknown"}`); + } lines.push(`- Provider/model timing: ${record.measurements.providerModelTimingMs ?? "unknown"} ms`); lines.push(`- Agent turn: ${record.measurements.agentTurnMs ?? "unknown"} ms (${record.measurements.agentResponseOk ?? "not-run"})`); if (record.measurements.agentTurnCount > 0) { @@ -844,7 +847,7 @@ export function renderPasteSummary(report) { const roleText = compactRolePeaks(record.measurements).slice(0, 4) .map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`) .join(", ") || "unknown"; - lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`); + lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? "unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`); } } else if (record.violations?.length > 0) { if (record.measurements) { diff --git a/src/runner.mjs b/src/runner.mjs index 943d8c7..e1f920c 100644 --- a/src/runner.mjs +++ b/src/runner.mjs @@ -125,7 +125,7 @@ export async function executeScenario(scenario, context) { continue; } - const commands = materializeCommands(phase.commands ?? [], commandValues(context, envName)); + const commands = materializeScenarioPhaseCommands(phase, context, envName, artifactDir); const results = []; for (const [commandIndex, command] of commands.entries()) { const result = await runScenarioCommand(command, context, envName, artifactDir, phase.id, commandIndex, authPolicy); @@ -347,7 +347,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy) title: phase.title, intent: phase.intent, expectedAgentFailure: phase.expectedAgentFailure === true, - commands: materializeCommands(phase.commands ?? [], commandValues(context, envName, artifactDir)), + commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir), evidence: phase.evidence ?? [] }); @@ -429,6 +429,10 @@ function buildStateLifecyclePhase(context, envName, scenario, kind, steps, artif }; } +function materializeScenarioPhaseCommands(phase, context, envName, artifactDir) { + return materializeCommands(phase.commands ?? [], commandValues(context, envName, artifactDir)); +} + async function executeStateLifecycleSteps(context, envName, scenario, kind, steps, artifactDir, phaseId = null, authPolicy = null) { if (!Array.isArray(steps) || steps.length === 0) { return null; @@ -726,7 +730,7 @@ function commandValues(context, envName, artifactDir = "") { target: context.target, from: context.from ?? "", sourceEnv: quoteShell(context.sourceEnv ?? ""), - artifactDir, + artifactDir: artifactDir ? quoteShell(artifactDir) : "", kovaRoot: quoteShell(repoRoot), startSelector: context.targetPlan.startSelector, upgradeSelector: context.targetPlan.upgradeSelector, diff --git a/src/selfcheck.mjs b/src/selfcheck.mjs index d071513..5385d20 100644 --- a/src/selfcheck.mjs +++ b/src/selfcheck.mjs @@ -31,7 +31,7 @@ import { parseProviderRequestLog, parseTimelineProviderRequestLog } from "./collectors/provider.mjs"; -import { captureProcessSnapshot, diffProcessSnapshots } from "./collectors/resources.mjs"; +import { captureProcessSnapshot, classifyRegistryRolesForProcess, diffProcessSnapshots } from "./collectors/resources.mjs"; import { renderMarkdownReport, renderPasteSummary, renderReportSummary } from "./report.mjs"; import { compareReports, renderCompareSummary } from "./compare.mjs"; @@ -179,6 +179,20 @@ export async function runSelfCheck(flags = {}) { assertEqual(commands.some((command) => command.includes("ocm service restart")), true, "workspace restart command"); assertEqual(commands.some((command) => command.includes("run-soak-loop.mjs") && command.includes("--duration-ms 15000")), true, "workspace repeated command loop"); })); + checks.push(await jsonCommandCheck("mcp-runtime-dry-run-json", `node bin/kova.mjs run --target runtime:stable --scenario mcp-runtime-start-stop --state fresh --report-dir ${quoteShell(tmp)} --json`, async (data) => { + const report = JSON.parse(await readFile(data.jsonPath, "utf8")); + const record = report.records?.[0]; + assertEqual(record?.surface, "mcp-runtime", "MCP runtime surface"); + const commands = record?.phases?.flatMap((phase) => phase.commands ?? []) ?? []; + const bridgeCommand = commands.find((command) => command.includes("mcp-bridge-smoke.mjs")) ?? ""; + assertEqual(bridgeCommand.includes("--artifact-dir '"), true, "MCP bridge helper receives quoted artifact dir"); + assertEqual(commands.some((command) => command.includes("ocm start") && command.includes("--json")), true, "MCP gateway start command"); + assertEqual(record?.thresholds?.mcpProcessLeaks, 0, "MCP process leak threshold"); + })); + checks.push(await commandCheck( + "mcp-runtime-role-patterns", + "node -e \"const role=require('./process-roles/mcp-runtime.json'); if (role.commandPatterns.includes('mcp') || role.processPatterns.includes('mcp') || role.processPatterns.some((p)=>p.includes('modelcontextprotocol'))) process.exit(1);\"" + )); checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => { assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema"); assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id"); @@ -231,6 +245,7 @@ export async function runSelfCheck(flags = {}) { checks.push(reportRecommendedNextScenarioCheck()); checks.push(readinessClassificationCheck()); checks.push(await resourceRoleAttributionCheck(tmp)); + checks.push(await resourceRootCommandRoleBoundaryCheck()); checks.push(await processSnapshotCheck(tmp)); checks.push(roleThresholdEvaluationCheck()); checks.push(thresholdPolicyCalibrationCheck()); @@ -251,6 +266,7 @@ export async function runSelfCheck(flags = {}) { checks.push(agentAuthFailureEvaluationCheck()); checks.push(await soakLoopRunnerCheck(tmp)); checks.push(soakTrendEvaluationCheck()); + checks.push(mcpBridgeEvidenceEvaluationCheck()); checks.push(await jsonCommandCheck( "dry-run-state-lifecycle-json", `node bin/kova.mjs run --target runtime:stable --scenario fresh-install --state missing-plugin-index --report-dir ${quoteShell(tmp)} --json`, @@ -2219,6 +2235,103 @@ function soakTrendEvaluationCheck() { } } +function mcpBridgeEvidenceEvaluationCheck() { + try { + const smoke = { + schemaVersion: "kova.mcpBridgeSmoke.v1", + durationMs: 1800, + initializeMs: 120, + toolsListMs: 90, + shutdownMs: 45, + toolCount: 8, + toolNames: ["conversations_list", "messages_read"], + processExited: true, + exitStatus: 0, + exitSignal: null, + errors: [] + }; + const record = { + scenario: "mcp-runtime-start-stop", + status: "PASS", + phases: [{ + id: "mcp-bridge", + results: [{ + command: "node support/mcp-bridge-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova", + status: 0, + timedOut: false, + durationMs: 1800, + stdout: JSON.stringify(smoke), + stderr: "" + }], + metrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }], + finalMetrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }; + evaluateRecord(record, { + id: "mcp-runtime-start-stop", + thresholds: { + mcpInitializeMs: 10000, + mcpToolsListMs: 10000, + mcpShutdownMs: 5000, + mcpToolCountMin: 1, + mcpProcessLeaks: 0, + pluginLoadFailures: 0 + } + }, { surface: { thresholds: {} }, targetPlan: { kind: "npm" } }); + + assertEqual(record.status, "PASS", "MCP bridge record status"); + assertEqual(record.measurements.mcpInitializeMs, 120, "MCP initialize ms"); + assertEqual(record.measurements.mcpToolsListMs, 90, "MCP tools/list ms"); + assertEqual(record.measurements.mcpShutdownMs, 45, "MCP shutdown ms"); + assertEqual(record.measurements.mcpToolCount, 8, "MCP tool count"); + assertEqual(record.measurements.mcpProcessLeaks, 0, "MCP process leak count"); + + const leaked = { + ...record, + status: "PASS", + violations: [], + measurements: undefined, + phases: [{ + id: "mcp-bridge", + results: [{ + command: "node support/mcp-bridge-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova", + status: 0, + timedOut: false, + durationMs: 1800, + stdout: JSON.stringify({ ...smoke, processExited: false }), + stderr: "" + }], + metrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }] + }; + evaluateRecord(leaked, { + id: "mcp-runtime-start-stop", + thresholds: { mcpProcessLeaks: 0 } + }, { surface: { thresholds: {} }, targetPlan: { kind: "npm" } }); + assertEqual(leaked.status, "FAIL", "MCP leaked process status"); + assertEqual( + leaked.violations.some((violation) => violation.metric === "mcpProcessLeaks"), + true, + "MCP process leak violation" + ); + + return { + id: "mcp-bridge-evidence-evaluation", + status: "PASS", + command: "evaluate synthetic MCP bridge evidence", + durationMs: 0 + }; + } catch (error) { + return { + id: "mcp-bridge-evidence-evaluation", + status: "FAIL", + command: "evaluate synthetic MCP bridge evidence", + durationMs: 0, + message: error.message + }; + } +} + function agentColdWarmEvaluationCheck() { try { const coldCommand = "ocm @kova -- agent --local --agent main --session-id kova-agent-cold-warm --message hi --json"; @@ -2931,6 +3044,45 @@ async function resourceRoleAttributionCheck(tmp) { } } +async function resourceRootCommandRoleBoundaryCheck() { + try { + const processRoles = await loadProcessRoles(); + const gatewayRoles = classifyRegistryRolesForProcess( + { command: "openclaw-gateway" }, + { + processRoles, + rootCommand: "node support/mcp-bridge-smoke.mjs --env kova-mcp-runtime-start-stop", + existingRoles: ["gateway", "gateway-tree"] + } + ); + const commandRoles = classifyRegistryRolesForProcess( + { command: "node support/mcp-bridge-smoke.mjs --env kova-mcp-runtime-start-stop" }, + { + processRoles, + rootCommand: "node support/mcp-bridge-smoke.mjs --env kova-mcp-runtime-start-stop", + existingRoles: ["command-tree"] + } + ); + + assertEqual(gatewayRoles.includes("mcp-runtime"), false, "root command role must not tag gateway process"); + assertEqual(commandRoles.includes("mcp-runtime"), true, "root command role tags command tree process"); + return { + id: "resource-root-command-role-boundary", + status: "PASS", + command: "classify synthetic gateway and command-tree roles", + durationMs: 0 + }; + } catch (error) { + return { + id: "resource-root-command-role-boundary", + status: "FAIL", + command: "classify synthetic gateway and command-tree roles", + durationMs: 0, + message: error.message + }; + } +} + async function processSnapshotCheck(tmp) { const child = runCommand("node -e 'setTimeout(() => {}, 1200)'", { timeoutMs: 5000, diff --git a/states/fresh.json b/states/fresh.json index e8647ee..726398e 100644 --- a/states/fresh.json +++ b/states/fresh.json @@ -21,7 +21,8 @@ "plugin-bad-manifest", "plugin-missing-runtime-deps", "dashboard", - "tui" + "tui", + "mcp-runtime" ], "incompatibleSurfaces": [ "upgrade-existing-user" diff --git a/support/mcp-bridge-smoke.mjs b/support/mcp-bridge-smoke.mjs new file mode 100644 index 0000000..f1e0a13 --- /dev/null +++ b/support/mcp-bridge-smoke.mjs @@ -0,0 +1,338 @@ +#!/usr/bin/env node + +import { spawn } from "node:child_process"; +import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises"; +import { join } from "node:path"; + +const SCHEMA_VERSION = "kova.mcpBridgeSmoke.v1"; + +const args = parseArgs(process.argv.slice(2)); +const envName = requiredArg(args, "env"); +const artifactDir = requiredArg(args, "artifact-dir"); +const timeoutMs = positiveInt(args["timeout-ms"] ?? 30000, "timeout-ms"); +assertKovaEnvName(envName); + +const startedAtEpochMs = Date.now(); +const summary = { + schemaVersion: SCHEMA_VERSION, + env: envName, + startedAt: new Date(startedAtEpochMs).toISOString(), + finishedAt: null, + durationMs: null, + gateway: null, + initializeMs: null, + toolsListMs: null, + shutdownMs: null, + toolCount: null, + toolNames: [], + processExited: false, + exitStatus: null, + exitSignal: null, + errors: [], + stderrSnippet: "" +}; + +let child; +let tokenFile; + +try { + const envInfo = await readOcmEnvInfo(envName, timeoutMs); + const config = JSON.parse(await readFile(envInfo.configPath, "utf8")); + const token = config?.gateway?.auth?.token; + if (typeof token !== "string" || token.length === 0) { + throw new Error(`gateway.auth.token missing in ${envInfo.configPath}`); + } + + await mkdir(artifactDir, { recursive: true }); + tokenFile = join(artifactDir, "mcp-gateway-token"); + await writeFile(tokenFile, token, { encoding: "utf8", mode: 0o600 }); + await chmod(tokenFile, 0o600); + + const gatewayPort = Number(envInfo.gatewayPort ?? config?.gateway?.port); + if (!Number.isInteger(gatewayPort) || gatewayPort <= 0) { + throw new Error("gateway port missing from OCM env metadata and OpenClaw config"); + } + const gatewayUrl = `ws://127.0.0.1:${gatewayPort}`; + summary.gateway = { port: gatewayPort, url: gatewayUrl }; + + child = spawn("ocm", [ + `@${envName}`, + "--", + "mcp", + "serve", + "--url", + gatewayUrl, + "--token-file", + tokenFile, + "--claude-channel-mode", + "off" + ], { + stdio: ["pipe", "pipe", "pipe"], + shell: false, + env: process.env + }); + + const transport = createJsonLineTransport(child); + await transport.waitForSpawn(); + + const initializeStarted = Date.now(); + await transport.request("initialize", { + protocolVersion: "2024-11-05", + capabilities: {}, + clientInfo: { name: "kova-mcp-bridge-smoke", version: "1.0.0" } + }, timeoutMs); + summary.initializeMs = Date.now() - initializeStarted; + + transport.notify("notifications/initialized", {}); + + const listStarted = Date.now(); + const tools = await transport.request("tools/list", {}, timeoutMs); + summary.toolsListMs = Date.now() - listStarted; + const toolList = Array.isArray(tools?.tools) ? tools.tools : []; + summary.toolCount = toolList.length; + summary.toolNames = toolList.map((tool) => tool?.name).filter((name) => typeof name === "string").sort(); + + const shutdownStarted = Date.now(); + child.stdin.end(); + const exit = await waitForExit(child, Math.min(timeoutMs, 5000)); + summary.shutdownMs = Date.now() - shutdownStarted; + summary.processExited = true; + summary.exitStatus = exit.status; + summary.exitSignal = exit.signal; +} catch (error) { + summary.errors.push(formatError(error)); + if (child && !summary.processExited) { + child.kill("SIGTERM"); + try { + const exit = await waitForExit(child, 3000); + summary.processExited = true; + summary.exitStatus = exit.status; + summary.exitSignal = exit.signal; + } catch { + child.kill("SIGKILL"); + } + } +} finally { + if (child?.stderrText) { + summary.stderrSnippet = child.stderrText.slice(-4000); + } + if (tokenFile) { + await rm(tokenFile, { force: true }); + } + const finishedAtEpochMs = Date.now(); + summary.finishedAt = new Date(finishedAtEpochMs).toISOString(); + summary.durationMs = finishedAtEpochMs - startedAtEpochMs; + console.log(JSON.stringify(summary, null, 2)); +} + +process.exit(summary.errors.length === 0 && summary.processExited ? 0 : 1); + +function createJsonLineTransport(processHandle) { + let nextId = 1; + let stdout = ""; + const pending = new Map(); + let spawnError; + let spawned = false; + + processHandle.stderrText = ""; + + processHandle.stdout.on("data", (chunk) => { + stdout += chunk.toString("utf8"); + for (;;) { + const newline = stdout.indexOf("\n"); + if (newline < 0) { + break; + } + const line = stdout.slice(0, newline).replace(/\r$/, ""); + stdout = stdout.slice(newline + 1); + if (line.trim().length === 0) { + continue; + } + let message; + try { + message = JSON.parse(line); + } catch { + continue; + } + const waiter = pending.get(message.id); + if (!waiter) { + continue; + } + pending.delete(message.id); + if (message.error) { + waiter.reject(new Error(message.error.message ?? JSON.stringify(message.error))); + } else { + waiter.resolve(message.result); + } + } + }); + + processHandle.stderr.on("data", (chunk) => { + processHandle.stderrText += chunk.toString("utf8"); + }); + processHandle.stdin.on("error", (error) => { + for (const waiter of pending.values()) { + waiter.reject(error); + } + pending.clear(); + }); + + processHandle.on("spawn", () => { + spawned = true; + }); + processHandle.on("error", (error) => { + spawnError = error; + for (const waiter of pending.values()) { + waiter.reject(error); + } + pending.clear(); + }); + processHandle.on("exit", (status, signal) => { + const error = new Error(`MCP bridge exited before reply (status=${status ?? "null"}, signal=${signal ?? "none"})`); + for (const waiter of pending.values()) { + waiter.reject(error); + } + pending.clear(); + }); + + return { + async waitForSpawn() { + const deadline = Date.now() + 5000; + while (!spawned) { + if (spawnError) { + throw spawnError; + } + if (Date.now() >= deadline) { + throw new Error("MCP bridge process did not spawn"); + } + await sleep(25); + } + }, + request(method, params, requestTimeoutMs) { + const id = nextId; + nextId += 1; + const payload = { jsonrpc: "2.0", id, method, params }; + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + pending.delete(id); + reject(new Error(`${method} timed out after ${requestTimeoutMs}ms`)); + }, requestTimeoutMs); + pending.set(id, { + resolve: (value) => { + clearTimeout(timer); + resolve(value); + }, + reject: (error) => { + clearTimeout(timer); + reject(error); + } + }); + processHandle.stdin.write(`${JSON.stringify(payload)}\n`); + }); + }, + notify(method, params) { + processHandle.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`); + } + }; +} + +async function readOcmEnvInfo(env, timeoutMs) { + const result = await runProcess("ocm", ["env", "show", env, "--json"], timeoutMs); + if (result.status !== 0) { + throw new Error(`ocm env show failed: ${firstLine(result.stderr) || firstLine(result.stdout) || result.status}`); + } + return JSON.parse(result.stdout); +} + +function runProcess(command, args, timeoutMs) { + return new Promise((resolve) => { + const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"], env: process.env }); + let stdout = ""; + let stderr = ""; + let timedOut = false; + const timer = setTimeout(() => { + timedOut = true; + child.kill("SIGTERM"); + setTimeout(() => child.kill("SIGKILL"), 3000).unref(); + }, timeoutMs); + child.stdout.on("data", (chunk) => { + stdout += chunk.toString("utf8"); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (error) => { + clearTimeout(timer); + resolve({ status: 127, signal: null, timedOut, stdout, stderr: error.message }); + }); + child.on("close", (status, signal) => { + clearTimeout(timer); + resolve({ status: timedOut ? 124 : (status ?? 1), signal, timedOut, stdout, stderr }); + }); + }); +} + +function waitForExit(child, timeoutMs) { + if (child.exitCode !== null || child.signalCode !== null) { + return Promise.resolve({ status: child.exitCode, signal: child.signalCode }); + } + return new Promise((resolve, reject) => { + const timer = setTimeout(() => reject(new Error(`process did not exit within ${timeoutMs}ms`)), timeoutMs); + child.once("exit", (status, signal) => { + clearTimeout(timer); + resolve({ status, signal }); + }); + }); +} + +function parseArgs(values) { + const parsed = {}; + for (let index = 0; index < values.length; index += 1) { + const value = values[index]; + if (!value.startsWith("--")) { + throw new Error(`unexpected positional argument '${value}'`); + } + const key = value.slice(2); + const next = values[index + 1]; + if (!next || next.startsWith("--")) { + throw new Error(`missing value for --${key}`); + } + parsed[key] = next; + index += 1; + } + return parsed; +} + +function requiredArg(values, key) { + const value = values[key]; + if (typeof value !== "string" || value.length === 0) { + throw new Error(`missing --${key}`); + } + return value; +} + +function positiveInt(value, key) { + const number = Number(value); + if (!Number.isInteger(number) || number <= 0) { + throw new Error(`--${key} must be a positive integer`); + } + return number; +} + +function assertKovaEnvName(value) { + if (!/^kova-[A-Za-z0-9][A-Za-z0-9._-]*$/.test(value)) { + throw new Error(`unsafe Kova env name '${value}'`); + } +} + +function formatError(error) { + return error instanceof Error ? error.message : String(error); +} + +function firstLine(value) { + return String(value ?? "").trim().split(/\r?\n/)[0] ?? ""; +} + +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/surfaces/mcp-runtime.json b/surfaces/mcp-runtime.json new file mode 100644 index 0000000..ddf5fe2 --- /dev/null +++ b/surfaces/mcp-runtime.json @@ -0,0 +1,30 @@ +{ + "id": "mcp-runtime", + "title": "MCP Runtime", + "ownerArea": "mcp-runtime", + "description": "Start OpenClaw's MCP stdio bridge against a running gateway, list exposed tools, and prove the bridge process stops cleanly.", + "requiredStates": ["fresh"], + "targetKinds": ["npm", "channel", "runtime", "local-build"], + "requiredMetrics": ["mcpInitializeMs", "mcpToolsListMs", "mcpShutdownMs", "mcpToolCountMin", "mcpProcessLeaks", "gatewayReadyMs", "statusMs", "pluginLoadFailures", "peakRssMb"], + "processRoles": ["gateway", "command-tree", "mcp-runtime"], + "thresholds": { + "gatewayReadyMs": 30000, + "statusMs": 10000, + "mcpInitializeMs": 10000, + "mcpToolsListMs": 10000, + "mcpShutdownMs": 5000, + "mcpToolCountMin": 1, + "mcpProcessLeaks": 0, + "pluginLoadFailures": 0, + "peakRssMb": 900 + }, + "roleThresholds": { + "gateway": { "peakRssMb": 800, "maxCpuPercent": 250 }, + "mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 }, + "command-tree": { "peakRssMb": 900, "maxCpuPercent": 300 } + }, + "diagnostics": { + "timelineRequiredForSourceBuild": false, + "expectedSpans": ["mcp.runtime.start", "mcp.runtime.stop", "gateway.websocket"] + } +}