feat: add mcp runtime validation scenario

This commit is contained in:
Shakker 2026-05-01 11:41:26 +01:00
parent b651614046
commit 3b04052170
No known key found for this signature in database
13 changed files with 755 additions and 13 deletions

View File

@ -32,6 +32,11 @@
"maxCpuPercent",
"missingDependencyErrors",
"modelsListMs",
"mcpInitializeMs",
"mcpProcessLeaks",
"mcpShutdownMs",
"mcpToolCountMin",
"mcpToolsListMs",
"openclawSlowestSpanMs",
"openclawTimelineParseErrors",
"peakRssMb",

View File

@ -2,6 +2,12 @@
"id": "mcp-runtime",
"title": "MCP Runtime",
"description": "MCP server and transport subprocesses started by OpenClaw or plugins.",
"commandPatterns": ["mcp"],
"processPatterns": ["mcp", "modelcontextprotocol"]
"commandPatterns": [
"(^|\\s)mcp\\s+serve(\\s|$)",
"mcp-bridge-smoke\\.mjs"
],
"processPatterns": [
"(^|\\s)mcp\\s+serve(\\s|$)",
"mcp-bridge-smoke\\.mjs"
]
}

View File

@ -36,6 +36,7 @@
{ "scenario": "agent-provider-recovery", "state": "mock-openai-provider", "timeoutMs": 240000 },
{ "scenario": "dashboard-readiness", "state": "fresh" },
{ "scenario": "tui-responsiveness", "state": "fresh" },
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
{ "scenario": "gateway-performance", "state": "many-bundled-plugins" },
{ "scenario": "gateway-performance", "state": "gateway-already-running" },
{ "scenario": "gateway-performance", "state": "stale-service-state" },

View File

@ -15,6 +15,7 @@
"doctor-cli": { "peakRssMb": 700, "maxCpuPercent": 300 },
"tui-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"dashboard-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
"mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 },
"browser-sidecar": { "peakRssMb": 500, "maxCpuPercent": 250 },
"mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 }
},
@ -78,6 +79,17 @@
"soakHealthP95Ms": 1000,
"peakRssMb": 1000
}
},
"mcp-runtime": {
"thresholds": {
"mcpInitializeMs": 10000,
"mcpToolsListMs": 10000,
"mcpShutdownMs": 5000,
"mcpToolCountMin": 1,
"mcpProcessLeaks": 0,
"statusMs": 10000,
"pluginLoadFailures": 0
}
}
}
},
@ -124,6 +136,7 @@
"failure-containment:broken-plugin-deps",
"soak:large-workspace",
"workspace-scan:large-workspace",
"mcp-runtime:fresh",
"cross-platform-smoke:slow-filesystem"
]
},
@ -146,7 +159,7 @@
"tui",
"gateway-performance"
],
"warning": ["failure-containment", "soak", "workspace-scan", "cross-platform-smoke"]
"warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "cross-platform-smoke"]
},
"scenarios": {
"blocking": [
@ -163,7 +176,8 @@
"gateway-performance"
],
"warning": [
"workspace-scan-pressure"
"workspace-scan-pressure",
"mcp-runtime-start-stop"
]
}
},
@ -201,6 +215,7 @@
{ "scenario": "failure-injection", "state": "broken-plugin-deps" },
{ "scenario": "soak", "state": "large-workspace" },
{ "scenario": "workspace-scan-pressure", "state": "large-workspace" },
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
{ "scenario": "cross-platform-smoke", "state": "slow-filesystem" }
]
},
@ -343,6 +358,10 @@
"state": "large-workspace",
"timeoutMs": 240000
},
{
"scenario": "mcp-runtime-start-stop",
"state": "fresh"
},
{
"scenario": "cross-platform-smoke",
"state": "slow-filesystem"

View File

@ -0,0 +1,44 @@
{
"id": "mcp-runtime-start-stop",
"surface": "mcp-runtime",
"title": "MCP Runtime Start/Stop",
"objective": "Start OpenClaw's real MCP stdio bridge against the disposable gateway, perform a JSON-RPC initialize and tools/list smoke, then verify the bridge exits without leaking a runtime process.",
"tags": ["mcp", "stdio", "gateway", "runtime", "start-stop"],
"timeoutMs": 120000,
"thresholds": {
"gatewayReadyMs": 30000,
"gatewayReadyHardTimeoutMs": 120000,
"statusMs": 10000,
"mcpInitializeMs": 10000,
"mcpToolsListMs": 10000,
"mcpShutdownMs": 5000,
"mcpToolCountMin": 1,
"mcpProcessLeaks": 0,
"missingDependencyErrors": 0,
"pluginLoadFailures": 0,
"peakRssMb": 900
},
"phases": [
{
"id": "gateway",
"title": "Gateway Start",
"intent": "Start the gateway and confirm it is healthy before opening the MCP stdio bridge.",
"commands": ["ocm start {env} {startSelector} --json", "ocm @{env} -- status"],
"evidence": ["gateway status", "gateway port", "readiness classification"]
},
{
"id": "mcp-bridge",
"title": "MCP Bridge Smoke",
"intent": "Spawn the real OpenClaw MCP stdio bridge, initialize it, list tools, and close it cleanly.",
"commands": ["node {kovaRoot}/support/mcp-bridge-smoke.mjs --env {env} --artifact-dir {artifactDir} --timeout-ms 30000"],
"evidence": ["MCP initialize timing", "tools/list timing", "tool count", "bridge process exit"]
},
{
"id": "post-mcp-health",
"title": "Post-MCP Gateway Health",
"intent": "Verify the gateway remains responsive after the MCP bridge starts and exits.",
"commands": ["ocm @{env} -- status", "ocm logs {env} --tail 300 --raw"],
"evidence": ["status after MCP bridge", "MCP bridge errors", "gateway errors"]
}
]
}

View File

@ -61,7 +61,7 @@ export function startResourceSampler(rootPid, options = {}) {
roles.add("gateway-tree");
}
if (roles.size > 0) {
for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers)) {
for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers, roles)) {
roles.add(role);
}
}
@ -290,6 +290,12 @@ export function diffProcessSnapshots(before, after, options = {}) {
};
}
/**
 * Classify a single process against the registry role definitions.
 *
 * Compiles `options.processRoles` into matchers and delegates to
 * `matchingRegistryRoles`, passing along the root command and the roles the
 * process has already been assigned.
 *
 * @param {object} process - Process entry to classify; forwarded unchanged to the matcher.
 * @param {object} [options]
 * @param {Array<object>} [options.processRoles] - Role definitions to compile (defaults to none).
 * @param {string} [options.rootCommand] - Root command forwarded to the matcher.
 * @param {Iterable<string>} [options.existingRoles] - Roles already assigned to the process (defaults to none).
 * @returns {string[]} Whatever `matchingRegistryRoles` returns for this process.
 */
export function classifyRegistryRolesForProcess(process, options = {}) {
  const compiledMatchers = compileRoleMatchers(options.processRoles ?? []);
  const alreadyAssigned = new Set(options.existingRoles ?? []);
  const { rootCommand } = options;
  return matchingRegistryRoles(process, rootCommand, compiledMatchers, alreadyAssigned);
}
function compileRoleMatchers(roles) {
return roles.map((role) => ({
id: role.id,
@ -310,13 +316,14 @@ function compilePatterns(patterns) {
});
}
function matchingRegistryRoles(process, rootCommand, roleMatchers) {
function matchingRegistryRoles(process, rootCommand, roleMatchers, existingRoles = new Set()) {
const roles = [];
const isCommandTree = existingRoles.has("command-tree");
for (const role of roleMatchers) {
if (role.id === "command-tree" || role.id === "gateway" || role.id === "gateway-tree") {
continue;
}
if (matchesAny(role.processPatterns, process.command) || matchesAny(role.commandPatterns, rootCommand) ||
if (matchesAny(role.processPatterns, process.command) || (isCommandTree && matchesAny(role.commandPatterns, rootCommand)) ||
matchesAny(role.commandPatterns, process.command)) {
roles.push(role.id);
}

View File

@ -77,6 +77,7 @@ export function evaluateRecord(record, scenario, options = {}) {
const healthFailures = countHealthFailures(record);
const healthP95Ms = collectHealthP95(record);
const soakEvidence = collectSoakEvidence(allResults);
const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults);
const listeningFailures = countListeningFailures(record);
const tcpConnectMaxMs = collectTcpConnectMax(record);
const timeToListeningMs = collectTimeToListening(record);
@ -230,6 +231,69 @@ export function evaluateRecord(record, scenario, options = {}) {
});
}
if (mcpBridgeEvidence.available) {
if (typeof thresholds.mcpInitializeMs === "number" && mcpBridgeEvidence.initializeMs !== null && mcpBridgeEvidence.initializeMs > thresholds.mcpInitializeMs) {
violations.push({
kind: "mcp",
metric: "mcpInitializeMs",
expected: `<= ${thresholds.mcpInitializeMs}`,
actual: mcpBridgeEvidence.initializeMs,
message: `MCP bridge initialize took ${mcpBridgeEvidence.initializeMs}ms, over threshold ${thresholds.mcpInitializeMs}ms`
});
}
if (typeof thresholds.mcpToolsListMs === "number" && mcpBridgeEvidence.toolsListMs !== null && mcpBridgeEvidence.toolsListMs > thresholds.mcpToolsListMs) {
violations.push({
kind: "mcp",
metric: "mcpToolsListMs",
expected: `<= ${thresholds.mcpToolsListMs}`,
actual: mcpBridgeEvidence.toolsListMs,
message: `MCP tools/list took ${mcpBridgeEvidence.toolsListMs}ms, over threshold ${thresholds.mcpToolsListMs}ms`
});
}
if (typeof thresholds.mcpShutdownMs === "number" && mcpBridgeEvidence.shutdownMs !== null && mcpBridgeEvidence.shutdownMs > thresholds.mcpShutdownMs) {
violations.push({
kind: "mcp",
metric: "mcpShutdownMs",
expected: `<= ${thresholds.mcpShutdownMs}`,
actual: mcpBridgeEvidence.shutdownMs,
message: `MCP bridge shutdown took ${mcpBridgeEvidence.shutdownMs}ms, over threshold ${thresholds.mcpShutdownMs}ms`
});
}
if (typeof thresholds.mcpToolCountMin === "number" && mcpBridgeEvidence.toolCount !== null && mcpBridgeEvidence.toolCount < thresholds.mcpToolCountMin) {
violations.push({
kind: "mcp",
metric: "mcpToolCountMin",
expected: `>= ${thresholds.mcpToolCountMin}`,
actual: mcpBridgeEvidence.toolCount,
message: `MCP bridge exposed ${mcpBridgeEvidence.toolCount} tool(s), below required ${thresholds.mcpToolCountMin}`
});
}
const leakCount = mcpBridgeEvidence.processExited === false ? 1 : 0;
if (typeof thresholds.mcpProcessLeaks === "number" && leakCount > thresholds.mcpProcessLeaks) {
violations.push({
kind: "mcp",
metric: "mcpProcessLeaks",
expected: `<= ${thresholds.mcpProcessLeaks}`,
actual: leakCount,
message: "MCP bridge process did not exit cleanly after the smoke"
});
}
if (mcpBridgeEvidence.errors.length > 0) {
violations.push({
kind: "mcp",
metric: "mcpBridgeErrors",
expected: "0",
actual: mcpBridgeEvidence.errors.length,
message: `MCP bridge smoke reported ${mcpBridgeEvidence.errors.length} error(s): ${mcpBridgeEvidence.errors[0]}`
});
}
}
if (typeof thresholds.rssGrowthMb === "number" && rssGrowthMb !== null && rssGrowthMb > thresholds.rssGrowthMb) {
violations.push({
kind: "soak",
@ -495,6 +559,15 @@ export function evaluateRecord(record, scenario, options = {}) {
healthFailures,
healthP95Ms,
soakEvidence,
mcpBridgeEvidence,
mcpInitializeMs: mcpBridgeEvidence.initializeMs,
mcpToolsListMs: mcpBridgeEvidence.toolsListMs,
mcpShutdownMs: mcpBridgeEvidence.shutdownMs,
mcpToolCount: mcpBridgeEvidence.toolCount,
mcpToolNames: mcpBridgeEvidence.toolNames,
mcpProcessExited: mcpBridgeEvidence.processExited,
mcpProcessLeaks: mcpBridgeEvidence.available ? (mcpBridgeEvidence.processExited === false ? 1 : 0) : null,
mcpErrors: mcpBridgeEvidence.errors,
soakDurationMs: soakEvidence.durationMs,
soakIterations: soakEvidence.iterations,
soakCommandP95Ms: soakEvidence.commandP95Ms,
@ -1602,6 +1675,65 @@ function parseSoakLoopOutput(result) {
}
}
/**
 * Aggregate the JSON summaries printed by `mcp-bridge-smoke.mjs` commands into
 * one `kova.mcpBridgeEvidence.v1` object.
 *
 * Only results whose command text contains "mcp-bridge-smoke.mjs" and whose
 * stdout parses into a smoke summary are considered. When none qualify the
 * evidence is marked unavailable and every measurement is null/empty.
 *
 * @param {Array<object>} results - Command results (each may carry `command` and `stdout`).
 * @returns {object} Evidence with aggregated timings, tool data, exit state, errors and per-smoke details.
 */
function collectMcpBridgeEvidence(results) {
  const smokes = [];
  for (const result of results) {
    if (!result.command?.includes("mcp-bridge-smoke.mjs")) {
      continue;
    }
    const summary = parseMcpBridgeSmokeOutput(result);
    if (summary) {
      smokes.push(summary);
    }
  }

  const hasSmokes = smokes.length > 0;
  // Timings and tool count are combined across smokes with maxNullable.
  // NOTE(review): toolCount is later compared against a *minimum* threshold;
  // combining it with a max looks intentional only for single-smoke runs — confirm.
  const combined = (field) => (hasSmokes ? maxNullable(...smokes.map((smoke) => smoke[field])) : null);
  const orNull = (value) => value ?? null;

  return {
    schemaVersion: "kova.mcpBridgeEvidence.v1",
    available: hasSmokes,
    initializeMs: combined("initializeMs"),
    toolsListMs: combined("toolsListMs"),
    shutdownMs: combined("shutdownMs"),
    toolCount: combined("toolCount"),
    toolNames: Array.from(new Set(smokes.flatMap((smoke) => smoke.toolNames ?? []))).sort(),
    // Every smoke must report a clean exit for the aggregate to count as exited.
    processExited: hasSmokes ? smokes.every((smoke) => smoke.processExited === true) : null,
    errors: smokes.flatMap((smoke) => smoke.errors ?? []),
    smokes: smokes.map((smoke) => ({
      durationMs: orNull(smoke.durationMs),
      initializeMs: orNull(smoke.initializeMs),
      toolsListMs: orNull(smoke.toolsListMs),
      shutdownMs: orNull(smoke.shutdownMs),
      toolCount: orNull(smoke.toolCount),
      processExited: orNull(smoke.processExited),
      exitStatus: orNull(smoke.exitStatus),
      exitSignal: orNull(smoke.exitSignal),
      errors: smoke.errors ?? []
    }))
  };
}
/**
 * Extract the `kova.mcpBridgeSmoke.v1` summary object from a command result's stdout.
 *
 * The smoke helper prints its summary as one JSON object, but the captured
 * stdout may also contain other text (a log line that happens to include a
 * brace before the summary, or trailing output after it). Parsing blindly from
 * the first "{" to the end of the text would fail in those cases and make the
 * MCP evidence silently "unavailable", so each "{" is tried as a possible start
 * of the summary, always ending at the last "}" in the text.
 *
 * @param {{ stdout?: string }} result - Command result whose stdout should hold the summary.
 * @returns {object|null} The parsed summary, or null when no object with the expected
 *   schemaVersion can be found.
 */
function parseMcpBridgeSmokeOutput(result) {
  const text = typeof result?.stdout === "string" ? result.stdout : "";
  const jsonEnd = text.lastIndexOf("}");
  if (jsonEnd < 0) {
    return null;
  }
  for (
    let jsonStart = text.indexOf("{");
    jsonStart >= 0 && jsonStart < jsonEnd;
    jsonStart = text.indexOf("{", jsonStart + 1)
  ) {
    let parsed;
    try {
      parsed = JSON.parse(text.slice(jsonStart, jsonEnd + 1));
    } catch {
      // Not the start of the summary object (or not JSON at all); try the next brace.
      continue;
    }
    if (parsed?.schemaVersion === "kova.mcpBridgeSmoke.v1") {
      return parsed;
    }
  }
  return null;
}
/**
 * Count the health samples that are present but not marked ok.
 *
 * @param {Array<object|null|undefined>} samples - Health samples; falsy entries are ignored.
 * @returns {number} Number of samples whose `ok` flag is falsy.
 */
function healthFailureCount(samples) {
  return samples.reduce((failures, sample) => {
    const failed = Boolean(sample) && !sample.ok;
    return failed ? failures + 1 : failures;
  }, 0);
}

View File

@ -146,6 +146,9 @@ export function renderMarkdownReport(report) {
lines.push(`- Structured event-loop delay: ${record.measurements.eventLoopDelayMs ?? "unknown"} ms`);
lines.push(`- Runtime deps staging: ${record.measurements.runtimeDepsStagingMs ?? "unknown"} ms`);
lines.push(`- Runtime deps warm reuse: ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"} (cold installs ${record.measurements.coldRuntimeDepsInstallCount ?? "unknown"}; warm restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm max ${record.measurements.warmRuntimeDepsStagingMs ?? "unknown"} ms)`);
if (record.measurements.mcpBridgeEvidence?.available) {
lines.push(`- MCP bridge: initialize ${record.measurements.mcpInitializeMs ?? "unknown"} ms; tools/list ${record.measurements.mcpToolsListMs ?? "unknown"} ms; tools ${record.measurements.mcpToolCount ?? "unknown"}; shutdown ${record.measurements.mcpShutdownMs ?? "unknown"} ms; exited ${record.measurements.mcpProcessExited ?? "unknown"}`);
}
lines.push(`- Provider/model timing: ${record.measurements.providerModelTimingMs ?? "unknown"} ms`);
lines.push(`- Agent turn: ${record.measurements.agentTurnMs ?? "unknown"} ms (${record.measurements.agentResponseOk ?? "not-run"})`);
if (record.measurements.agentTurnCount > 0) {
@ -844,7 +847,7 @@ export function renderPasteSummary(report) {
const roleText = compactRolePeaks(record.measurements).slice(0, 4)
.map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`)
.join(", ") || "unknown";
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
}
} else if (record.violations?.length > 0) {
if (record.measurements) {

View File

@ -125,7 +125,7 @@ export async function executeScenario(scenario, context) {
continue;
}
const commands = materializeCommands(phase.commands ?? [], commandValues(context, envName));
const commands = materializeScenarioPhaseCommands(phase, context, envName, artifactDir);
const results = [];
for (const [commandIndex, command] of commands.entries()) {
const result = await runScenarioCommand(command, context, envName, artifactDir, phase.id, commandIndex, authPolicy);
@ -347,7 +347,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
title: phase.title,
intent: phase.intent,
expectedAgentFailure: phase.expectedAgentFailure === true,
commands: materializeCommands(phase.commands ?? [], commandValues(context, envName, artifactDir)),
commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir),
evidence: phase.evidence ?? []
});
@ -429,6 +429,10 @@ function buildStateLifecyclePhase(context, envName, scenario, kind, steps, artif
};
}
/**
 * Expand a scenario phase's command templates into concrete command strings.
 *
 * Execution and planning both go through this helper, so they substitute the
 * same placeholder values (including the artifact directory).
 *
 * @param {object} phase - Scenario phase; `phase.commands` defaults to an empty list.
 * @param {object} context - Run context forwarded to `commandValues`.
 * @param {string} envName - Environment name forwarded to `commandValues`.
 * @param {string} artifactDir - Artifact directory forwarded to `commandValues`.
 * @returns {*} Whatever `materializeCommands` produces for the phase's templates.
 */
function materializeScenarioPhaseCommands(phase, context, envName, artifactDir) {
  const templates = phase.commands ?? [];
  const substitutions = commandValues(context, envName, artifactDir);
  return materializeCommands(templates, substitutions);
}
async function executeStateLifecycleSteps(context, envName, scenario, kind, steps, artifactDir, phaseId = null, authPolicy = null) {
if (!Array.isArray(steps) || steps.length === 0) {
return null;
@ -726,7 +730,7 @@ function commandValues(context, envName, artifactDir = "") {
target: context.target,
from: context.from ?? "",
sourceEnv: quoteShell(context.sourceEnv ?? ""),
artifactDir,
artifactDir: artifactDir ? quoteShell(artifactDir) : "",
kovaRoot: quoteShell(repoRoot),
startSelector: context.targetPlan.startSelector,
upgradeSelector: context.targetPlan.upgradeSelector,

View File

@ -31,7 +31,7 @@ import {
parseProviderRequestLog,
parseTimelineProviderRequestLog
} from "./collectors/provider.mjs";
import { captureProcessSnapshot, diffProcessSnapshots } from "./collectors/resources.mjs";
import { captureProcessSnapshot, classifyRegistryRolesForProcess, diffProcessSnapshots } from "./collectors/resources.mjs";
import { renderMarkdownReport, renderPasteSummary, renderReportSummary } from "./report.mjs";
import { compareReports, renderCompareSummary } from "./compare.mjs";
@ -179,6 +179,20 @@ export async function runSelfCheck(flags = {}) {
assertEqual(commands.some((command) => command.includes("ocm service restart")), true, "workspace restart command");
assertEqual(commands.some((command) => command.includes("run-soak-loop.mjs") && command.includes("--duration-ms 15000")), true, "workspace repeated command loop");
}));
checks.push(await jsonCommandCheck("mcp-runtime-dry-run-json", `node bin/kova.mjs run --target runtime:stable --scenario mcp-runtime-start-stop --state fresh --report-dir ${quoteShell(tmp)} --json`, async (data) => {
const report = JSON.parse(await readFile(data.jsonPath, "utf8"));
const record = report.records?.[0];
assertEqual(record?.surface, "mcp-runtime", "MCP runtime surface");
const commands = record?.phases?.flatMap((phase) => phase.commands ?? []) ?? [];
const bridgeCommand = commands.find((command) => command.includes("mcp-bridge-smoke.mjs")) ?? "";
assertEqual(bridgeCommand.includes("--artifact-dir '"), true, "MCP bridge helper receives quoted artifact dir");
assertEqual(commands.some((command) => command.includes("ocm start") && command.includes("--json")), true, "MCP gateway start command");
assertEqual(record?.thresholds?.mcpProcessLeaks, 0, "MCP process leak threshold");
}));
checks.push(await commandCheck(
"mcp-runtime-role-patterns",
"node -e \"const role=require('./process-roles/mcp-runtime.json'); if (role.commandPatterns.includes('mcp') || role.processPatterns.includes('mcp') || role.processPatterns.some((p)=>p.includes('modelcontextprotocol'))) process.exit(1);\""
));
checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => {
assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema");
assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id");
@ -231,6 +245,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(reportRecommendedNextScenarioCheck());
checks.push(readinessClassificationCheck());
checks.push(await resourceRoleAttributionCheck(tmp));
checks.push(await resourceRootCommandRoleBoundaryCheck());
checks.push(await processSnapshotCheck(tmp));
checks.push(roleThresholdEvaluationCheck());
checks.push(thresholdPolicyCalibrationCheck());
@ -251,6 +266,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(agentAuthFailureEvaluationCheck());
checks.push(await soakLoopRunnerCheck(tmp));
checks.push(soakTrendEvaluationCheck());
checks.push(mcpBridgeEvidenceEvaluationCheck());
checks.push(await jsonCommandCheck(
"dry-run-state-lifecycle-json",
`node bin/kova.mjs run --target runtime:stable --scenario fresh-install --state missing-plugin-index --report-dir ${quoteShell(tmp)} --json`,
@ -2219,6 +2235,103 @@ function soakTrendEvaluationCheck() {
}
}
/**
 * Self-check: feed synthetic MCP bridge smoke output through `evaluateRecord`.
 *
 * First a healthy smoke summary must leave the record at PASS and surface the
 * MCP measurements; then the same summary with `processExited: false` must
 * turn the record into FAIL with an `mcpProcessLeaks` violation.
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   PASS when every assertion holds, otherwise FAIL carrying the error message.
 */
function mcpBridgeEvidenceEvaluationCheck() {
  const scenarioId = "mcp-runtime-start-stop";
  // Fresh objects per call so the two evaluations never share mutable state.
  const runningGatewayMetrics = () => ({ service: { gatewayState: "running" }, logs: zeroLogMetrics() });
  const evaluationOptions = () => ({ surface: { thresholds: {} }, targetPlan: { kind: "npm" } });
  const bridgePhase = (smokeSummary) => ({
    id: "mcp-bridge",
    results: [{
      command: "node support/mcp-bridge-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova",
      status: 0,
      timedOut: false,
      durationMs: 1800,
      stdout: JSON.stringify(smokeSummary),
      stderr: ""
    }],
    metrics: runningGatewayMetrics()
  });

  try {
    const healthySmoke = {
      schemaVersion: "kova.mcpBridgeSmoke.v1",
      durationMs: 1800,
      initializeMs: 120,
      toolsListMs: 90,
      shutdownMs: 45,
      toolCount: 8,
      toolNames: ["conversations_list", "messages_read"],
      processExited: true,
      exitStatus: 0,
      exitSignal: null,
      errors: []
    };

    // Case 1: a clean smoke run stays within every threshold.
    const record = {
      scenario: scenarioId,
      status: "PASS",
      phases: [bridgePhase(healthySmoke)],
      finalMetrics: runningGatewayMetrics()
    };
    const fullScenario = {
      id: scenarioId,
      thresholds: {
        mcpInitializeMs: 10000,
        mcpToolsListMs: 10000,
        mcpShutdownMs: 5000,
        mcpToolCountMin: 1,
        mcpProcessLeaks: 0,
        pluginLoadFailures: 0
      }
    };
    evaluateRecord(record, fullScenario, evaluationOptions());
    assertEqual(record.status, "PASS", "MCP bridge record status");
    const { measurements } = record;
    assertEqual(measurements.mcpInitializeMs, 120, "MCP initialize ms");
    assertEqual(measurements.mcpToolsListMs, 90, "MCP tools/list ms");
    assertEqual(measurements.mcpShutdownMs, 45, "MCP shutdown ms");
    assertEqual(measurements.mcpToolCount, 8, "MCP tool count");
    assertEqual(measurements.mcpProcessLeaks, 0, "MCP process leak count");

    // Case 2: same record, but the bridge process never exited.
    const leaked = {
      ...record,
      status: "PASS",
      violations: [],
      measurements: undefined,
      phases: [bridgePhase({ ...healthySmoke, processExited: false })]
    };
    const leakOnlyScenario = { id: scenarioId, thresholds: { mcpProcessLeaks: 0 } };
    evaluateRecord(leaked, leakOnlyScenario, evaluationOptions());
    assertEqual(leaked.status, "FAIL", "MCP leaked process status");
    const hasLeakViolation = leaked.violations.some((violation) => violation.metric === "mcpProcessLeaks");
    assertEqual(hasLeakViolation, true, "MCP process leak violation");

    return {
      id: "mcp-bridge-evidence-evaluation",
      status: "PASS",
      command: "evaluate synthetic MCP bridge evidence",
      durationMs: 0
    };
  } catch (error) {
    return {
      id: "mcp-bridge-evidence-evaluation",
      status: "FAIL",
      command: "evaluate synthetic MCP bridge evidence",
      durationMs: 0,
      message: error.message
    };
  }
}
function agentColdWarmEvaluationCheck() {
try {
const coldCommand = "ocm @kova -- agent --local --agent main --session-id kova-agent-cold-warm --message hi --json";
@ -2931,6 +3044,45 @@ async function resourceRoleAttributionCheck(tmp) {
}
}
/**
 * Self-check: a role matched through the root command must only tag processes
 * that belong to the command tree.
 *
 * Classifies a synthetic gateway process and a synthetic command-tree process
 * against the loaded process roles, both with the MCP bridge smoke as root
 * command, and expects `mcp-runtime` on the command-tree process only.
 *
 * @returns {Promise<{id: string, status: string, command: string, durationMs: number, message?: string}>}
 *   PASS when both expectations hold, otherwise FAIL carrying the error message.
 */
async function resourceRootCommandRoleBoundaryCheck() {
  const smokeRootCommand = "node support/mcp-bridge-smoke.mjs --env kova-mcp-runtime-start-stop";
  try {
    const processRoles = await loadProcessRoles();
    const rolesFor = (command, existingRoles) => classifyRegistryRolesForProcess(
      { command },
      { processRoles, rootCommand: smokeRootCommand, existingRoles }
    );

    const gatewayRoles = rolesFor("openclaw-gateway", ["gateway", "gateway-tree"]);
    const commandRoles = rolesFor(smokeRootCommand, ["command-tree"]);

    assertEqual(gatewayRoles.includes("mcp-runtime"), false, "root command role must not tag gateway process");
    assertEqual(commandRoles.includes("mcp-runtime"), true, "root command role tags command tree process");

    return {
      id: "resource-root-command-role-boundary",
      status: "PASS",
      command: "classify synthetic gateway and command-tree roles",
      durationMs: 0
    };
  } catch (error) {
    return {
      id: "resource-root-command-role-boundary",
      status: "FAIL",
      command: "classify synthetic gateway and command-tree roles",
      durationMs: 0,
      message: error.message
    };
  }
}
async function processSnapshotCheck(tmp) {
const child = runCommand("node -e 'setTimeout(() => {}, 1200)'", {
timeoutMs: 5000,

View File

@ -21,7 +21,8 @@
"plugin-bad-manifest",
"plugin-missing-runtime-deps",
"dashboard",
"tui"
"tui",
"mcp-runtime"
],
"incompatibleSurfaces": [
"upgrade-existing-user"

View File

@ -0,0 +1,338 @@
#!/usr/bin/env node
import { spawn } from "node:child_process";
import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { join } from "node:path";
// MCP bridge smoke: spawns `ocm @<env> -- mcp serve` over stdio, sends a
// JSON-RPC `initialize` followed by `tools/list`, closes stdin, waits for the
// bridge to exit, and prints a single JSON summary on stdout. The exit code is
// 0 only when no error was recorded and the bridge process exited.
const SCHEMA_VERSION = "kova.mcpBridgeSmoke.v1";
// CLI: --env and --artifact-dir are required; --timeout-ms defaults to 30000.
const args = parseArgs(process.argv.slice(2));
const envName = requiredArg(args, "env");
const artifactDir = requiredArg(args, "artifact-dir");
const timeoutMs = positiveInt(args["timeout-ms"] ?? 30000, "timeout-ms");
// NOTE(review): presumably rejects env names that are not Kova-owned; the
// helper is defined outside this section — confirm.
assertKovaEnvName(envName);
const startedAtEpochMs = Date.now();
// Summary printed at the end. Fields start out null/empty and are filled in as
// each step completes, so a partial run still reports what it managed to do.
const summary = {
schemaVersion: SCHEMA_VERSION,
env: envName,
startedAt: new Date(startedAtEpochMs).toISOString(),
finishedAt: null,
durationMs: null,
gateway: null,
initializeMs: null,
toolsListMs: null,
shutdownMs: null,
toolCount: null,
toolNames: [],
processExited: false,
exitStatus: null,
exitSignal: null,
errors: [],
stderrSnippet: ""
};
// Declared outside the try block so the catch/finally handlers can clean up.
let child;
let tokenFile;
try {
// Resolve the env's config file and read the gateway auth token from it.
const envInfo = await readOcmEnvInfo(envName, timeoutMs);
const config = JSON.parse(await readFile(envInfo.configPath, "utf8"));
const token = config?.gateway?.auth?.token;
if (typeof token !== "string" || token.length === 0) {
throw new Error(`gateway.auth.token missing in ${envInfo.configPath}`);
}
// The token is handed to the bridge through a file in the artifact directory
// (passed below via --token-file); the file is removed again in `finally`.
await mkdir(artifactDir, { recursive: true });
tokenFile = join(artifactDir, "mcp-gateway-token");
await writeFile(tokenFile, token, { encoding: "utf8", mode: 0o600 });
// Explicit chmod in addition to the creation mode passed to writeFile.
await chmod(tokenFile, 0o600);
// Prefer the port from the OCM env metadata, falling back to the config file.
const gatewayPort = Number(envInfo.gatewayPort ?? config?.gateway?.port);
if (!Number.isInteger(gatewayPort) || gatewayPort <= 0) {
throw new Error("gateway port missing from OCM env metadata and OpenClaw config");
}
const gatewayUrl = `ws://127.0.0.1:${gatewayPort}`;
summary.gateway = { port: gatewayPort, url: gatewayUrl };
// Spawn the bridge without a shell; all three stdio streams are piped.
child = spawn("ocm", [
`@${envName}`,
"--",
"mcp",
"serve",
"--url",
gatewayUrl,
"--token-file",
tokenFile,
"--claude-channel-mode",
"off"
], {
stdio: ["pipe", "pipe", "pipe"],
shell: false,
env: process.env
});
const transport = createJsonLineTransport(child);
await transport.waitForSpawn();
// Step 1: JSON-RPC initialize, timed from just before the request is issued.
const initializeStarted = Date.now();
await transport.request("initialize", {
protocolVersion: "2024-11-05",
capabilities: {},
clientInfo: { name: "kova-mcp-bridge-smoke", version: "1.0.0" }
}, timeoutMs);
summary.initializeMs = Date.now() - initializeStarted;
transport.notify("notifications/initialized", {});
// Step 2: tools/list. A result without a `tools` array counts as zero tools.
const listStarted = Date.now();
const tools = await transport.request("tools/list", {}, timeoutMs);
summary.toolsListMs = Date.now() - listStarted;
const toolList = Array.isArray(tools?.tools) ? tools.tools : [];
summary.toolCount = toolList.length;
summary.toolNames = toolList.map((tool) => tool?.name).filter((name) => typeof name === "string").sort();
// Step 3: close stdin and wait for the bridge to exit; the wait is capped at
// 5000 ms even when a larger --timeout-ms was given.
const shutdownStarted = Date.now();
child.stdin.end();
const exit = await waitForExit(child, Math.min(timeoutMs, 5000));
summary.shutdownMs = Date.now() - shutdownStarted;
summary.processExited = true;
summary.exitStatus = exit.status;
summary.exitSignal = exit.signal;
} catch (error) {
// Any failure is recorded in the summary rather than rethrown.
summary.errors.push(formatError(error));
if (child && !summary.processExited) {
// Ask the bridge to stop, allow 3 s, then force-kill. After SIGKILL the exit
// is not awaited, so processExited stays false in the summary.
child.kill("SIGTERM");
try {
const exit = await waitForExit(child, 3000);
summary.processExited = true;
summary.exitStatus = exit.status;
summary.exitSignal = exit.signal;
} catch {
child.kill("SIGKILL");
}
}
} finally {
// `stderrText` is initialised on the child by createJsonLineTransport; only the
// last 4000 characters are kept in the summary.
if (child?.stderrText) {
summary.stderrSnippet = child.stderrText.slice(-4000);
}
// Remove the token file whether or not the smoke succeeded.
if (tokenFile) {
await rm(tokenFile, { force: true });
}
const finishedAtEpochMs = Date.now();
summary.finishedAt = new Date(finishedAtEpochMs).toISOString();
summary.durationMs = finishedAtEpochMs - startedAtEpochMs;
// The summary is printed as pretty-printed JSON on stdout.
console.log(JSON.stringify(summary, null, 2));
}
// Success requires both an empty error list and a confirmed bridge exit.
process.exit(summary.errors.length === 0 && summary.processExited ? 0 : 1);
/**
 * Wraps a spawned child process in a minimal newline-delimited JSON-RPC 2.0
 * client that talks over the child's stdin/stdout.
 *
 * Side effects on `processHandle`:
 *  - sets `processHandle.stderrText` and appends everything read from stderr,
 *  - switches stdout/stderr to UTF-8 string mode so multi-byte characters that
 *    straddle two chunks are decoded correctly,
 *  - attaches `data`, `error`, `spawn` and `exit` listeners.
 *
 * @param {import("node:child_process").ChildProcess} processHandle child with piped stdio
 * @returns {{
 *   waitForSpawn: () => Promise<void>,
 *   request: (method: string, params: unknown, requestTimeoutMs: number) => Promise<unknown>,
 *   notify: (method: string, params: unknown) => void
 * }}
 */
function createJsonLineTransport(processHandle) {
  let nextId = 1;
  let stdoutBuffer = "";
  const pending = new Map();
  let spawnError;
  let exitError;
  let spawned = false;

  // Fail every in-flight request with the same error and forget them.
  const rejectAllPending = (error) => {
    for (const waiter of pending.values()) {
      waiter.reject(error);
    }
    pending.clear();
  };

  // Route one complete stdout line to the request that is waiting for it.
  const dispatchLine = (line) => {
    if (line.trim().length === 0) {
      return;
    }
    let message;
    try {
      message = JSON.parse(line);
    } catch {
      // Non-JSON output on stdout (e.g. stray logging) is ignored.
      return;
    }
    // Valid JSON that is not an object (null, numbers, strings) cannot be a reply.
    if (message === null || typeof message !== "object") {
      return;
    }
    // Requests/notifications coming from the server carry `method`; their ids live
    // in the server's own id space and must not settle one of our pending calls.
    if ("method" in message) {
      return;
    }
    const waiter = pending.get(message.id);
    if (!waiter) {
      return;
    }
    pending.delete(message.id);
    if (message.error) {
      waiter.reject(new Error(message.error.message ?? JSON.stringify(message.error)));
    } else {
      waiter.resolve(message.result);
    }
  };

  processHandle.stderrText = "";
  // Decode at the stream level so a character split across chunks is not mangled.
  processHandle.stdout.setEncoding("utf8");
  processHandle.stderr.setEncoding("utf8");

  processHandle.stdout.on("data", (chunk) => {
    stdoutBuffer += chunk;
    for (;;) {
      const newline = stdoutBuffer.indexOf("\n");
      if (newline < 0) {
        break;
      }
      const line = stdoutBuffer.slice(0, newline).replace(/\r$/, "");
      stdoutBuffer = stdoutBuffer.slice(newline + 1);
      dispatchLine(line);
    }
  });
  processHandle.stderr.on("data", (chunk) => {
    processHandle.stderrText += chunk;
  });
  processHandle.stdin.on("error", rejectAllPending);
  processHandle.on("spawn", () => {
    spawned = true;
  });
  processHandle.on("error", (error) => {
    spawnError = error;
    rejectAllPending(error);
  });
  processHandle.on("exit", (status, signal) => {
    exitError = new Error(`MCP bridge exited before reply (status=${status ?? "null"}, signal=${signal ?? "none"})`);
    rejectAllPending(exitError);
  });

  return {
    /**
     * Resolves once the child emitted `spawn`; rejects with the child's `error`
     * if one was emitted first, or after 5 seconds without either event.
     */
    async waitForSpawn() {
      const deadline = Date.now() + 5000;
      while (!spawned) {
        if (spawnError) {
          throw spawnError;
        }
        if (Date.now() >= deadline) {
          throw new Error("MCP bridge process did not spawn");
        }
        await sleep(25);
      }
    },
    /**
     * Sends a JSON-RPC request and resolves with the reply's `result`.
     * Rejects on an error reply, on timeout, when stdin errors, or when the
     * child exits. If the child is already gone, rejects immediately instead
     * of writing into a dead pipe and waiting for the timeout.
     */
    request(method, params, requestTimeoutMs) {
      const unavailable = exitError ?? (spawned ? undefined : spawnError);
      if (unavailable) {
        return Promise.reject(unavailable);
      }
      const id = nextId;
      nextId += 1;
      const payload = { jsonrpc: "2.0", id, method, params };
      return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
          pending.delete(id);
          reject(new Error(`${method} timed out after ${requestTimeoutMs}ms`));
        }, requestTimeoutMs);
        pending.set(id, {
          resolve: (value) => {
            clearTimeout(timer);
            resolve(value);
          },
          reject: (error) => {
            clearTimeout(timer);
            reject(error);
          }
        });
        processHandle.stdin.write(`${JSON.stringify(payload)}\n`);
      });
    },
    /** Sends a JSON-RPC notification (no id, no reply expected). */
    notify(method, params) {
      processHandle.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`);
    }
  };
}
/**
 * Runs `ocm env show <env> --json` and returns the parsed JSON it printed.
 *
 * @param {string} env OCM environment name
 * @param {number} timeoutMs time budget passed to runProcess
 * @returns {Promise<any>} parsed stdout of the command
 * @throws {Error} when the command finishes with a non-zero status; the message
 *   carries the first stderr line, else the first stdout line, else the status
 */
async function readOcmEnvInfo(env, timeoutMs) {
  const { status, stdout, stderr } = await runProcess("ocm", ["env", "show", env, "--json"], timeoutMs);
  if (status === 0) {
    return JSON.parse(stdout);
  }
  const detail = firstLine(stderr) || firstLine(stdout) || status;
  throw new Error(`ocm env show failed: ${detail}`);
}
/**
 * Runs a command to completion while capturing its stdout and stderr.
 * The returned promise never rejects because of the child: a spawn `error`
 * is reported as status 127 and a timeout as status 124.
 *
 * @param {string} command executable to run
 * @param {string[]} args argument vector
 * @param {number} timeoutMs after this long the child gets SIGTERM, then SIGKILL 3s later
 * @returns {Promise<{status: number, signal: string|null, timedOut: boolean, stdout: string, stderr: string}>}
 */
function runProcess(command, args, timeoutMs) {
  return new Promise((resolve) => {
    const proc = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"], env: process.env });
    const captured = { stdout: "", stderr: "" };
    let timedOut = false;

    const escalate = () => {
      timedOut = true;
      proc.kill("SIGTERM");
      // unref so the follow-up SIGKILL timer never keeps the event loop alive.
      setTimeout(() => proc.kill("SIGKILL"), 3000).unref();
    };
    const killTimer = setTimeout(escalate, timeoutMs);

    for (const streamName of ["stdout", "stderr"]) {
      proc[streamName].on("data", (chunk) => {
        captured[streamName] += chunk.toString("utf8");
      });
    }

    proc.on("error", (error) => {
      clearTimeout(killTimer);
      // The error message replaces whatever stderr was captured so far.
      resolve({ status: 127, signal: null, timedOut, stdout: captured.stdout, stderr: error.message });
    });
    proc.on("close", (status, signal) => {
      clearTimeout(killTimer);
      const reportedStatus = timedOut ? 124 : (status ?? 1);
      resolve({ status: reportedStatus, signal, timedOut, stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
/**
 * Waits for a child process to exit.
 *
 * @param {import("node:child_process").ChildProcess} child process to observe
 * @param {number} timeoutMs how long to wait for the `exit` event
 * @returns {Promise<{status: number|null, signal: string|null}>} resolves at once
 *   when the child already has an exit code or signal, otherwise on `exit`
 * @throws {Error} (as a rejection) when the child is still running after `timeoutMs`
 */
function waitForExit(child, timeoutMs) {
  if (child.exitCode !== null || child.signalCode !== null) {
    return Promise.resolve({ status: child.exitCode, signal: child.signalCode });
  }
  return new Promise((resolve, reject) => {
    const onExit = (status, signal) => {
      clearTimeout(timer);
      resolve({ status, signal });
    };
    const timer = setTimeout(() => {
      // Detach the listener so repeated waits on the same child do not pile up handlers.
      child.off("exit", onExit);
      reject(new Error(`process did not exit within ${timeoutMs}ms`));
    }, timeoutMs);
    child.once("exit", onExit);
  });
}
/**
 * Parses a strict `--key value` argument list into a plain object.
 *
 * @param {string[]} values raw arguments
 * @returns {Record<string, string>} map of option name (without `--`) to its value
 * @throws {Error} on a token that is not a `--flag`, or on a flag whose value is
 *   missing, empty, or itself starts with `--`
 */
function parseArgs(values) {
  const options = {};
  let cursor = 0;
  while (cursor < values.length) {
    const flag = values[cursor];
    if (!flag.startsWith("--")) {
      throw new Error(`unexpected positional argument '${flag}'`);
    }
    const name = flag.slice(2);
    const argument = values[cursor + 1];
    const hasArgument = Boolean(argument) && !argument.startsWith("--");
    if (!hasArgument) {
      throw new Error(`missing value for --${name}`);
    }
    options[name] = argument;
    // Skip the flag and the value it consumed.
    cursor += 2;
  }
  return options;
}
/**
 * Returns a mandatory option from a parsed argument map.
 *
 * @param {Record<string, unknown>} values parsed options
 * @param {string} key option name without the leading `--`
 * @returns {string} the non-empty string stored under `key`
 * @throws {Error} when the entry is absent, not a string, or empty
 */
function requiredArg(values, key) {
  const candidate = values[key];
  const isPresent = typeof candidate === "string" && candidate.length > 0;
  if (isPresent) {
    return candidate;
  }
  throw new Error(`missing --${key}`);
}
/**
 * Coerces an option value to a strictly positive integer.
 *
 * @param {unknown} value raw value (string or number)
 * @param {string} key option name used in the error message
 * @returns {number} the value converted with `Number()`
 * @throws {Error} when the converted value is not an integer greater than zero
 */
function positiveInt(value, key) {
  const parsed = Number(value);
  const isValid = Number.isInteger(parsed) && parsed > 0;
  if (isValid) {
    return parsed;
  }
  throw new Error(`--${key} must be a positive integer`);
}
/**
 * Guards against env names outside the `kova-` namespace.
 * Accepts `kova-` followed by an alphanumeric character and then any mix of
 * alphanumerics, `.`, `_` and `-`.
 *
 * @param {string} value env name to validate
 * @throws {Error} when the name does not match that pattern
 */
function assertKovaEnvName(value) {
  const kovaEnvPattern = /^kova-[A-Za-z0-9][A-Za-z0-9._-]*$/;
  if (kovaEnvPattern.test(value)) {
    return;
  }
  throw new Error(`unsafe Kova env name '${value}'`);
}
/**
 * Turns a thrown value into a short string.
 *
 * @param {unknown} error anything caught by a `catch`
 * @returns {string} `error.message` for Error instances, `String(error)` otherwise
 */
function formatError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
/**
 * Extracts the first line of a value's trimmed string form.
 *
 * @param {unknown} value text (null/undefined count as empty)
 * @returns {string} everything before the first line break, after trimming
 */
function firstLine(value) {
  const text = String(value ?? "").trim();
  const [head = ""] = text.split(/\r?\n/);
  return head;
}
/**
 * Promise-based delay.
 *
 * @param {number} ms delay handed to setTimeout
 * @returns {Promise<void>} settles once the timer fires
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}

30
surfaces/mcp-runtime.json Normal file
View File

@ -0,0 +1,30 @@
{
"id": "mcp-runtime",
"title": "MCP Runtime",
"ownerArea": "mcp-runtime",
"description": "Start OpenClaw's MCP stdio bridge against a running gateway, list exposed tools, and prove the bridge process stops cleanly.",
"requiredStates": ["fresh"],
"targetKinds": ["npm", "channel", "runtime", "local-build"],
"requiredMetrics": ["mcpInitializeMs", "mcpToolsListMs", "mcpShutdownMs", "mcpToolCountMin", "mcpProcessLeaks", "gatewayReadyMs", "statusMs", "pluginLoadFailures", "peakRssMb"],
"processRoles": ["gateway", "command-tree", "mcp-runtime"],
"thresholds": {
"gatewayReadyMs": 30000,
"statusMs": 10000,
"mcpInitializeMs": 10000,
"mcpToolsListMs": 10000,
"mcpShutdownMs": 5000,
"mcpToolCountMin": 1,
"mcpProcessLeaks": 0,
"pluginLoadFailures": 0,
"peakRssMb": 900
},
"roleThresholds": {
"gateway": { "peakRssMb": 800, "maxCpuPercent": 250 },
"mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 },
"command-tree": { "peakRssMb": 900, "maxCpuPercent": 300 }
},
"diagnostics": {
"timelineRequiredForSourceBuild": false,
"expectedSpans": ["mcp.runtime.start", "mcp.runtime.stop", "gateway.websocket"]
}
}