feat: add mcp runtime validation scenario
This commit is contained in:
parent
b651614046
commit
3b04052170
@ -32,6 +32,11 @@
|
||||
"maxCpuPercent",
|
||||
"missingDependencyErrors",
|
||||
"modelsListMs",
|
||||
"mcpInitializeMs",
|
||||
"mcpProcessLeaks",
|
||||
"mcpShutdownMs",
|
||||
"mcpToolCountMin",
|
||||
"mcpToolsListMs",
|
||||
"openclawSlowestSpanMs",
|
||||
"openclawTimelineParseErrors",
|
||||
"peakRssMb",
|
||||
|
||||
@ -2,6 +2,12 @@
|
||||
"id": "mcp-runtime",
|
||||
"title": "MCP Runtime",
|
||||
"description": "MCP server and transport subprocesses started by OpenClaw or plugins.",
|
||||
"commandPatterns": ["mcp"],
|
||||
"processPatterns": ["mcp", "modelcontextprotocol"]
|
||||
"commandPatterns": [
|
||||
"(^|\\s)mcp\\s+serve(\\s|$)",
|
||||
"mcp-bridge-smoke\\.mjs"
|
||||
],
|
||||
"processPatterns": [
|
||||
"(^|\\s)mcp\\s+serve(\\s|$)",
|
||||
"mcp-bridge-smoke\\.mjs"
|
||||
]
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@
|
||||
{ "scenario": "agent-provider-recovery", "state": "mock-openai-provider", "timeoutMs": 240000 },
|
||||
{ "scenario": "dashboard-readiness", "state": "fresh" },
|
||||
{ "scenario": "tui-responsiveness", "state": "fresh" },
|
||||
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
|
||||
{ "scenario": "gateway-performance", "state": "many-bundled-plugins" },
|
||||
{ "scenario": "gateway-performance", "state": "gateway-already-running" },
|
||||
{ "scenario": "gateway-performance", "state": "stale-service-state" },
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
"doctor-cli": { "peakRssMb": 700, "maxCpuPercent": 300 },
|
||||
"tui-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
|
||||
"dashboard-cli": { "peakRssMb": 650, "maxCpuPercent": 250 },
|
||||
"mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 },
|
||||
"browser-sidecar": { "peakRssMb": 500, "maxCpuPercent": 250 },
|
||||
"mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 }
|
||||
},
|
||||
@ -78,6 +79,17 @@
|
||||
"soakHealthP95Ms": 1000,
|
||||
"peakRssMb": 1000
|
||||
}
|
||||
},
|
||||
"mcp-runtime": {
|
||||
"thresholds": {
|
||||
"mcpInitializeMs": 10000,
|
||||
"mcpToolsListMs": 10000,
|
||||
"mcpShutdownMs": 5000,
|
||||
"mcpToolCountMin": 1,
|
||||
"mcpProcessLeaks": 0,
|
||||
"statusMs": 10000,
|
||||
"pluginLoadFailures": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -124,6 +136,7 @@
|
||||
"failure-containment:broken-plugin-deps",
|
||||
"soak:large-workspace",
|
||||
"workspace-scan:large-workspace",
|
||||
"mcp-runtime:fresh",
|
||||
"cross-platform-smoke:slow-filesystem"
|
||||
]
|
||||
},
|
||||
@ -146,7 +159,7 @@
|
||||
"tui",
|
||||
"gateway-performance"
|
||||
],
|
||||
"warning": ["failure-containment", "soak", "workspace-scan", "cross-platform-smoke"]
|
||||
"warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "cross-platform-smoke"]
|
||||
},
|
||||
"scenarios": {
|
||||
"blocking": [
|
||||
@ -163,7 +176,8 @@
|
||||
"gateway-performance"
|
||||
],
|
||||
"warning": [
|
||||
"workspace-scan-pressure"
|
||||
"workspace-scan-pressure",
|
||||
"mcp-runtime-start-stop"
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -201,6 +215,7 @@
|
||||
{ "scenario": "failure-injection", "state": "broken-plugin-deps" },
|
||||
{ "scenario": "soak", "state": "large-workspace" },
|
||||
{ "scenario": "workspace-scan-pressure", "state": "large-workspace" },
|
||||
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
|
||||
{ "scenario": "cross-platform-smoke", "state": "slow-filesystem" }
|
||||
]
|
||||
},
|
||||
@ -343,6 +358,10 @@
|
||||
"state": "large-workspace",
|
||||
"timeoutMs": 240000
|
||||
},
|
||||
{
|
||||
"scenario": "mcp-runtime-start-stop",
|
||||
"state": "fresh"
|
||||
},
|
||||
{
|
||||
"scenario": "cross-platform-smoke",
|
||||
"state": "slow-filesystem"
|
||||
|
||||
44
scenarios/mcp-runtime-start-stop.json
Normal file
44
scenarios/mcp-runtime-start-stop.json
Normal file
@ -0,0 +1,44 @@
|
||||
{
|
||||
"id": "mcp-runtime-start-stop",
|
||||
"surface": "mcp-runtime",
|
||||
"title": "MCP Runtime Start/Stop",
|
||||
"objective": "Start OpenClaw's real MCP stdio bridge against the disposable gateway, perform a JSON-RPC initialize and tools/list smoke, then verify the bridge exits without leaking a runtime process.",
|
||||
"tags": ["mcp", "stdio", "gateway", "runtime", "start-stop"],
|
||||
"timeoutMs": 120000,
|
||||
"thresholds": {
|
||||
"gatewayReadyMs": 30000,
|
||||
"gatewayReadyHardTimeoutMs": 120000,
|
||||
"statusMs": 10000,
|
||||
"mcpInitializeMs": 10000,
|
||||
"mcpToolsListMs": 10000,
|
||||
"mcpShutdownMs": 5000,
|
||||
"mcpToolCountMin": 1,
|
||||
"mcpProcessLeaks": 0,
|
||||
"missingDependencyErrors": 0,
|
||||
"pluginLoadFailures": 0,
|
||||
"peakRssMb": 900
|
||||
},
|
||||
"phases": [
|
||||
{
|
||||
"id": "gateway",
|
||||
"title": "Gateway Start",
|
||||
"intent": "Start the gateway and confirm it is healthy before opening the MCP stdio bridge.",
|
||||
"commands": ["ocm start {env} {startSelector} --json", "ocm @{env} -- status"],
|
||||
"evidence": ["gateway status", "gateway port", "readiness classification"]
|
||||
},
|
||||
{
|
||||
"id": "mcp-bridge",
|
||||
"title": "MCP Bridge Smoke",
|
||||
"intent": "Spawn the real OpenClaw MCP stdio bridge, initialize it, list tools, and close it cleanly.",
|
||||
"commands": ["node {kovaRoot}/support/mcp-bridge-smoke.mjs --env {env} --artifact-dir {artifactDir} --timeout-ms 30000"],
|
||||
"evidence": ["MCP initialize timing", "tools/list timing", "tool count", "bridge process exit"]
|
||||
},
|
||||
{
|
||||
"id": "post-mcp-health",
|
||||
"title": "Post-MCP Gateway Health",
|
||||
"intent": "Verify the gateway remains responsive after the MCP bridge starts and exits.",
|
||||
"commands": ["ocm @{env} -- status", "ocm logs {env} --tail 300 --raw"],
|
||||
"evidence": ["status after MCP bridge", "MCP bridge errors", "gateway errors"]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -61,7 +61,7 @@ export function startResourceSampler(rootPid, options = {}) {
|
||||
roles.add("gateway-tree");
|
||||
}
|
||||
if (roles.size > 0) {
|
||||
for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers)) {
|
||||
for (const role of matchingRegistryRoles(process, options.rootCommand, roleMatchers, roles)) {
|
||||
roles.add(role);
|
||||
}
|
||||
}
|
||||
@ -290,6 +290,12 @@ export function diffProcessSnapshots(before, after, options = {}) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Classify one process against the registry role definitions.
 *
 * Compiles `options.processRoles` with `compileRoleMatchers`, wraps
 * `options.existingRoles` in a Set, and delegates the matching itself to
 * `matchingRegistryRoles`.
 *
 * @param {object} process - Process descriptor handed through to the matcher.
 * @param {object} [options]
 * @param {Array} [options.processRoles] - Role definitions; defaults to none.
 * @param {string} [options.rootCommand] - Root command passed to the matcher.
 * @param {Iterable<string>} [options.existingRoles] - Roles already assigned
 *   to the process; defaults to none.
 * @returns {*} Whatever `matchingRegistryRoles` returns (presumably an array
 *   of role ids — confirm against that helper).
 */
export function classifyRegistryRolesForProcess(process, options = {}) {
  const { processRoles, rootCommand, existingRoles } = options;
  const compiledMatchers = compileRoleMatchers(processRoles ?? []);
  const alreadyAssigned = new Set(existingRoles ?? []);
  return matchingRegistryRoles(process, rootCommand, compiledMatchers, alreadyAssigned);
}
|
||||
|
||||
function compileRoleMatchers(roles) {
|
||||
return roles.map((role) => ({
|
||||
id: role.id,
|
||||
@ -310,13 +316,14 @@ function compilePatterns(patterns) {
|
||||
});
|
||||
}
|
||||
|
||||
function matchingRegistryRoles(process, rootCommand, roleMatchers) {
|
||||
function matchingRegistryRoles(process, rootCommand, roleMatchers, existingRoles = new Set()) {
|
||||
const roles = [];
|
||||
const isCommandTree = existingRoles.has("command-tree");
|
||||
for (const role of roleMatchers) {
|
||||
if (role.id === "command-tree" || role.id === "gateway" || role.id === "gateway-tree") {
|
||||
continue;
|
||||
}
|
||||
if (matchesAny(role.processPatterns, process.command) || matchesAny(role.commandPatterns, rootCommand) ||
|
||||
if (matchesAny(role.processPatterns, process.command) || (isCommandTree && matchesAny(role.commandPatterns, rootCommand)) ||
|
||||
matchesAny(role.commandPatterns, process.command)) {
|
||||
roles.push(role.id);
|
||||
}
|
||||
|
||||
@ -77,6 +77,7 @@ export function evaluateRecord(record, scenario, options = {}) {
|
||||
const healthFailures = countHealthFailures(record);
|
||||
const healthP95Ms = collectHealthP95(record);
|
||||
const soakEvidence = collectSoakEvidence(allResults);
|
||||
const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults);
|
||||
const listeningFailures = countListeningFailures(record);
|
||||
const tcpConnectMaxMs = collectTcpConnectMax(record);
|
||||
const timeToListeningMs = collectTimeToListening(record);
|
||||
@ -230,6 +231,69 @@ export function evaluateRecord(record, scenario, options = {}) {
|
||||
});
|
||||
}
|
||||
|
||||
if (mcpBridgeEvidence.available) {
|
||||
if (typeof thresholds.mcpInitializeMs === "number" && mcpBridgeEvidence.initializeMs !== null && mcpBridgeEvidence.initializeMs > thresholds.mcpInitializeMs) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpInitializeMs",
|
||||
expected: `<= ${thresholds.mcpInitializeMs}`,
|
||||
actual: mcpBridgeEvidence.initializeMs,
|
||||
message: `MCP bridge initialize took ${mcpBridgeEvidence.initializeMs}ms, over threshold ${thresholds.mcpInitializeMs}ms`
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof thresholds.mcpToolsListMs === "number" && mcpBridgeEvidence.toolsListMs !== null && mcpBridgeEvidence.toolsListMs > thresholds.mcpToolsListMs) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpToolsListMs",
|
||||
expected: `<= ${thresholds.mcpToolsListMs}`,
|
||||
actual: mcpBridgeEvidence.toolsListMs,
|
||||
message: `MCP tools/list took ${mcpBridgeEvidence.toolsListMs}ms, over threshold ${thresholds.mcpToolsListMs}ms`
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof thresholds.mcpShutdownMs === "number" && mcpBridgeEvidence.shutdownMs !== null && mcpBridgeEvidence.shutdownMs > thresholds.mcpShutdownMs) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpShutdownMs",
|
||||
expected: `<= ${thresholds.mcpShutdownMs}`,
|
||||
actual: mcpBridgeEvidence.shutdownMs,
|
||||
message: `MCP bridge shutdown took ${mcpBridgeEvidence.shutdownMs}ms, over threshold ${thresholds.mcpShutdownMs}ms`
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof thresholds.mcpToolCountMin === "number" && mcpBridgeEvidence.toolCount !== null && mcpBridgeEvidence.toolCount < thresholds.mcpToolCountMin) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpToolCountMin",
|
||||
expected: `>= ${thresholds.mcpToolCountMin}`,
|
||||
actual: mcpBridgeEvidence.toolCount,
|
||||
message: `MCP bridge exposed ${mcpBridgeEvidence.toolCount} tool(s), below required ${thresholds.mcpToolCountMin}`
|
||||
});
|
||||
}
|
||||
|
||||
const leakCount = mcpBridgeEvidence.processExited === false ? 1 : 0;
|
||||
if (typeof thresholds.mcpProcessLeaks === "number" && leakCount > thresholds.mcpProcessLeaks) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpProcessLeaks",
|
||||
expected: `<= ${thresholds.mcpProcessLeaks}`,
|
||||
actual: leakCount,
|
||||
message: "MCP bridge process did not exit cleanly after the smoke"
|
||||
});
|
||||
}
|
||||
|
||||
if (mcpBridgeEvidence.errors.length > 0) {
|
||||
violations.push({
|
||||
kind: "mcp",
|
||||
metric: "mcpBridgeErrors",
|
||||
expected: "0",
|
||||
actual: mcpBridgeEvidence.errors.length,
|
||||
message: `MCP bridge smoke reported ${mcpBridgeEvidence.errors.length} error(s): ${mcpBridgeEvidence.errors[0]}`
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof thresholds.rssGrowthMb === "number" && rssGrowthMb !== null && rssGrowthMb > thresholds.rssGrowthMb) {
|
||||
violations.push({
|
||||
kind: "soak",
|
||||
@ -495,6 +559,15 @@ export function evaluateRecord(record, scenario, options = {}) {
|
||||
healthFailures,
|
||||
healthP95Ms,
|
||||
soakEvidence,
|
||||
mcpBridgeEvidence,
|
||||
mcpInitializeMs: mcpBridgeEvidence.initializeMs,
|
||||
mcpToolsListMs: mcpBridgeEvidence.toolsListMs,
|
||||
mcpShutdownMs: mcpBridgeEvidence.shutdownMs,
|
||||
mcpToolCount: mcpBridgeEvidence.toolCount,
|
||||
mcpToolNames: mcpBridgeEvidence.toolNames,
|
||||
mcpProcessExited: mcpBridgeEvidence.processExited,
|
||||
mcpProcessLeaks: mcpBridgeEvidence.available ? (mcpBridgeEvidence.processExited === false ? 1 : 0) : null,
|
||||
mcpErrors: mcpBridgeEvidence.errors,
|
||||
soakDurationMs: soakEvidence.durationMs,
|
||||
soakIterations: soakEvidence.iterations,
|
||||
soakCommandP95Ms: soakEvidence.commandP95Ms,
|
||||
@ -1602,6 +1675,65 @@ function parseSoakLoopOutput(result) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Aggregate the output of every MCP bridge smoke command into one evidence
 * record.
 *
 * Only results whose command mentions "mcp-bridge-smoke.mjs" and whose stdout
 * parses via `parseMcpBridgeSmokeOutput` contribute. When none do, an
 * `available: false` record with null/empty fields is returned.
 *
 * Aggregation is worst-case across smokes: timings use `maxNullable`
 * (presumably the largest non-null value — confirm against that helper),
 * `toolCount` uses the smallest reported count because it is checked against
 * a minimum threshold, and `processExited` is true only when every smoke
 * reported `processExited === true`.
 *
 * @param {Array<object>} results - Command results carrying `command` and `stdout`.
 * @returns {object} Evidence record with schemaVersion "kova.mcpBridgeEvidence.v1".
 */
function collectMcpBridgeEvidence(results) {
  const smokes = results
    .filter((result) => result.command?.includes("mcp-bridge-smoke.mjs"))
    .map((result) => parseMcpBridgeSmokeOutput(result))
    .filter(Boolean);

  if (smokes.length === 0) {
    return {
      schemaVersion: "kova.mcpBridgeEvidence.v1",
      available: false,
      initializeMs: null,
      toolsListMs: null,
      shutdownMs: null,
      toolCount: null,
      toolNames: [],
      processExited: null,
      errors: [],
      smokes: []
    };
  }

  // toolCount is compared against a *minimum* threshold, so the lowest count
  // is the worst case; taking the maximum would hide a smoke that exposed too
  // few tools behind another smoke that exposed enough.
  const toolCounts = smokes
    .map((smoke) => smoke.toolCount)
    .filter((count) => Number.isFinite(count));

  return {
    schemaVersion: "kova.mcpBridgeEvidence.v1",
    available: true,
    initializeMs: maxNullable(...smokes.map((smoke) => smoke.initializeMs)),
    toolsListMs: maxNullable(...smokes.map((smoke) => smoke.toolsListMs)),
    shutdownMs: maxNullable(...smokes.map((smoke) => smoke.shutdownMs)),
    toolCount: toolCounts.length > 0 ? Math.min(...toolCounts) : null,
    toolNames: [...new Set(smokes.flatMap((smoke) => smoke.toolNames ?? []))].sort(),
    processExited: smokes.every((smoke) => smoke.processExited === true),
    errors: smokes.flatMap((smoke) => smoke.errors ?? []),
    smokes: smokes.map((smoke) => ({
      durationMs: smoke.durationMs ?? null,
      initializeMs: smoke.initializeMs ?? null,
      toolsListMs: smoke.toolsListMs ?? null,
      shutdownMs: smoke.shutdownMs ?? null,
      toolCount: smoke.toolCount ?? null,
      processExited: smoke.processExited ?? null,
      exitStatus: smoke.exitStatus ?? null,
      exitSignal: smoke.exitSignal ?? null,
      errors: smoke.errors ?? []
    }))
  };
}
|
||||
|
||||
/**
 * Extract the MCP bridge smoke JSON payload from a command result's stdout.
 *
 * The payload is looked for between a `{` and the last `}` in stdout, so
 * text after the JSON (e.g. a trailing log line) is tolerated. If the slice
 * starting at the first `{` is not valid JSON — for example because an
 * earlier log line contains a brace — later `{` positions are tried in turn.
 * The first slice that parses decides the outcome.
 *
 * @param {{ stdout?: string }} result - Command result; a missing stdout is
 *   treated as empty.
 * @returns {object|null} The parsed payload when its `schemaVersion` is
 *   "kova.mcpBridgeSmoke.v1"; otherwise null.
 */
function parseMcpBridgeSmokeOutput(result) {
  const text = String(result?.stdout ?? "");
  const jsonEnd = text.lastIndexOf("}");

  for (
    let jsonStart = text.indexOf("{");
    jsonStart >= 0 && jsonStart < jsonEnd;
    jsonStart = text.indexOf("{", jsonStart + 1)
  ) {
    let parsed;
    try {
      parsed = JSON.parse(text.slice(jsonStart, jsonEnd + 1));
    } catch {
      // Not JSON from this brace; a later brace may start the real payload.
      continue;
    }
    return parsed?.schemaVersion === "kova.mcpBridgeSmoke.v1" ? parsed : null;
  }

  return null;
}
|
||||
|
||||
/**
 * Count the health samples that did not succeed.
 *
 * Falsy entries (null, undefined) are ignored; any other entry whose `ok`
 * property is falsy counts as one failure.
 *
 * @param {Array<object|null|undefined>} samples - Health samples to inspect.
 * @returns {number} Number of non-ok samples.
 */
function healthFailureCount(samples) {
  return samples.reduce(
    (failures, sample) => (sample && !sample.ok ? failures + 1 : failures),
    0
  );
}
|
||||
|
||||
@ -146,6 +146,9 @@ export function renderMarkdownReport(report) {
|
||||
lines.push(`- Structured event-loop delay: ${record.measurements.eventLoopDelayMs ?? "unknown"} ms`);
|
||||
lines.push(`- Runtime deps staging: ${record.measurements.runtimeDepsStagingMs ?? "unknown"} ms`);
|
||||
lines.push(`- Runtime deps warm reuse: ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"} (cold installs ${record.measurements.coldRuntimeDepsInstallCount ?? "unknown"}; warm restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm max ${record.measurements.warmRuntimeDepsStagingMs ?? "unknown"} ms)`);
|
||||
if (record.measurements.mcpBridgeEvidence?.available) {
|
||||
lines.push(`- MCP bridge: initialize ${record.measurements.mcpInitializeMs ?? "unknown"} ms; tools/list ${record.measurements.mcpToolsListMs ?? "unknown"} ms; tools ${record.measurements.mcpToolCount ?? "unknown"}; shutdown ${record.measurements.mcpShutdownMs ?? "unknown"} ms; exited ${record.measurements.mcpProcessExited ?? "unknown"}`);
|
||||
}
|
||||
lines.push(`- Provider/model timing: ${record.measurements.providerModelTimingMs ?? "unknown"} ms`);
|
||||
lines.push(`- Agent turn: ${record.measurements.agentTurnMs ?? "unknown"} ms (${record.measurements.agentResponseOk ?? "not-run"})`);
|
||||
if (record.measurements.agentTurnCount > 0) {
|
||||
@ -844,7 +847,7 @@ export function renderPasteSummary(report) {
|
||||
const roleText = compactRolePeaks(record.measurements).slice(0, 4)
|
||||
.map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`)
|
||||
.join(", ") || "unknown";
|
||||
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
|
||||
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
|
||||
}
|
||||
} else if (record.violations?.length > 0) {
|
||||
if (record.measurements) {
|
||||
|
||||
@ -125,7 +125,7 @@ export async function executeScenario(scenario, context) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const commands = materializeCommands(phase.commands ?? [], commandValues(context, envName));
|
||||
const commands = materializeScenarioPhaseCommands(phase, context, envName, artifactDir);
|
||||
const results = [];
|
||||
for (const [commandIndex, command] of commands.entries()) {
|
||||
const result = await runScenarioCommand(command, context, envName, artifactDir, phase.id, commandIndex, authPolicy);
|
||||
@ -347,7 +347,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
|
||||
title: phase.title,
|
||||
intent: phase.intent,
|
||||
expectedAgentFailure: phase.expectedAgentFailure === true,
|
||||
commands: materializeCommands(phase.commands ?? [], commandValues(context, envName, artifactDir)),
|
||||
commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir),
|
||||
evidence: phase.evidence ?? []
|
||||
});
|
||||
|
||||
@ -429,6 +429,10 @@ function buildStateLifecyclePhase(context, envName, scenario, kind, steps, artif
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Expand a scenario phase's command templates into concrete commands.
 *
 * The substitution values come from `commandValues(context, envName,
 * artifactDir)`; a phase without `commands` yields the result of
 * materializing an empty list.
 *
 * @param {object} phase - Scenario phase; `phase.commands` is optional.
 * @param {object} context - Run context forwarded to `commandValues`.
 * @param {string} envName - Environment name forwarded to `commandValues`.
 * @param {string} artifactDir - Artifact directory forwarded to `commandValues`.
 * @returns {*} Whatever `materializeCommands` returns for the templates.
 */
function materializeScenarioPhaseCommands(phase, context, envName, artifactDir) {
  const templates = phase.commands ?? [];
  const substitutions = commandValues(context, envName, artifactDir);
  return materializeCommands(templates, substitutions);
}
|
||||
|
||||
async function executeStateLifecycleSteps(context, envName, scenario, kind, steps, artifactDir, phaseId = null, authPolicy = null) {
|
||||
if (!Array.isArray(steps) || steps.length === 0) {
|
||||
return null;
|
||||
@ -726,7 +730,7 @@ function commandValues(context, envName, artifactDir = "") {
|
||||
target: context.target,
|
||||
from: context.from ?? "",
|
||||
sourceEnv: quoteShell(context.sourceEnv ?? ""),
|
||||
artifactDir,
|
||||
artifactDir: artifactDir ? quoteShell(artifactDir) : "",
|
||||
kovaRoot: quoteShell(repoRoot),
|
||||
startSelector: context.targetPlan.startSelector,
|
||||
upgradeSelector: context.targetPlan.upgradeSelector,
|
||||
|
||||
@ -31,7 +31,7 @@ import {
|
||||
parseProviderRequestLog,
|
||||
parseTimelineProviderRequestLog
|
||||
} from "./collectors/provider.mjs";
|
||||
import { captureProcessSnapshot, diffProcessSnapshots } from "./collectors/resources.mjs";
|
||||
import { captureProcessSnapshot, classifyRegistryRolesForProcess, diffProcessSnapshots } from "./collectors/resources.mjs";
|
||||
import { renderMarkdownReport, renderPasteSummary, renderReportSummary } from "./report.mjs";
|
||||
import { compareReports, renderCompareSummary } from "./compare.mjs";
|
||||
|
||||
@ -179,6 +179,20 @@ export async function runSelfCheck(flags = {}) {
|
||||
assertEqual(commands.some((command) => command.includes("ocm service restart")), true, "workspace restart command");
|
||||
assertEqual(commands.some((command) => command.includes("run-soak-loop.mjs") && command.includes("--duration-ms 15000")), true, "workspace repeated command loop");
|
||||
}));
|
||||
checks.push(await jsonCommandCheck("mcp-runtime-dry-run-json", `node bin/kova.mjs run --target runtime:stable --scenario mcp-runtime-start-stop --state fresh --report-dir ${quoteShell(tmp)} --json`, async (data) => {
|
||||
const report = JSON.parse(await readFile(data.jsonPath, "utf8"));
|
||||
const record = report.records?.[0];
|
||||
assertEqual(record?.surface, "mcp-runtime", "MCP runtime surface");
|
||||
const commands = record?.phases?.flatMap((phase) => phase.commands ?? []) ?? [];
|
||||
const bridgeCommand = commands.find((command) => command.includes("mcp-bridge-smoke.mjs")) ?? "";
|
||||
assertEqual(bridgeCommand.includes("--artifact-dir '"), true, "MCP bridge helper receives quoted artifact dir");
|
||||
assertEqual(commands.some((command) => command.includes("ocm start") && command.includes("--json")), true, "MCP gateway start command");
|
||||
assertEqual(record?.thresholds?.mcpProcessLeaks, 0, "MCP process leak threshold");
|
||||
}));
|
||||
checks.push(await commandCheck(
|
||||
"mcp-runtime-role-patterns",
|
||||
"node -e \"const role=require('./process-roles/mcp-runtime.json'); if (role.commandPatterns.includes('mcp') || role.processPatterns.includes('mcp') || role.processPatterns.some((p)=>p.includes('modelcontextprotocol'))) process.exit(1);\""
|
||||
));
|
||||
checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => {
|
||||
assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema");
|
||||
assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id");
|
||||
@ -231,6 +245,7 @@ export async function runSelfCheck(flags = {}) {
|
||||
checks.push(reportRecommendedNextScenarioCheck());
|
||||
checks.push(readinessClassificationCheck());
|
||||
checks.push(await resourceRoleAttributionCheck(tmp));
|
||||
checks.push(await resourceRootCommandRoleBoundaryCheck());
|
||||
checks.push(await processSnapshotCheck(tmp));
|
||||
checks.push(roleThresholdEvaluationCheck());
|
||||
checks.push(thresholdPolicyCalibrationCheck());
|
||||
@ -251,6 +266,7 @@ export async function runSelfCheck(flags = {}) {
|
||||
checks.push(agentAuthFailureEvaluationCheck());
|
||||
checks.push(await soakLoopRunnerCheck(tmp));
|
||||
checks.push(soakTrendEvaluationCheck());
|
||||
checks.push(mcpBridgeEvidenceEvaluationCheck());
|
||||
checks.push(await jsonCommandCheck(
|
||||
"dry-run-state-lifecycle-json",
|
||||
`node bin/kova.mjs run --target runtime:stable --scenario fresh-install --state missing-plugin-index --report-dir ${quoteShell(tmp)} --json`,
|
||||
@ -2219,6 +2235,103 @@ function soakTrendEvaluationCheck() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Self-check: feed synthetic MCP bridge smoke output through `evaluateRecord`.
 *
 * Two records are evaluated. The first carries a healthy smoke payload and
 * must stay PASS with the smoke's timings, tool count and a leak count of 0
 * in its measurements. The second reuses the evaluated record with
 * `processExited: false` in the payload and must become FAIL with an
 * `mcpProcessLeaks` violation.
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   PASS result, or FAIL with the message of the first failed assertion or
 *   thrown error.
 */
function mcpBridgeEvidenceEvaluationCheck() {
  const checkId = "mcp-bridge-evidence-evaluation";
  const checkCommand = "evaluate synthetic MCP bridge evidence";

  // Both evaluations run against the same synthetic surface and target plan.
  const evaluationOptions = () => ({ surface: { thresholds: {} }, targetPlan: { kind: "npm" } });

  // One "mcp-bridge" phase whose only command result prints the given payload.
  const bridgePhase = (payload) => ({
    id: "mcp-bridge",
    results: [{
      command: "node support/mcp-bridge-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova",
      status: 0,
      timedOut: false,
      durationMs: 1800,
      stdout: JSON.stringify(payload),
      stderr: ""
    }],
    metrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() }
  });

  try {
    const smoke = {
      schemaVersion: "kova.mcpBridgeSmoke.v1",
      durationMs: 1800,
      initializeMs: 120,
      toolsListMs: 90,
      shutdownMs: 45,
      toolCount: 8,
      toolNames: ["conversations_list", "messages_read"],
      processExited: true,
      exitStatus: 0,
      exitSignal: null,
      errors: []
    };

    const record = {
      scenario: "mcp-runtime-start-stop",
      status: "PASS",
      phases: [bridgePhase(smoke)],
      finalMetrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() }
    };
    const healthyScenario = {
      id: "mcp-runtime-start-stop",
      thresholds: {
        mcpInitializeMs: 10000,
        mcpToolsListMs: 10000,
        mcpShutdownMs: 5000,
        mcpToolCountMin: 1,
        mcpProcessLeaks: 0,
        pluginLoadFailures: 0
      }
    };
    evaluateRecord(record, healthyScenario, evaluationOptions());

    assertEqual(record.status, "PASS", "MCP bridge record status");
    assertEqual(record.measurements.mcpInitializeMs, 120, "MCP initialize ms");
    assertEqual(record.measurements.mcpToolsListMs, 90, "MCP tools/list ms");
    assertEqual(record.measurements.mcpShutdownMs, 45, "MCP shutdown ms");
    assertEqual(record.measurements.mcpToolCount, 8, "MCP tool count");
    assertEqual(record.measurements.mcpProcessLeaks, 0, "MCP process leak count");

    // Start from the already-evaluated record, but reset everything the first
    // evaluation wrote so the leak run is judged on its own.
    const leaked = {
      ...record,
      status: "PASS",
      violations: [],
      measurements: undefined,
      phases: [bridgePhase({ ...smoke, processExited: false })]
    };
    const leakScenario = {
      id: "mcp-runtime-start-stop",
      thresholds: { mcpProcessLeaks: 0 }
    };
    evaluateRecord(leaked, leakScenario, evaluationOptions());

    assertEqual(leaked.status, "FAIL", "MCP leaked process status");
    const hasLeakViolation = leaked.violations.some((violation) => violation.metric === "mcpProcessLeaks");
    assertEqual(hasLeakViolation, true, "MCP process leak violation");

    return {
      id: checkId,
      status: "PASS",
      command: checkCommand,
      durationMs: 0
    };
  } catch (error) {
    return {
      id: checkId,
      status: "FAIL",
      command: checkCommand,
      durationMs: 0,
      message: error.message
    };
  }
}
|
||||
|
||||
function agentColdWarmEvaluationCheck() {
|
||||
try {
|
||||
const coldCommand = "ocm @kova -- agent --local --agent main --session-id kova-agent-cold-warm --message hi --json";
|
||||
@ -2931,6 +3044,45 @@ async function resourceRoleAttributionCheck(tmp) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Self-check: classifies two synthetic processes that share the same MCP
 * smoke root command and asserts that only the command-tree process is tagged
 * with the "mcp-runtime" role, while the gateway process is not.
 *
 * @returns {Promise<object>} A check record with status "PASS", or "FAIL" plus
 *   the failure message when loading roles, classifying, or asserting throws.
 */
async function resourceRootCommandRoleBoundaryCheck() {
  const smokeCommand = "node support/mcp-bridge-smoke.mjs --env kova-mcp-runtime-start-stop";
  // Both outcomes share the same envelope; only status (and message) differ.
  const report = (status, extra = {}) => ({
    id: "resource-root-command-role-boundary",
    status,
    command: "classify synthetic gateway and command-tree roles",
    durationMs: 0,
    ...extra
  });

  try {
    const processRoles = await loadProcessRoles();
    const classify = (command, existingRoles) => classifyRegistryRolesForProcess(
      { command },
      { processRoles, rootCommand: smokeCommand, existingRoles }
    );

    const gatewayRoles = classify("openclaw-gateway", ["gateway", "gateway-tree"]);
    const commandRoles = classify(smokeCommand, ["command-tree"]);

    assertEqual(gatewayRoles.includes("mcp-runtime"), false, "root command role must not tag gateway process");
    assertEqual(commandRoles.includes("mcp-runtime"), true, "root command role tags command tree process");
    return report("PASS");
  } catch (error) {
    return report("FAIL", { message: error.message });
  }
}
|
||||
|
||||
async function processSnapshotCheck(tmp) {
|
||||
const child = runCommand("node -e 'setTimeout(() => {}, 1200)'", {
|
||||
timeoutMs: 5000,
|
||||
|
||||
@ -21,7 +21,8 @@
|
||||
"plugin-bad-manifest",
|
||||
"plugin-missing-runtime-deps",
|
||||
"dashboard",
|
||||
"tui"
|
||||
"tui",
|
||||
"mcp-runtime"
|
||||
],
|
||||
"incompatibleSurfaces": [
|
||||
"upgrade-existing-user"
|
||||
|
||||
338
support/mcp-bridge-smoke.mjs
Normal file
338
support/mcp-bridge-smoke.mjs
Normal file
@ -0,0 +1,338 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
|
||||
const SCHEMA_VERSION = "kova.mcpBridgeSmoke.v1";

// CLI contract: --env <kova-*> --artifact-dir <dir> [--timeout-ms <positive int>]
const args = parseArgs(process.argv.slice(2));
const envName = requiredArg(args, "env");
const artifactDir = requiredArg(args, "artifact-dir");
const timeoutMs = positiveInt(args["timeout-ms"] ?? 30000, "timeout-ms");
assertKovaEnvName(envName);

const startedAtEpochMs = Date.now();
// Printed as a single JSON document on stdout once the run finishes (see the
// `finally` block below), whether the run succeeded or failed.
const summary = {
  schemaVersion: SCHEMA_VERSION,
  env: envName,
  startedAt: new Date(startedAtEpochMs).toISOString(),
  finishedAt: null,
  durationMs: null,
  gateway: null,
  initializeMs: null,
  toolsListMs: null,
  shutdownMs: null,
  toolCount: null,
  toolNames: [],
  processExited: false,
  exitStatus: null,
  exitSignal: null,
  errors: [],
  stderrSnippet: ""
};

let child;
let tokenFile;

try {
  const envInfo = await readOcmEnvInfo(envName, timeoutMs);
  const config = JSON.parse(await readFile(envInfo.configPath, "utf8"));
  const token = config?.gateway?.auth?.token;
  if (typeof token !== "string" || token.length === 0) {
    throw new Error(`gateway.auth.token missing in ${envInfo.configPath}`);
  }

  // The token is handed to the bridge through a file rather than argv.
  await mkdir(artifactDir, { recursive: true });
  tokenFile = join(artifactDir, "mcp-gateway-token");
  await writeFile(tokenFile, token, { encoding: "utf8", mode: 0o600 });
  // writeFile's `mode` only applies to newly created files, so tighten
  // permissions explicitly in case the path already existed.
  await chmod(tokenFile, 0o600);

  const gatewayPort = Number(envInfo.gatewayPort ?? config?.gateway?.port);
  if (!Number.isInteger(gatewayPort) || gatewayPort <= 0) {
    throw new Error("gateway port missing from OCM env metadata and OpenClaw config");
  }
  const gatewayUrl = `ws://127.0.0.1:${gatewayPort}`;
  summary.gateway = { port: gatewayPort, url: gatewayUrl };

  child = spawn("ocm", [
    `@${envName}`,
    "--",
    "mcp",
    "serve",
    "--url",
    gatewayUrl,
    "--token-file",
    tokenFile,
    "--claude-channel-mode",
    "off"
  ], {
    stdio: ["pipe", "pipe", "pipe"],
    shell: false,
    env: process.env
  });

  const transport = createJsonLineTransport(child);
  await transport.waitForSpawn();

  const initializeStarted = Date.now();
  await transport.request("initialize", {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "kova-mcp-bridge-smoke", version: "1.0.0" }
  }, timeoutMs);
  summary.initializeMs = Date.now() - initializeStarted;

  transport.notify("notifications/initialized", {});

  const listStarted = Date.now();
  const tools = await transport.request("tools/list", {}, timeoutMs);
  summary.toolsListMs = Date.now() - listStarted;
  const toolList = Array.isArray(tools?.tools) ? tools.tools : [];
  summary.toolCount = toolList.length;
  summary.toolNames = toolList.map((tool) => tool?.name).filter((name) => typeof name === "string").sort();

  // Closing stdin is the shutdown request; the bridge then gets at most 5s
  // (or the overall timeout, if smaller) to exit on its own.
  const shutdownStarted = Date.now();
  child.stdin.end();
  const exit = await waitForExit(child, Math.min(timeoutMs, 5000));
  summary.shutdownMs = Date.now() - shutdownStarted;
  summary.processExited = true;
  summary.exitStatus = exit.status;
  summary.exitSignal = exit.signal;
} catch (error) {
  summary.errors.push(formatError(error));
  if (child && !summary.processExited) {
    // Escalate: SIGTERM, wait up to 3s, then SIGKILL. After SIGKILL the exit
    // is not awaited, so processExited stays false for that case.
    child.kill("SIGTERM");
    try {
      const exit = await waitForExit(child, 3000);
      summary.processExited = true;
      summary.exitStatus = exit.status;
      summary.exitSignal = exit.signal;
    } catch {
      child.kill("SIGKILL");
    }
  }
} finally {
  if (child?.stderrText) {
    summary.stderrSnippet = child.stderrText.slice(-4000);
  }
  if (tokenFile) {
    try {
      await rm(tokenFile, { force: true });
    } catch (error) {
      // A cleanup failure must not suppress the summary below; record it so
      // the run is reported as failed instead of dying with an unhandled
      // rejection before anything is printed.
      summary.errors.push(`failed to remove gateway token file: ${formatError(error)}`);
    }
  }
  const finishedAtEpochMs = Date.now();
  summary.finishedAt = new Date(finishedAtEpochMs).toISOString();
  summary.durationMs = finishedAtEpochMs - startedAtEpochMs;
  console.log(JSON.stringify(summary, null, 2));
}

// Exit 0 only when nothing failed and the bridge process is confirmed gone.
process.exit(summary.errors.length === 0 && summary.processExited ? 0 : 1);
|
||||
|
||||
/**
 * Wraps a child process in a minimal newline-delimited JSON-RPC 2.0 client.
 *
 * Requests are written to the child's stdin as one JSON document per line;
 * stdout is split on newlines and each parsed message is matched to a pending
 * request by its `id`. Lines that are blank, not valid JSON, not JSON objects,
 * or that carry an unknown `id` are ignored. All stderr output is accumulated
 * on `processHandle.stderrText`.
 *
 * Every pending request is rejected when stdin errors, the process emits
 * `error`, or the process exits.
 *
 * @param {object} processHandle - Child process with piped stdin/stdout/stderr.
 * @returns {{waitForSpawn: Function, request: Function, notify: Function}}
 */
function createJsonLineTransport(processHandle) {
  let nextId = 1;
  let stdout = "";
  const pending = new Map();
  let spawnError;
  let spawned = false;

  // Fails every in-flight request with the same error and forgets them.
  const rejectAllPending = (error) => {
    for (const waiter of pending.values()) {
      waiter.reject(error);
    }
    pending.clear();
  };

  processHandle.stderrText = "";

  processHandle.stdout.on("data", (chunk) => {
    stdout += chunk.toString("utf8");
    for (;;) {
      const newline = stdout.indexOf("\n");
      if (newline < 0) {
        // Keep the partial line buffered until the rest arrives.
        break;
      }
      const line = stdout.slice(0, newline).replace(/\r$/, "");
      stdout = stdout.slice(newline + 1);
      if (line.trim().length === 0) {
        continue;
      }
      let message;
      try {
        message = JSON.parse(line);
      } catch {
        // Non-JSON output on stdout is tolerated and skipped.
        continue;
      }
      // `JSON.parse("null")` yields null; reading `.id` from it would throw
      // inside this event handler and crash the process, so skip anything
      // that is not an object.
      if (message === null || typeof message !== "object") {
        continue;
      }
      const waiter = pending.get(message.id);
      if (!waiter) {
        continue;
      }
      pending.delete(message.id);
      if (message.error) {
        waiter.reject(new Error(message.error.message ?? JSON.stringify(message.error)));
      } else {
        waiter.resolve(message.result);
      }
    }
  });

  processHandle.stderr.on("data", (chunk) => {
    processHandle.stderrText += chunk.toString("utf8");
  });
  processHandle.stdin.on("error", rejectAllPending);

  processHandle.on("spawn", () => {
    spawned = true;
  });
  processHandle.on("error", (error) => {
    spawnError = error;
    rejectAllPending(error);
  });
  processHandle.on("exit", (status, signal) => {
    rejectAllPending(
      new Error(`MCP bridge exited before reply (status=${status ?? "null"}, signal=${signal ?? "none"})`)
    );
  });

  return {
    /**
     * Polls every 25ms until the `spawn` event has fired. Rethrows a process
     * `error` if one was seen first, and gives up after 5 seconds.
     */
    async waitForSpawn() {
      const deadline = Date.now() + 5000;
      while (!spawned) {
        if (spawnError) {
          throw spawnError;
        }
        if (Date.now() >= deadline) {
          throw new Error("MCP bridge process did not spawn");
        }
        await sleep(25);
      }
    },

    /**
     * Sends a JSON-RPC request and resolves with the reply's `result`.
     * Rejects on an error reply, on transport failure, or after
     * `requestTimeoutMs` without a reply.
     */
    request(method, params, requestTimeoutMs) {
      const id = nextId;
      nextId += 1;
      const payload = { jsonrpc: "2.0", id, method, params };
      return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
          pending.delete(id);
          reject(new Error(`${method} timed out after ${requestTimeoutMs}ms`));
        }, requestTimeoutMs);
        // Both settle paths clear the timer so it cannot fire afterwards.
        pending.set(id, {
          resolve: (value) => {
            clearTimeout(timer);
            resolve(value);
          },
          reject: (error) => {
            clearTimeout(timer);
            reject(error);
          }
        });
        processHandle.stdin.write(`${JSON.stringify(payload)}\n`);
      });
    },

    /** Sends a JSON-RPC notification (no `id`, so no reply is awaited). */
    notify(method, params) {
      processHandle.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", method, params })}\n`);
    }
  };
}
|
||||
|
||||
/**
 * Runs `ocm env show <env> --json` and returns the parsed JSON object.
 *
 * @param {string} env - Environment name passed to `ocm env show`.
 * @param {number} timeoutMs - Time budget handed to the subprocess runner.
 * @returns {Promise<object>} Parsed environment metadata.
 * @throws {Error} When the command exits non-zero, or when its stdout is not
 *   a JSON object.
 */
async function readOcmEnvInfo(env, timeoutMs) {
  const result = await runProcess("ocm", ["env", "show", env, "--json"], timeoutMs);
  if (result.status !== 0) {
    throw new Error(`ocm env show failed: ${firstLine(result.stderr) || firstLine(result.stdout) || result.status}`);
  }

  let info;
  try {
    info = JSON.parse(result.stdout);
  } catch (cause) {
    // Name the command that produced the bad output instead of surfacing a
    // bare SyntaxError with no context.
    throw new Error(`ocm env show returned invalid JSON: ${cause.message}`, { cause });
  }
  // Callers read properties off the result, so a JSON null or scalar would
  // otherwise fail later with an unrelated TypeError.
  if (info === null || typeof info !== "object") {
    throw new Error("ocm env show returned JSON that is not an object");
  }
  return info;
}
|
||||
|
||||
/**
 * Spawns a command without a shell, captures stdout and stderr as UTF-8 text,
 * and resolves (never rejects) with the outcome.
 *
 * Resolved shape: `{ status, signal, timedOut, stdout, stderr }` where
 * `status` is 127 when the process emits `error` (with the error message as
 * `stderr`), 124 when the timeout fired, and otherwise the exit code, or 1
 * if the exit code is null.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Argument vector.
 * @param {number} timeoutMs - Delay before SIGTERM is sent; SIGKILL follows 3s later.
 * @returns {Promise<object>}
 */
function runProcess(command, args, timeoutMs) {
  return new Promise((settle) => {
    const proc = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"], env: process.env });
    const captured = { stdout: "", stderr: "" };
    let timedOut = false;

    const killTimer = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGTERM");
      // Unref'd so the escalation timer alone never keeps the process alive.
      setTimeout(() => proc.kill("SIGKILL"), 3000).unref();
    }, timeoutMs);

    for (const streamName of ["stdout", "stderr"]) {
      proc[streamName].on("data", (chunk) => {
        captured[streamName] += chunk.toString("utf8");
      });
    }

    proc.on("error", (error) => {
      clearTimeout(killTimer);
      settle({ status: 127, signal: null, timedOut, stdout: captured.stdout, stderr: error.message });
    });

    proc.on("close", (status, signal) => {
      clearTimeout(killTimer);
      const exitStatus = timedOut ? 124 : (status ?? 1);
      settle({ status: exitStatus, signal, timedOut, stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
|
||||
|
||||
/**
 * Resolves with `{ status, signal }` once the child has exited.
 *
 * Resolves immediately when `exitCode` or `signalCode` is already set;
 * otherwise waits for the `exit` event.
 *
 * @param {object} child - Child process (an event emitter with `exitCode` / `signalCode`).
 * @param {number} timeoutMs - Maximum time to wait for the `exit` event.
 * @returns {Promise<{status: (number|null), signal: (string|null)}>}
 * @throws {Error} Rejects when the process has not exited within `timeoutMs`.
 */
function waitForExit(child, timeoutMs) {
  if (child.exitCode !== null || child.signalCode !== null) {
    return Promise.resolve({ status: child.exitCode, signal: child.signalCode });
  }
  return new Promise((resolve, reject) => {
    const onExit = (status, signal) => {
      clearTimeout(timer);
      resolve({ status, signal });
    };
    const timer = setTimeout(() => {
      // Detach the listener on timeout so repeated waits on the same child
      // do not pile up stale `exit` handlers.
      child.off("exit", onExit);
      reject(new Error(`process did not exit within ${timeoutMs}ms`));
    }, timeoutMs);
    child.once("exit", onExit);
  });
}
|
||||
|
||||
/**
 * Parses a strict `--key value` argument list into a plain object.
 *
 * Every token at a key position must start with `--`, and every key must be
 * followed by a non-empty value that does not itself start with `--`.
 *
 * @param {string[]} values - Raw arguments (without the node/script prefix).
 * @returns {object} Map from key (without the leading dashes) to its string value.
 * @throws {Error} On a positional argument or a key without a value.
 */
function parseArgs(values) {
  const options = {};
  let cursor = 0;
  while (cursor < values.length) {
    const flag = values[cursor];
    if (!flag.startsWith("--")) {
      throw new Error(`unexpected positional argument '${flag}'`);
    }
    const name = flag.slice(2);
    const candidate = values[cursor + 1];
    const hasValue = Boolean(candidate) && !candidate.startsWith("--");
    if (!hasValue) {
      throw new Error(`missing value for --${name}`);
    }
    options[name] = candidate;
    // Consume the flag and its value together.
    cursor += 2;
  }
  return options;
}
|
||||
|
||||
/**
 * Returns the value stored under `key`, insisting that it is a non-empty string.
 *
 * @param {object} values - Parsed argument map.
 * @param {string} key - Argument name without the leading dashes.
 * @returns {string} The argument value.
 * @throws {Error} When the value is absent, empty, or not a string.
 */
function requiredArg(values, key) {
  const candidate = values[key];
  const isUsable = typeof candidate === "string" && candidate.length > 0;
  if (isUsable) {
    return candidate;
  }
  throw new Error(`missing --${key}`);
}
|
||||
|
||||
/**
 * Converts `value` with `Number()` and requires the result to be an integer
 * greater than zero.
 *
 * @param {*} value - Raw value (typically a CLI string or a numeric default).
 * @param {string} key - Argument name used in the error message.
 * @returns {number} The converted integer.
 * @throws {Error} When the converted value is not a positive integer.
 */
function positiveInt(value, key) {
  const parsed = Number(value);
  const isValid = Number.isInteger(parsed) && parsed > 0;
  if (isValid) {
    return parsed;
  }
  throw new Error(`--${key} must be a positive integer`);
}
|
||||
|
||||
/**
 * Guards against operating on an environment that is not Kova-owned: the name
 * must start with `kova-`, followed by an alphanumeric character and then any
 * mix of alphanumerics, dots, underscores, and dashes.
 *
 * @param {string} value - Environment name to validate.
 * @throws {Error} When the name does not match the expected pattern.
 */
function assertKovaEnvName(value) {
  const safeNamePattern = /^kova-[A-Za-z0-9][A-Za-z0-9._-]*$/;
  if (safeNamePattern.test(value)) {
    return;
  }
  throw new Error(`unsafe Kova env name '${value}'`);
}
|
||||
|
||||
/**
 * Produces a printable message for anything that was thrown.
 *
 * @param {*} error - Thrown value.
 * @returns {string} `error.message` for Error instances, `String(error)` otherwise.
 */
function formatError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Returns the first line of the trimmed string form of `value`.
 *
 * @param {*} value - Any value; null and undefined are treated as "".
 * @returns {string} Text up to the first LF or CRLF, or "" when there is none.
 */
function firstLine(value) {
  const text = String(value ?? "").trim();
  const [head] = text.split(/\r?\n/);
  return head ?? "";
}
|
||||
|
||||
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
30
surfaces/mcp-runtime.json
Normal file
30
surfaces/mcp-runtime.json
Normal file
@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "mcp-runtime",
|
||||
"title": "MCP Runtime",
|
||||
"ownerArea": "mcp-runtime",
|
||||
"description": "Start OpenClaw's MCP stdio bridge against a running gateway, list exposed tools, and prove the bridge process stops cleanly.",
|
||||
"requiredStates": ["fresh"],
|
||||
"targetKinds": ["npm", "channel", "runtime", "local-build"],
|
||||
"requiredMetrics": ["mcpInitializeMs", "mcpToolsListMs", "mcpShutdownMs", "mcpToolCountMin", "mcpProcessLeaks", "gatewayReadyMs", "statusMs", "pluginLoadFailures", "peakRssMb"],
|
||||
"processRoles": ["gateway", "command-tree", "mcp-runtime"],
|
||||
"thresholds": {
|
||||
"gatewayReadyMs": 30000,
|
||||
"statusMs": 10000,
|
||||
"mcpInitializeMs": 10000,
|
||||
"mcpToolsListMs": 10000,
|
||||
"mcpShutdownMs": 5000,
|
||||
"mcpToolCountMin": 1,
|
||||
"mcpProcessLeaks": 0,
|
||||
"pluginLoadFailures": 0,
|
||||
"peakRssMb": 900
|
||||
},
|
||||
"roleThresholds": {
|
||||
"gateway": { "peakRssMb": 800, "maxCpuPercent": 250 },
|
||||
"mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 },
|
||||
"command-tree": { "peakRssMb": 900, "maxCpuPercent": 300 }
|
||||
},
|
||||
"diagnostics": {
|
||||
"timelineRequiredForSourceBuild": false,
|
||||
"expectedSpans": ["mcp.runtime.start", "mcp.runtime.stop", "gateway.websocket"]
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user