diff --git a/metrics/known.json b/metrics/known.json index f6e9c75..af8a796 100644 --- a/metrics/known.json +++ b/metrics/known.json @@ -12,6 +12,15 @@ "agentTurnMaxMs", "agentTurnMs", "agentTurnP95Ms", + "browserDoctorMs", + "browserOpenMs", + "browserProcessLeaks", + "browserSnapshotMs", + "browserStartMs", + "browserStopMs", + "browserTabCount", + "browserTabCountMin", + "browserTabsMs", "coldAgentTurnMs", "coldPreProviderMs", "coldReadyMs", diff --git a/process-roles/browser-sidecar.json b/process-roles/browser-sidecar.json index 4b34d0a..c27d875 100644 --- a/process-roles/browser-sidecar.json +++ b/process-roles/browser-sidecar.json @@ -2,6 +2,18 @@ "id": "browser-sidecar", "title": "Browser Sidecar", "description": "Browser automation sidecar processes and browser control services started by OpenClaw.", - "commandPatterns": ["browser", "playwright"], - "processPatterns": ["playwright", "browser/server", "chromium"] + "commandPatterns": [ + "(^|\\s)browser(\\s|$)", + "browser-automation-smoke\\.mjs", + "playwright" + ], + "processPatterns": [ + "openclaw-browser", + "chrome-devtools-mcp", + "Google Chrome", + "Chrome Helper", + "playwright", + "browser/server", + "chromium" + ] } diff --git a/profiles/exhaustive.json b/profiles/exhaustive.json index 522fa83..1fe0b62 100644 --- a/profiles/exhaustive.json +++ b/profiles/exhaustive.json @@ -37,6 +37,7 @@ { "scenario": "dashboard-readiness", "state": "fresh" }, { "scenario": "tui-responsiveness", "state": "fresh" }, { "scenario": "mcp-runtime-start-stop", "state": "fresh" }, + { "scenario": "browser-automation-smoke", "state": "fresh", "timeoutMs": 180000 }, { "scenario": "gateway-performance", "state": "many-bundled-plugins" }, { "scenario": "gateway-performance", "state": "gateway-already-running" }, { "scenario": "gateway-performance", "state": "stale-service-state" }, diff --git a/profiles/release.json b/profiles/release.json index 5cfab2d..0bcfe42 100644 --- a/profiles/release.json +++ b/profiles/release.json @@ -16,7 +16,7 @@ "tui-cli": { "peakRssMb": 650, "maxCpuPercent": 250 }, "dashboard-cli": { "peakRssMb": 650, "maxCpuPercent": 250 }, "mcp-runtime": { "peakRssMb": 500, "maxCpuPercent": 200 }, - "browser-sidecar": { "peakRssMb": 500, "maxCpuPercent": 250 }, + "browser-sidecar": { "peakRssMb": 700, "maxCpuPercent": 250 }, "mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 } }, "surfaces": { @@ -90,6 +90,20 @@ "statusMs": 10000, "pluginLoadFailures": 0 } + }, + "browser-automation": { + "thresholds": { + "browserDoctorMs": 15000, + "browserStartMs": 30000, + "browserTabsMs": 10000, + "browserOpenMs": 15000, + "browserSnapshotMs": 15000, + "browserStopMs": 10000, + "browserTabCountMin": 1, + "browserProcessLeaks": 0, + "statusMs": 10000, + "pluginLoadFailures": 0 + } } } }, @@ -137,6 +151,7 @@ "soak:large-workspace", "workspace-scan:large-workspace", "mcp-runtime:fresh", + "browser-automation:fresh", "cross-platform-smoke:slow-filesystem" ] }, @@ -159,7 +174,7 @@ "tui", "gateway-performance" ], - "warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "cross-platform-smoke"] + "warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "browser-automation", "cross-platform-smoke"] }, "scenarios": { "blocking": [ @@ -177,7 +192,8 @@ ], "warning": [ "workspace-scan-pressure", - "mcp-runtime-start-stop" + "mcp-runtime-start-stop", + "browser-automation-smoke" ] } }, @@ -216,6 +232,7 @@ { "scenario": "soak", "state": "large-workspace" }, { "scenario": "workspace-scan-pressure", "state": "large-workspace" }, { "scenario": "mcp-runtime-start-stop", "state": "fresh" }, + { "scenario": "browser-automation-smoke", "state": "fresh", "timeoutMs": 180000 }, { "scenario": "cross-platform-smoke", "state": "slow-filesystem" } ] }, @@ -362,6 +379,11 @@ "scenario": "mcp-runtime-start-stop", "state": "fresh" }, + { + "scenario": "browser-automation-smoke", + "state": "fresh", + "timeoutMs": 180000 + }, { "scenario": "cross-platform-smoke", "state": "slow-filesystem" diff --git a/scenarios/browser-automation-smoke.json b/scenarios/browser-automation-smoke.json new file mode 100644 index 0000000..a75fb6f --- /dev/null +++ b/scenarios/browser-automation-smoke.json @@ -0,0 +1,47 @@ +{ + "id": "browser-automation-smoke", + "surface": "browser-automation", + "title": "Browser Automation Smoke", + "objective": "Start OpenClaw's real browser automation surface, open a managed browser tab, capture browser state, stop the profile, and verify the gateway remains healthy.", + "tags": ["browser", "automation", "gateway", "plugins"], + "timeoutMs": 180000, + "thresholds": { + "gatewayReadyMs": 30000, + "gatewayReadyHardTimeoutMs": 120000, + "statusMs": 10000, + "browserDoctorMs": 15000, + "browserStartMs": 30000, + "browserTabsMs": 10000, + "browserOpenMs": 15000, + "browserSnapshotMs": 15000, + "browserStopMs": 10000, + "browserTabCountMin": 1, + "browserProcessLeaks": 0, + "missingDependencyErrors": 0, + "pluginLoadFailures": 0, + "peakRssMb": 1100 + }, + "phases": [ + { + "id": "gateway", + "title": "Gateway Start", + "intent": "Start the gateway and prove it is healthy before browser automation.", + "commands": ["ocm start {env} {startSelector} --json", "ocm @{env} -- status"], + "evidence": ["gateway status", "gateway port", "readiness classification"] + }, + { + "id": "browser-smoke", + "title": "Browser Automation Smoke", + "intent": "Use OpenClaw's browser CLI to start a managed headless profile, open a tab, list tabs, snapshot, and stop.", + "commands": ["node {kovaRoot}/support/browser-automation-smoke.mjs --env {env} --artifact-dir {artifactDir} --timeout-ms 45000"], + "evidence": ["browser start timing", "tabs timing", "opened tab count", "snapshot timing", "browser stop timing"] + }, + { + "id": "post-browser-health", + "title": "Post-Browser Gateway Health", + "intent": "Verify browser automation did not leave the gateway degraded.", + "commands": ["ocm @{env} -- status", "ocm logs {env} --tail 300 --raw"], + "evidence": ["status after browser automation", "browser plugin errors", "gateway errors"] + } + ] +} diff --git a/src/evaluator.mjs b/src/evaluator.mjs index ed63923..51d5512 100644 --- a/src/evaluator.mjs +++ b/src/evaluator.mjs @@ -78,6 +78,7 @@ export function evaluateRecord(record, scenario, options = {}) { const healthP95Ms = collectHealthP95(record); const soakEvidence = collectSoakEvidence(allResults); const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults); + const browserAutomationEvidence = collectBrowserAutomationEvidence(allResults); const listeningFailures = countListeningFailures(record); const tcpConnectMaxMs = collectTcpConnectMax(record); const timeToListeningMs = collectTimeToListening(record); @@ -294,6 +295,56 @@ export function evaluateRecord(record, scenario, options = {}) { } } + if (browserAutomationEvidence.available) { + checkEvidenceThreshold(violations, "browser", "browserDoctorMs", browserAutomationEvidence.browserDoctorMs, thresholds.browserDoctorMs, "Browser doctor"); + checkEvidenceThreshold(violations, "browser", "browserStartMs", browserAutomationEvidence.browserStartMs, thresholds.browserStartMs, "Browser start"); + checkEvidenceThreshold(violations, "browser", "browserTabsMs", browserAutomationEvidence.browserTabsMs, thresholds.browserTabsMs, "Browser tabs"); + checkEvidenceThreshold(violations, "browser", "browserOpenMs", browserAutomationEvidence.browserOpenMs, thresholds.browserOpenMs, "Browser open"); + checkEvidenceThreshold(violations, "browser", "browserSnapshotMs", browserAutomationEvidence.browserSnapshotMs, thresholds.browserSnapshotMs, "Browser snapshot"); + checkEvidenceThreshold(violations, "browser", "browserStopMs", browserAutomationEvidence.browserStopMs, thresholds.browserStopMs, "Browser stop"); + + if (typeof thresholds.browserTabCountMin === "number" && browserAutomationEvidence.browserTabCount !== null && browserAutomationEvidence.browserTabCount < thresholds.browserTabCountMin) { + violations.push({ + kind: "browser", + metric: "browserTabCountMin", + expected: `>= ${thresholds.browserTabCountMin}`, + actual: browserAutomationEvidence.browserTabCount, + message: `Browser automation saw ${browserAutomationEvidence.browserTabCount} tab(s), below required ${thresholds.browserTabCountMin}` + }); + } + + if (browserAutomationEvidence.browserSnapshotOk === false) { + violations.push({ + kind: "browser", + metric: "browserSnapshotOk", + expected: true, + actual: false, + message: "Browser snapshot command did not complete successfully" + }); + } + + const leakCount = browserAutomationEvidence.browserStopped === false ? 1 : 0; + if (typeof thresholds.browserProcessLeaks === "number" && leakCount > thresholds.browserProcessLeaks) { + violations.push({ + kind: "browser", + metric: "browserProcessLeaks", + expected: `<= ${thresholds.browserProcessLeaks}`, + actual: leakCount, + message: "Browser automation did not stop the managed browser profile cleanly" + }); + } + + if (browserAutomationEvidence.errors.length > 0) { + violations.push({ + kind: "browser", + metric: "browserSmokeErrors", + expected: "0", + actual: browserAutomationEvidence.errors.length, + message: `Browser automation smoke reported ${browserAutomationEvidence.errors.length} error(s): ${browserAutomationEvidence.errors[0]}` + }); + } + } + if (typeof thresholds.rssGrowthMb === "number" && rssGrowthMb !== null && rssGrowthMb > thresholds.rssGrowthMb) { violations.push({ kind: "soak", @@ -568,6 +619,18 @@ export function evaluateRecord(record, scenario, options = {}) { mcpProcessExited: mcpBridgeEvidence.processExited, mcpProcessLeaks: mcpBridgeEvidence.available ? (mcpBridgeEvidence.processExited === false ? 1 : 0) : null, mcpErrors: mcpBridgeEvidence.errors, + browserAutomationEvidence, + browserDoctorMs: browserAutomationEvidence.browserDoctorMs, + browserStartMs: browserAutomationEvidence.browserStartMs, + browserTabsMs: browserAutomationEvidence.browserTabsMs, + browserOpenMs: browserAutomationEvidence.browserOpenMs, + browserSnapshotMs: browserAutomationEvidence.browserSnapshotMs, + browserStopMs: browserAutomationEvidence.browserStopMs, + browserTabCount: browserAutomationEvidence.browserTabCount, + browserSnapshotOk: browserAutomationEvidence.browserSnapshotOk, + browserStopped: browserAutomationEvidence.browserStopped, + browserProcessLeaks: browserAutomationEvidence.available ? (browserAutomationEvidence.browserStopped === false ? 1 : 0) : null, + browserErrors: browserAutomationEvidence.errors, soakDurationMs: soakEvidence.durationMs, soakIterations: soakEvidence.iterations, soakCommandP95Ms: soakEvidence.commandP95Ms, @@ -1734,6 +1797,88 @@ function parseMcpBridgeSmokeOutput(result) { } } +function collectBrowserAutomationEvidence(results) { + const smokes = results + .filter((result) => result.command?.includes("browser-automation-smoke.mjs")) + .map((result) => parseBrowserAutomationSmokeOutput(result)) + .filter(Boolean); + + if (smokes.length === 0) { + return { + schemaVersion: "kova.browserAutomationEvidence.v1", + available: false, + browserDoctorMs: null, + browserStartMs: null, + browserTabsMs: null, + browserOpenMs: null, + browserSnapshotMs: null, + browserStopMs: null, + browserTabCount: null, + browserSnapshotOk: null, + browserStopped: null, + errors: [], + smokes: [] + }; + } + + return { + schemaVersion: "kova.browserAutomationEvidence.v1", + available: true, + browserDoctorMs: maxNullable(...smokes.map((smoke) => smoke.browserDoctorMs)), + browserStartMs: maxNullable(...smokes.map((smoke) => smoke.browserStartMs)), + browserTabsMs: maxNullable(...smokes.map((smoke) => smoke.browserTabsMs)), + browserOpenMs: maxNullable(...smokes.map((smoke) => smoke.browserOpenMs)), + browserSnapshotMs: maxNullable(...smokes.map((smoke) => smoke.browserSnapshotMs)), + browserStopMs: maxNullable(...smokes.map((smoke) => smoke.browserStopMs)), + browserTabCount: maxNullable(...smokes.map((smoke) => smoke.browserTabCount)), + browserSnapshotOk: smokes.every((smoke) => smoke.browserSnapshotOk === true), + browserStopped: smokes.every((smoke) => smoke.browserStopped === true), + errors: smokes.flatMap((smoke) => smoke.errors ?? []), + smokes: smokes.map((smoke) => ({ + durationMs: smoke.durationMs ?? null, + browserDoctorMs: smoke.browserDoctorMs ?? null, + browserStartMs: smoke.browserStartMs ?? null, + browserTabsMs: smoke.browserTabsMs ?? null, + browserOpenMs: smoke.browserOpenMs ?? null, + browserSnapshotMs: smoke.browserSnapshotMs ?? null, + browserStopMs: smoke.browserStopMs ?? null, + browserTabCount: smoke.browserTabCount ?? null, + browserSnapshotOk: smoke.browserSnapshotOk ?? null, + browserStopped: smoke.browserStopped ?? null, + errors: smoke.errors ?? [] + })) + }; +} + +function parseBrowserAutomationSmokeOutput(result) { + const text = result.stdout ?? ""; + const jsonStart = text.indexOf("{"); + if (jsonStart < 0) { + return null; + } + try { + const parsed = JSON.parse(text.slice(jsonStart)); + return parsed?.schemaVersion === "kova.browserAutomationSmoke.v1" ? parsed : null; + } catch { + return null; + } +} + +function checkEvidenceThreshold(violations, kind, metric, actual, threshold, label) { + if (typeof threshold !== "number" || actual === null) { + return; + } + if (actual > threshold) { + violations.push({ + kind, + metric, + expected: `<= ${threshold}`, + actual, + message: `${label} took ${actual}ms, over threshold ${threshold}ms` + }); + } +} + function healthFailureCount(samples) { return samples.filter((sample) => sample && !sample.ok).length; } diff --git a/src/report.mjs b/src/report.mjs index 1ef5e83..95c017f 100644 --- a/src/report.mjs +++ b/src/report.mjs @@ -149,6 +149,9 @@ export function renderMarkdownReport(report) { if (record.measurements.mcpBridgeEvidence?.available) { lines.push(`- MCP bridge: initialize ${record.measurements.mcpInitializeMs ?? "unknown"} ms; tools/list ${record.measurements.mcpToolsListMs ?? "unknown"} ms; tools ${record.measurements.mcpToolCount ?? "unknown"}; shutdown ${record.measurements.mcpShutdownMs ?? "unknown"} ms; exited ${record.measurements.mcpProcessExited ?? "unknown"}`); } + if (record.measurements.browserAutomationEvidence?.available) { + lines.push(`- Browser automation: doctor ${record.measurements.browserDoctorMs ?? "unknown"} ms; start ${record.measurements.browserStartMs ?? "unknown"} ms; open ${record.measurements.browserOpenMs ?? "unknown"} ms; tabs ${record.measurements.browserTabsMs ?? "unknown"} ms; snapshot ${record.measurements.browserSnapshotMs ?? "unknown"} ms; stop ${record.measurements.browserStopMs ?? "unknown"} ms; tabs ${record.measurements.browserTabCount ?? "unknown"}; stopped ${record.measurements.browserStopped ?? "unknown"}`); + } lines.push(`- Provider/model timing: ${record.measurements.providerModelTimingMs ?? "unknown"} ms`); lines.push(`- Agent turn: ${record.measurements.agentTurnMs ?? "unknown"} ms (${record.measurements.agentResponseOk ?? "not-run"})`); if (record.measurements.agentTurnCount > 0) { @@ -847,7 +850,7 @@ export function renderPasteSummary(report) { const roleText = compactRolePeaks(record.measurements).slice(0, 4) .map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`) .join(", ") || "unknown"; - lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? "unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`); + lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? "unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; browser start/open/snapshot ${record.measurements.browserStartMs ?? "unknown"}/${record.measurements.browserOpenMs ?? "unknown"}/${record.measurements.browserSnapshotMs ?? "unknown"}ms; browser tabs ${record.measurements.browserTabCount ?? "unknown"}; browser stopped ${record.measurements.browserStopped ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`); } } else if (record.violations?.length > 0) { if (record.measurements) { diff --git a/src/selfcheck.mjs b/src/selfcheck.mjs index be88903..510f50e 100644 --- a/src/selfcheck.mjs +++ b/src/selfcheck.mjs @@ -193,6 +193,15 @@ export async function runSelfCheck(flags = {}) { "mcp-runtime-role-patterns", "node -e \"const role=require('./process-roles/mcp-runtime.json'); if (role.commandPatterns.includes('mcp') || role.processPatterns.includes('mcp') || role.processPatterns.some((p)=>p.includes('modelcontextprotocol'))) process.exit(1);\"" )); + checks.push(await jsonCommandCheck("browser-automation-dry-run-json", `node bin/kova.mjs run --target runtime:stable --scenario browser-automation-smoke --state fresh --report-dir ${quoteShell(tmp)} --json`, async (data) => { + const report = JSON.parse(await readFile(data.jsonPath, "utf8")); + const record = report.records?.[0]; + assertEqual(record?.surface, "browser-automation", "browser automation surface"); + const commands = record?.phases?.flatMap((phase) => phase.commands ?? []) ?? []; + const browserCommand = commands.find((command) => command.includes("browser-automation-smoke.mjs")) ?? ""; + assertEqual(browserCommand.includes("--artifact-dir '"), true, "browser helper receives quoted artifact dir"); + assertEqual(record?.thresholds?.browserProcessLeaks, 0, "browser process leak threshold"); + })); checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => { assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema"); assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id"); @@ -268,6 +277,7 @@ export async function runSelfCheck(flags = {}) { checks.push(await soakLoopRunnerCheck(tmp)); checks.push(soakTrendEvaluationCheck()); checks.push(mcpBridgeEvidenceEvaluationCheck()); + checks.push(browserAutomationEvidenceEvaluationCheck()); checks.push(await jsonCommandCheck( "dry-run-state-lifecycle-json", `node bin/kova.mjs run --target runtime:stable --scenario fresh-install --state missing-plugin-index --report-dir ${quoteShell(tmp)} --json`, @@ -2333,6 +2343,106 @@ function mcpBridgeEvidenceEvaluationCheck() { } } +function browserAutomationEvidenceEvaluationCheck() { + try { + const smoke = { + schemaVersion: "kova.browserAutomationSmoke.v1", + durationMs: 4200, + browserDoctorMs: 120, + browserStartMs: 1800, + browserTabsMs: 90, + browserOpenMs: 300, + browserSnapshotMs: 250, + browserStopMs: 180, + browserTabCount: 2, + browserSnapshotOk: true, + browserStopped: true, + errors: [] + }; + const record = { + scenario: "browser-automation-smoke", + status: "PASS", + phases: [{ + id: "browser-smoke", + results: [{ + command: "node support/browser-automation-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova", + status: 0, + timedOut: false, + durationMs: 4200, + stdout: JSON.stringify(smoke), + stderr: "" + }], + metrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }], + finalMetrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }; + evaluateRecord(record, { + id: "browser-automation-smoke", + thresholds: { + browserDoctorMs: 15000, + browserStartMs: 30000, + browserTabsMs: 10000, + browserOpenMs: 15000, + browserSnapshotMs: 15000, + browserStopMs: 10000, + browserTabCountMin: 1, + browserProcessLeaks: 0 + } + }, { surface: { thresholds: {} }, targetPlan: { kind: "npm" } }); + + assertEqual(record.status, "PASS", "browser automation record status"); + assertEqual(record.measurements.browserStartMs, 1800, "browser start ms"); + assertEqual(record.measurements.browserOpenMs, 300, "browser open ms"); + assertEqual(record.measurements.browserSnapshotMs, 250, "browser snapshot ms"); + assertEqual(record.measurements.browserTabCount, 2, "browser tab count"); + assertEqual(record.measurements.browserProcessLeaks, 0, "browser process leak count"); + + const failed = { + ...record, + status: "PASS", + violations: [], + measurements: undefined, + phases: [{ + id: "browser-smoke", + results: [{ + command: "node support/browser-automation-smoke.mjs --env kova-self-check --artifact-dir /tmp/kova", + status: 0, + timedOut: false, + durationMs: 4200, + stdout: JSON.stringify({ ...smoke, browserStopped: false, errors: ["browser stop failed"] }), + stderr: "" + }], + metrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() } + }] + }; + evaluateRecord(failed, { + id: "browser-automation-smoke", + thresholds: { browserProcessLeaks: 0 } + }, { surface: { thresholds: {} }, targetPlan: { kind: "npm" } }); + assertEqual(failed.status, "FAIL", "browser failed stop status"); + assertEqual( + failed.violations.some((violation) => violation.metric === "browserProcessLeaks"), + true, + "browser process leak violation" + ); + + return { + id: "browser-automation-evidence-evaluation", + status: "PASS", + command: "evaluate synthetic browser automation evidence", + durationMs: 0 + }; + } catch (error) { + return { + id: "browser-automation-evidence-evaluation", + status: "FAIL", + command: "evaluate synthetic browser automation evidence", + durationMs: 0, + message: error.message + }; + } +} + function agentColdWarmEvaluationCheck() { try { const coldCommand = "ocm @kova -- agent --local --agent main --session-id kova-agent-cold-warm --message hi --json"; @@ -3087,7 +3197,7 @@ async function resourceRootCommandRoleBoundaryCheck() { async function resourceRolePollutionCheck() { try { const processRoles = await loadProcessRoles(); - const mockProviderCommand = "node support/mock-openai-server.mjs --marker KOVA_AGENT_OK"; + const mockProviderCommand = "node /tmp/kova-browser-automation-smoke/mock-openai-server.mjs --marker KOVA_AGENT_OK"; const mockProviderRoles = classifyRegistryRolesForProcess( { command: `/bin/zsh -lc ${mockProviderCommand}` }, { @@ -3109,6 +3219,7 @@ async function resourceRolePollutionCheck() { assertEqual(mockProviderRoles.includes("mock-provider"), true, "mock provider helper remains classified"); assertEqual(mockProviderRoles.includes("agent-cli"), false, "KOVA_AGENT_OK marker must not imply agent-cli"); assertEqual(mockProviderRoles.includes("agent-process"), false, "KOVA_AGENT_OK marker must not imply agent-process"); + assertEqual(mockProviderRoles.includes("browser-sidecar"), false, "browser env name must not imply browser-sidecar"); assertEqual(envNameRoles.includes("runtime-management"), false, "mcp-runtime env name must not imply runtime-management"); assertEqual(envNameRoles.includes("model-cli"), false, "configure-openclaw fixture helper must not imply model-cli"); return { diff --git a/states/fresh.json b/states/fresh.json index 726398e..25b38a3 100644 --- a/states/fresh.json +++ b/states/fresh.json @@ -22,7 +22,8 @@ "plugin-missing-runtime-deps", "dashboard", "tui", - "mcp-runtime" + "mcp-runtime", + "browser-automation" ], "incompatibleSurfaces": [ "upgrade-existing-user" diff --git a/support/browser-automation-smoke.mjs b/support/browser-automation-smoke.mjs new file mode 100644 index 0000000..d5b9dc9 --- /dev/null +++ b/support/browser-automation-smoke.mjs @@ -0,0 +1,235 @@ +#!/usr/bin/env node + +import { spawn } from "node:child_process"; +import { mkdir, writeFile } from "node:fs/promises"; +import { join } from "node:path"; + +const SCHEMA_VERSION = "kova.browserAutomationSmoke.v1"; + +const args = parseArgs(process.argv.slice(2)); +const envName = requiredArg(args, "env"); +const artifactDir = requiredArg(args, "artifact-dir"); +const timeoutMs = positiveInt(args["timeout-ms"] ?? 45000, "timeout-ms"); +const profile = args.profile ?? "openclaw"; +assertKovaEnvName(envName); + +const startedAtEpochMs = Date.now(); +const summary = { + schemaVersion: SCHEMA_VERSION, + env: envName, + profile, + startedAt: new Date(startedAtEpochMs).toISOString(), + finishedAt: null, + durationMs: null, + browserDoctorMs: null, + browserStartMs: null, + browserTabsMs: null, + browserOpenMs: null, + browserSnapshotMs: null, + browserStopMs: null, + browserTabCount: null, + browserSnapshotOk: false, + browserStopped: false, + commands: [], + errors: [] +}; + +try { + await mkdir(artifactDir, { recursive: true }); + + const doctor = await runBrowserCommand("doctor", ["doctor"], { allowFailure: true }); + summary.browserDoctorMs = doctor.durationMs; + + const start = await runBrowserCommand("start", ["start", "--headless"]); + summary.browserStartMs = start.durationMs; + + const open = await runBrowserCommand("open", ["open", "about:blank", "--label", "kova-smoke"]); + summary.browserOpenMs = open.durationMs; + + const tabs = await runBrowserCommand("tabs", ["tabs"]); + summary.browserTabsMs = tabs.durationMs; + summary.browserTabCount = countTabs(tabs); + + const snapshot = await runBrowserCommand("snapshot", ["snapshot"]); + summary.browserSnapshotMs = snapshot.durationMs; + summary.browserSnapshotOk = snapshot.status === 0; +} catch (error) { + summary.errors.push(formatError(error)); +} finally { + try { + const stop = await runBrowserCommand("stop", ["stop"], { allowFailure: true }); + summary.browserStopMs = stop.durationMs; + summary.browserStopped = stop.status === 0; + } catch (error) { + summary.errors.push(`browser stop failed: ${formatError(error)}`); + } + const finishedAtEpochMs = Date.now(); + summary.finishedAt = new Date(finishedAtEpochMs).toISOString(); + summary.durationMs = finishedAtEpochMs - startedAtEpochMs; + await writeFile(join(artifactDir, "browser-automation-smoke.json"), `${JSON.stringify(summary, null, 2)}\n`, "utf8"); + console.log(JSON.stringify(summary, null, 2)); +} + +process.exit(summary.errors.length === 0 && summary.browserStopped ? 0 : 1); + +async function runBrowserCommand(label, browserArgs, options = {}) { + const result = await runProcess("ocm", [ + `@${envName}`, + "--", + "browser", + "--json", + "--browser-profile", + profile, + ...browserArgs + ], timeoutMs); + const commandRecord = { + label, + args: ["browser", "--json", "--browser-profile", profile, ...browserArgs], + status: result.status, + signal: result.signal, + timedOut: result.timedOut, + durationMs: result.durationMs, + stdoutPath: join(artifactDir, `browser-${safeSegment(label)}.stdout.txt`), + stderrPath: join(artifactDir, `browser-${safeSegment(label)}.stderr.txt`), + stdoutSnippet: result.stdout.slice(0, 4000), + stderrSnippet: result.stderr.slice(0, 4000), + parsed: parseJsonOutput(result.stdout) + }; + await writeFile(commandRecord.stdoutPath, result.stdout, "utf8"); + await writeFile(commandRecord.stderrPath, result.stderr, "utf8"); + summary.commands.push(commandRecord); + + if (!options.allowFailure && result.status !== 0) { + throw new Error(`${label} failed: ${firstLine(result.stderr) || firstLine(result.stdout) || result.status}`); + } + return commandRecord; +} + +function runProcess(command, values, commandTimeoutMs) { + const started = Date.now(); + return new Promise((resolve) => { + const child = spawn(command, values, { stdio: ["ignore", "pipe", "pipe"], env: process.env }); + let stdout = ""; + let stderr = ""; + let timedOut = false; + const timer = setTimeout(() => { + timedOut = true; + child.kill("SIGTERM"); + setTimeout(() => child.kill("SIGKILL"), 3000).unref(); + }, commandTimeoutMs); + child.stdout.on("data", (chunk) => { + stdout += chunk.toString("utf8"); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString("utf8"); + }); + child.on("error", (error) => { + clearTimeout(timer); + resolve({ + status: 127, + signal: null, + timedOut, + durationMs: Date.now() - started, + stdout, + stderr: error.message + }); + }); + child.on("close", (status, signal) => { + clearTimeout(timer); + resolve({ + status: timedOut ? 124 : (status ?? 1), + signal, + timedOut, + durationMs: Date.now() - started, + stdout, + stderr + }); + }); + }); +} + +function countTabs(result) { + const parsed = result.parsed; + if (Array.isArray(parsed)) { + return parsed.length; + } + if (Array.isArray(parsed?.tabs)) { + return parsed.tabs.length; + } + const text = `${result.stdoutSnippet}\n${result.stderrSnippet}`; + const matches = text.match(/\bt\d+\b/g); + return matches ? new Set(matches).size : null; +} + +function parseJsonOutput(text) { + const trimmed = String(text ?? "").trim(); + if (!trimmed) { + return null; + } + try { + return JSON.parse(trimmed); + } catch { + const start = trimmed.indexOf("{"); + const end = trimmed.lastIndexOf("}"); + if (start >= 0 && end > start) { + try { + return JSON.parse(trimmed.slice(start, end + 1)); + } catch { + return null; + } + } + return null; + } +} + +function parseArgs(values) { + const parsed = {}; + for (let index = 0; index < values.length; index += 1) { + const value = values[index]; + if (!value.startsWith("--")) { + throw new Error(`unexpected positional argument '${value}'`); + } + const key = value.slice(2); + const next = values[index + 1]; + if (!next || next.startsWith("--")) { + throw new Error(`missing value for --${key}`); + } + parsed[key] = next; + index += 1; + } + return parsed; +} + +function requiredArg(values, key) { + const value = values[key]; + if (typeof value !== "string" || value.length === 0) { + throw new Error(`missing --${key}`); + } + return value; +} + +function positiveInt(value, key) { + const number = Number(value); + if (!Number.isInteger(number) || number <= 0) { + throw new Error(`--${key} must be a positive integer`); + } + return number; +} + +function assertKovaEnvName(value) { + if (!/^kova-[A-Za-z0-9][A-Za-z0-9._-]*$/.test(value)) { + throw new Error(`unsafe Kova env name '${value}'`); + } +} + +function safeSegment(value) { + return String(value ?? "command").toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "") || "command"; +} + +function firstLine(value) { + return String(value ?? "").trim().split(/\r?\n/)[0] ?? ""; +} + +function formatError(error) { + return error instanceof Error ? error.message : String(error); +} diff --git a/surfaces/browser-automation.json b/surfaces/browser-automation.json new file mode 100644 index 0000000..5f6c236 --- /dev/null +++ b/surfaces/browser-automation.json @@ -0,0 +1,45 @@ +{ + "id": "browser-automation", + "title": "Browser Automation", + "ownerArea": "browser-runtime", + "description": "Start OpenClaw's browser control surface, open a tab, inspect browser state, and shut the browser profile down cleanly.", + "requiredStates": ["fresh"], + "targetKinds": ["npm", "channel", "runtime", "local-build"], + "requiredMetrics": [ + "gatewayReadyMs", + "statusMs", + "browserDoctorMs", + "browserStartMs", + "browserTabsMs", + "browserOpenMs", + "browserSnapshotMs", + "browserStopMs", + "browserTabCount", + "browserProcessLeaks", + "pluginLoadFailures" + ], + "processRoles": ["gateway", "gateway-tree", "command-tree", "browser-sidecar", "status-cli"], + "thresholds": { + "gatewayReadyMs": 30000, + "statusMs": 10000, + "browserDoctorMs": 15000, + "browserStartMs": 30000, + "browserTabsMs": 10000, + "browserOpenMs": 15000, + "browserSnapshotMs": 15000, + "browserStopMs": 10000, + "browserTabCountMin": 1, + "browserProcessLeaks": 0, + "pluginLoadFailures": 0, + "peakRssMb": 1100 + }, + "roleThresholds": { + "gateway": { "peakRssMb": 900, "maxCpuPercent": 250 }, + "browser-sidecar": { "peakRssMb": 700, "maxCpuPercent": 250 }, + "status-cli": { "peakRssMb": 700, "maxCpuPercent": 250 } + }, + "diagnostics": { + "timelineRequiredForSourceBuild": false, + "expectedSpans": ["browser.start", "browser.tabs", "browser.snapshot", "runtimeDeps.stage"] + } +}