feat: add media understanding timeout scenario

This commit is contained in:
Shakker 2026-05-01 12:36:39 +01:00
parent 2f53b83de4
commit acadaf2bad
No known key found for this signature in database
11 changed files with 696 additions and 10 deletions

View File

@ -35,10 +35,15 @@
"gatewayResponsive",
"gatewayRssGrowthMb",
"gatewaySurvives",
"healthFailures",
"healthMs",
"healthP95Ms",
"inputLagMs",
"maxCpuPercent",
"mediaDescribeMs",
"mediaGatewayStatusWorks",
"mediaStatusAfterTimeoutMs",
"mediaTimeoutObserved",
"missingDependencyErrors",
"modelsListMs",
"mcpInitializeMs",

View File

@ -38,6 +38,7 @@
{ "scenario": "tui-responsiveness", "state": "fresh" },
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
{ "scenario": "browser-automation-smoke", "state": "fresh", "timeoutMs": 180000 },
{ "scenario": "media-understanding-timeout", "state": "fresh", "timeoutMs": 180000 },
{ "scenario": "gateway-performance", "state": "many-bundled-plugins" },
{ "scenario": "gateway-performance", "state": "gateway-already-running" },
{ "scenario": "gateway-performance", "state": "stale-service-state" },

View File

@ -104,6 +104,16 @@
"statusMs": 10000,
"pluginLoadFailures": 0
}
},
"media-understanding": {
"thresholds": {
"mediaDescribeMs": 10000,
"mediaTimeoutObserved": 1,
"mediaStatusAfterTimeoutMs": 10000,
"providerRequestCountMin": 1,
"statusMs": 10000,
"pluginLoadFailures": 0
}
}
}
},
@ -152,6 +162,7 @@
"workspace-scan:large-workspace",
"mcp-runtime:fresh",
"browser-automation:fresh",
"media-understanding:fresh",
"cross-platform-smoke:slow-filesystem"
]
},
@ -174,7 +185,7 @@
"tui",
"gateway-performance"
],
"warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "browser-automation", "cross-platform-smoke"]
"warning": ["failure-containment", "soak", "workspace-scan", "mcp-runtime", "browser-automation", "media-understanding", "cross-platform-smoke"]
},
"scenarios": {
"blocking": [
@ -193,7 +204,8 @@
"warning": [
"workspace-scan-pressure",
"mcp-runtime-start-stop",
"browser-automation-smoke"
"browser-automation-smoke",
"media-understanding-timeout"
]
}
},
@ -233,6 +245,7 @@
{ "scenario": "workspace-scan-pressure", "state": "large-workspace" },
{ "scenario": "mcp-runtime-start-stop", "state": "fresh" },
{ "scenario": "browser-automation-smoke", "state": "fresh", "timeoutMs": 180000 },
{ "scenario": "media-understanding-timeout", "state": "fresh", "timeoutMs": 180000 },
{ "scenario": "cross-platform-smoke", "state": "slow-filesystem" }
]
},
@ -384,6 +397,11 @@
"state": "fresh",
"timeoutMs": 180000
},
{
"scenario": "media-understanding-timeout",
"state": "fresh",
"timeoutMs": 180000
},
{
"scenario": "cross-platform-smoke",
"state": "slow-filesystem"

View File

@ -0,0 +1,48 @@
{
"id": "media-understanding-timeout",
"surface": "media-understanding",
"title": "Media Understanding Timeout",
"objective": "Run OpenClaw's packaged image media-understanding capability against a deterministic mock provider timeout and prove the command fails quickly while the gateway remains healthy.",
"tags": ["media", "image", "timeout", "provider", "gateway", "capability-cli"],
"timeoutMs": 180000,
"mockProvider": {
"mode": "timeout",
"stallMs": 65000
},
"thresholds": {
"gatewayReadyMs": 30000,
"mediaDescribeMs": 10000,
"mediaTimeoutObserved": 1,
"mediaStatusAfterTimeoutMs": 10000,
"providerRequestCountMin": 1,
"statusMs": 10000,
"peakRssMb": 900,
"missingDependencyErrors": 0,
"pluginLoadFailures": 0
},
"phases": [
{
"id": "provision",
"title": "Provision Media Env",
"intent": "Start a disposable OpenClaw gateway before wiring mock auth and running media understanding.",
"commands": ["ocm start {env} {startSelector} --json"],
"evidence": ["gateway port", "runtime binding", "startup readiness"]
},
{
"id": "media-timeout",
"title": "Image Understanding Timeout",
"intent": "Run OpenClaw capability image describe through the real packaged media-understanding CLI with a provider timeout.",
"commands": [
"node {kovaRoot}/support/media-understanding-timeout.mjs --env {env} --artifact-dir {artifactDir} --timeout-ms 1200 --max-command-ms 45000"
],
"evidence": ["image describe command duration", "provider timeout observed", "gateway status after timeout", "mock provider request log"]
},
{
"id": "post-media-health",
"title": "Post-Media Gateway Health",
"intent": "Verify the gateway remains responsive and collect logs after the media timeout path.",
"commands": ["ocm @{env} -- status", "ocm logs {env} --tail 300 --raw"],
"evidence": ["gateway status", "provider timeout logs", "plugin errors", "memory after media timeout"]
}
]
}

View File

@ -79,6 +79,7 @@ export function evaluateRecord(record, scenario, options = {}) {
const soakEvidence = collectSoakEvidence(allResults);
const mcpBridgeEvidence = collectMcpBridgeEvidence(allResults);
const browserAutomationEvidence = collectBrowserAutomationEvidence(allResults);
const mediaUnderstandingEvidence = collectMediaUnderstandingEvidence(allResults);
const listeningFailures = countListeningFailures(record);
const tcpConnectMaxMs = collectTcpConnectMax(record);
const timeToListeningMs = collectTimeToListening(record);
@ -345,6 +346,64 @@ export function evaluateRecord(record, scenario, options = {}) {
}
}
if (mediaUnderstandingEvidence.available) {
checkEvidenceThreshold(violations, "media-understanding", "mediaDescribeMs", mediaUnderstandingEvidence.mediaDescribeMs, thresholds.mediaDescribeMs, "Media understanding image describe");
checkEvidenceThreshold(violations, "media-understanding", "mediaStatusAfterTimeoutMs", mediaUnderstandingEvidence.mediaStatusAfterTimeoutMs, thresholds.mediaStatusAfterTimeoutMs, "Post-media status");
if (typeof thresholds.mediaTimeoutObserved === "number" && mediaUnderstandingEvidence.mediaTimeoutObserved !== true) {
violations.push({
kind: "media-understanding",
metric: "mediaTimeoutObserved",
expected: true,
actual: mediaUnderstandingEvidence.mediaTimeoutObserved,
message: "Media understanding provider timeout was not observed as a bounded command failure"
});
}
if (mediaUnderstandingEvidence.mediaCommandTimedOut === true) {
violations.push({
kind: "media-understanding",
metric: "mediaCommandTimedOut",
expected: false,
actual: true,
message: "Media understanding command hit Kova's outer timeout instead of OpenClaw's provider timeout"
});
}
if (mediaUnderstandingEvidence.gatewayStatusWorks === false) {
violations.push({
kind: "media-understanding",
metric: "mediaGatewayStatusWorks",
expected: true,
actual: false,
message: "Gateway status did not work after media understanding timeout"
});
}
if (mediaUnderstandingEvidence.errors.length > 0) {
violations.push({
kind: "media-understanding",
metric: "mediaUnderstandingErrors",
expected: "0",
actual: mediaUnderstandingEvidence.errors.length,
message: `Media understanding timeout smoke reported ${mediaUnderstandingEvidence.errors.length} error(s): ${mediaUnderstandingEvidence.errors[0]}`
});
}
}
if (typeof thresholds.providerRequestCountMin === "number") {
const requestCount = record.providerEvidence?.requestCount ?? 0;
if (requestCount < thresholds.providerRequestCountMin) {
violations.push({
kind: "provider",
metric: "providerRequestCountMin",
expected: `>= ${thresholds.providerRequestCountMin}`,
actual: requestCount,
message: `Provider saw ${requestCount} request(s), below required ${thresholds.providerRequestCountMin}`
});
}
}
if (typeof thresholds.rssGrowthMb === "number" && rssGrowthMb !== null && rssGrowthMb > thresholds.rssGrowthMb) {
violations.push({
kind: "soak",
@ -631,6 +690,13 @@ export function evaluateRecord(record, scenario, options = {}) {
browserStopped: browserAutomationEvidence.browserStopped,
browserProcessLeaks: browserAutomationEvidence.available ? (browserAutomationEvidence.browserStopped === false ? 1 : 0) : null,
browserErrors: browserAutomationEvidence.errors,
mediaUnderstandingEvidence,
mediaDescribeMs: mediaUnderstandingEvidence.mediaDescribeMs,
mediaTimeoutObserved: mediaUnderstandingEvidence.mediaTimeoutObserved,
mediaCommandTimedOut: mediaUnderstandingEvidence.mediaCommandTimedOut,
mediaStatusAfterTimeoutMs: mediaUnderstandingEvidence.mediaStatusAfterTimeoutMs,
mediaGatewayStatusWorks: mediaUnderstandingEvidence.gatewayStatusWorks,
mediaErrors: mediaUnderstandingEvidence.errors,
soakDurationMs: soakEvidence.durationMs,
soakIterations: soakEvidence.iterations,
soakCommandP95Ms: soakEvidence.commandP95Ms,
@ -1020,7 +1086,7 @@ function checkAgentTurnCorrectness(violations, turns, expectedText) {
function evaluateProviderSimulation({ turns, scenario, record, thresholds }) {
const mode = scenario.mockProvider?.mode ?? "normal";
const expected = mode !== "normal";
const expected = mode !== "normal" && scenario.agent !== undefined;
const issue = classifyProviderIssue(turns);
const expectedFailureTurns = turns.filter((turn) => turn.expectedFailure === true);
const normalTurns = turns.filter((turn) => turn.expectedFailure !== true);
@ -1146,35 +1212,35 @@ function checkProviderSimulation(violations, simulation) {
function buildAgentFailureFixerSummary(latencyDiagnosis, cleanupDiagnosis, providerSimulation, containment) {
const items = [];
if (providerSimulation?.mode === "timeout" || providerSimulation?.observedIssue === "provider-timeout") {
if (providerSimulation?.expected === true && (providerSimulation.mode === "timeout" || providerSimulation.observedIssue === "provider-timeout")) {
items.push({
kind: "provider-timeout",
summary: "Provider timed out; verify OpenClaw surfaces the timeout clearly, cancels the turn, and leaves the gateway responsive.",
likelyOwner: "provider / agent timeout handling"
});
}
if (providerSimulation?.mode === "streaming-stall" || providerSimulation?.observedIssue === "streaming-stall") {
if (providerSimulation?.expected === true && (providerSimulation.mode === "streaming-stall" || providerSimulation.observedIssue === "streaming-stall")) {
items.push({
kind: "streaming-stall",
summary: "Provider stream stalled; verify OpenClaw applies stream idle timeouts and does not freeze gateway/TUI/dashboard.",
likelyOwner: "provider streaming / agent turn cancellation"
});
}
if (providerSimulation?.mode === "malformed" || providerSimulation?.observedIssue === "malformed-response") {
if (providerSimulation?.expected === true && (providerSimulation.mode === "malformed" || providerSimulation.observedIssue === "malformed-response")) {
items.push({
kind: "malformed-response",
summary: "Provider returned malformed output; verify OpenClaw reports a clear provider parse error and keeps the session usable.",
likelyOwner: "provider response parsing"
});
}
if (providerSimulation?.recoveryOk === true) {
if (providerSimulation?.expected === true && providerSimulation.recoveryOk === true) {
items.push({
kind: "provider-recovered",
summary: "Provider failed and later recovered; verify retry/recovery behavior is intentional and latency remains acceptable.",
likelyOwner: "provider retry / agent recovery"
});
}
if (providerSimulation?.mode === "concurrent-pressure") {
if (providerSimulation?.expected === true && providerSimulation.mode === "concurrent-pressure") {
items.push({
kind: "provider-concurrent-pressure",
summary: `Concurrent provider pressure produced ${providerSimulation.providerRequestCount ?? "unknown"} provider request(s), max in-flight ${providerSimulation.providerMaxConcurrency ?? "unknown"}; verify OpenClaw keeps gateway and agent sessions responsive under overlapping turns.`,
@ -1864,6 +1930,62 @@ function parseBrowserAutomationSmokeOutput(result) {
}
}
/**
 * Builds the media-understanding evidence summary from command results.
 *
 * A result contributes only when its command mentions
 * "media-understanding-timeout.mjs" and its stdout yields a payload accepted by
 * parseMediaUnderstandingTimeoutOutput.
 *
 * @param {Array<object>} results - Command results; each may carry `command` and `stdout`.
 * @returns {object} Evidence object tagged "kova.mediaUnderstandingEvidence.v1".
 *   When no helper payload is found, `available` is false and every metric is null.
 */
function collectMediaUnderstandingEvidence(results) {
  const schemaVersion = "kova.mediaUnderstandingEvidence.v1";

  // Gather the helper payloads, ignoring unrelated commands and unparsable output.
  const smokes = [];
  for (const result of results) {
    if (!result.command?.includes("media-understanding-timeout.mjs")) {
      continue;
    }
    const payload = parseMediaUnderstandingTimeoutOutput(result);
    if (payload) {
      smokes.push(payload);
    }
  }

  if (smokes.length === 0) {
    return {
      schemaVersion,
      available: false,
      mediaDescribeMs: null,
      mediaTimeoutObserved: null,
      mediaCommandTimedOut: null,
      mediaStatusAfterTimeoutMs: null,
      gatewayStatusWorks: null,
      errors: [],
      smokes: []
    };
  }

  // Timings are combined through maxNullable (defined elsewhere in this file;
  // presumably the largest non-null value — confirm against its definition).
  const describeTimings = smokes.map((smoke) => smoke.mediaDescribeMs);
  const statusTimings = smokes.map((smoke) => smoke.mediaStatusAfterTimeoutMs);

  // Per-run details with missing fields normalized to null / [].
  const smokeDetails = smokes.map((smoke) => ({
    durationMs: smoke.durationMs ?? null,
    mediaDescribeMs: smoke.mediaDescribeMs ?? null,
    mediaTimeoutObserved: smoke.mediaTimeoutObserved ?? null,
    mediaCommandTimedOut: smoke.mediaCommandTimedOut ?? null,
    mediaCommandStatus: smoke.mediaCommandStatus ?? null,
    mediaStatusAfterTimeoutMs: smoke.mediaStatusAfterTimeoutMs ?? null,
    gatewayStatusWorks: smoke.gatewayStatusWorks ?? null,
    errors: smoke.errors ?? []
  }));

  return {
    schemaVersion,
    available: true,
    mediaDescribeMs: maxNullable(...describeTimings),
    // Positive signals must hold for every run; the outer-timeout flag trips on any run.
    mediaTimeoutObserved: smokes.every((smoke) => smoke.mediaTimeoutObserved === true),
    mediaCommandTimedOut: smokes.some((smoke) => smoke.mediaCommandTimedOut === true),
    mediaStatusAfterTimeoutMs: maxNullable(...statusTimings),
    gatewayStatusWorks: smokes.every((smoke) => smoke.gatewayStatusWorks === true),
    errors: smokes.flatMap((smoke) => smoke.errors ?? []),
    smokes: smokeDetails
  };
}
/**
 * Extracts the media-understanding timeout helper's JSON payload from a
 * command result's stdout.
 *
 * The payload may be surrounded by other output (log lines before it, a
 * trailing message after it), so the candidate span runs from a "{" to the
 * last "}" in the text. If that span is not valid JSON — for example because
 * an earlier log line contained a brace — parsing is retried from the next "{".
 *
 * @param {{stdout?: unknown}|null|undefined} result - Command result.
 * @returns {object|null} The parsed payload when its schemaVersion is
 *   "kova.mediaUnderstandingTimeout.v1"; otherwise null (no JSON found, JSON of
 *   a different schema, or missing stdout).
 */
function parseMediaUnderstandingTimeoutOutput(result) {
  // Coerce so a missing result/stdout or a non-string stdout cannot throw.
  const text = String(result?.stdout ?? "");
  const jsonEnd = text.lastIndexOf("}");
  for (
    let jsonStart = text.indexOf("{");
    jsonStart >= 0 && jsonStart < jsonEnd;
    jsonStart = text.indexOf("{", jsonStart + 1)
  ) {
    let parsed;
    try {
      parsed = JSON.parse(text.slice(jsonStart, jsonEnd + 1));
    } catch {
      // This brace does not open the payload (e.g. it sits in a log line); try the next one.
      continue;
    }
    // The first span that parses is the payload candidate; accept it only for the known schema.
    return parsed?.schemaVersion === "kova.mediaUnderstandingTimeout.v1" ? parsed : null;
  }
  return null;
}
function checkEvidenceThreshold(violations, kind, metric, actual, threshold, label) {
if (typeof threshold !== "number" || actual === null) {
return;

View File

@ -152,6 +152,9 @@ export function renderMarkdownReport(report) {
if (record.measurements.browserAutomationEvidence?.available) {
lines.push(`- Browser automation: doctor ${record.measurements.browserDoctorMs ?? "unknown"} ms; start ${record.measurements.browserStartMs ?? "unknown"} ms; open ${record.measurements.browserOpenMs ?? "unknown"} ms; tabs ${record.measurements.browserTabsMs ?? "unknown"} ms; snapshot ${record.measurements.browserSnapshotMs ?? "unknown"} ms; stop ${record.measurements.browserStopMs ?? "unknown"} ms; tabs ${record.measurements.browserTabCount ?? "unknown"}; stopped ${record.measurements.browserStopped ?? "unknown"}`);
}
if (record.measurements.mediaUnderstandingEvidence?.available) {
lines.push(`- Media understanding: describe ${record.measurements.mediaDescribeMs ?? "unknown"} ms; timeout observed ${record.measurements.mediaTimeoutObserved ?? "unknown"}; command outer timeout ${record.measurements.mediaCommandTimedOut ?? "unknown"}; status after timeout ${record.measurements.mediaStatusAfterTimeoutMs ?? "unknown"} ms; gateway status ${record.measurements.mediaGatewayStatusWorks ?? "unknown"}`);
}
lines.push(`- Provider/model timing: ${record.measurements.providerModelTimingMs ?? "unknown"} ms`);
lines.push(`- Agent turn: ${record.measurements.agentTurnMs ?? "unknown"} ms (${record.measurements.agentResponseOk ?? "not-run"})`);
if (record.measurements.agentTurnCount > 0) {
@ -702,6 +705,12 @@ function summarizeMeasurements(measurements) {
soakHealthFailures: measurements.soakHealthFailures ?? null,
rssGrowthMb: measurements.rssGrowthMb ?? null,
gatewayRssGrowthMb: measurements.gatewayRssGrowthMb ?? null,
mediaUnderstandingEvidence: measurements.mediaUnderstandingEvidence ?? null,
mediaDescribeMs: measurements.mediaDescribeMs ?? null,
mediaTimeoutObserved: measurements.mediaTimeoutObserved ?? null,
mediaCommandTimedOut: measurements.mediaCommandTimedOut ?? null,
mediaStatusAfterTimeoutMs: measurements.mediaStatusAfterTimeoutMs ?? null,
mediaGatewayStatusWorks: measurements.mediaGatewayStatusWorks ?? null,
resourceTrend: measurements.resourceTrend ?? null,
profilingEnabled: measurements.profilingEnabled ?? null,
profilingResourceInterpretation: measurements.profilingResourceInterpretation ?? null,
@ -850,7 +859,7 @@ export function renderPasteSummary(report) {
const roleText = compactRolePeaks(record.measurements).slice(0, 4)
.map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`)
.join(", ") || "unknown";
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; browser start/open/snapshot ${record.measurements.browserStartMs ?? "unknown"}/${record.measurements.browserOpenMs ?? "unknown"}/${record.measurements.browserSnapshotMs ?? "unknown"}ms; browser tabs ${record.measurements.browserTabCount ?? "unknown"}; browser stopped ${record.measurements.browserStopped ?? "unknown"}; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? "unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; cold/warm ${record.measurements.coldAgentTurnMs ?? "unknown"}/${record.measurements.warmAgentTurnMs ?? "unknown"}ms; cold-warm delta ${record.measurements.agentColdWarmDeltaMs ?? "unknown"}ms; pre-provider ${record.measurements.agentPreProviderMs ?? "unknown"}ms; provider work ${record.measurements.agentProviderFinalMs ?? "unknown"}ms; cleanup max ${record.measurements.agentCleanupMaxMs ?? "unknown"}ms; diagnosis ${record.measurements.agentLatencyDiagnosis?.kind ?? "unknown"}; cleanup diagnosis ${record.measurements.agentCleanupDiagnosis?.kind ?? "none"}; provider simulation ${record.measurements.agentProviderMode ?? "normal"}/${record.measurements.agentProviderIssue ?? "none"} containment ${record.measurements.agentProviderContainmentOk ?? "n/a"} recovery ${record.measurements.agentProviderRecoveryOk ?? "n/a"}; agent process leaks ${record.measurements.agentProcessLeakCount ?? 
"unknown"}; MCP init/tools/shutdown ${record.measurements.mcpInitializeMs ?? "unknown"}/${record.measurements.mcpToolsListMs ?? "unknown"}/${record.measurements.mcpShutdownMs ?? "unknown"}ms; MCP tools ${record.measurements.mcpToolCount ?? "unknown"}; browser start/open/snapshot ${record.measurements.browserStartMs ?? "unknown"}/${record.measurements.browserOpenMs ?? "unknown"}/${record.measurements.browserSnapshotMs ?? "unknown"}ms; browser tabs ${record.measurements.browserTabCount ?? "unknown"}; browser stopped ${record.measurements.browserStopped ?? "unknown"}; media describe ${record.measurements.mediaDescribeMs ?? "unknown"}ms; media timeout ${record.measurements.mediaTimeoutObserved ?? "unknown"}; media status ${record.measurements.mediaStatusAfterTimeoutMs ?? "unknown"}ms; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}; warm runtime deps restages ${record.measurements.warmRuntimeDepsRestageCount ?? 
"unknown"}; warm reuse ${record.measurements.runtimeDepsWarmReuseOk ?? "unknown"}.`);
}
} else if (record.violations?.length > 0) {
if (record.measurements) {

View File

@ -202,6 +202,19 @@ export async function runSelfCheck(flags = {}) {
assertEqual(browserCommand.includes("--artifact-dir '"), true, "browser helper receives quoted artifact dir");
assertEqual(record?.thresholds?.browserProcessLeaks, 0, "browser process leak threshold");
}));
checks.push(await jsonCommandCheck("media-understanding-dry-run-json", `node bin/kova.mjs run --target runtime:stable --scenario media-understanding-timeout --state fresh --report-dir ${quoteShell(tmp)} --json`, async (data) => {
const report = JSON.parse(await readFile(data.jsonPath, "utf8"));
const record = report.records?.[0];
assertEqual(record?.surface, "media-understanding", "media understanding surface");
const commands = record?.phases?.flatMap((phase) => phase.commands ?? []) ?? [];
const mediaCommand = commands.find((command) => command.includes("media-understanding-timeout.mjs")) ?? "";
assertEqual(mediaCommand.includes("--artifact-dir '"), true, "media helper receives quoted artifact dir");
assertEqual(mediaCommand.includes("--timeout-ms 1200"), true, "media helper receives provider timeout");
assertEqual(mediaCommand.includes("--max-command-ms 45000"), true, "media helper allows cold CLI evidence before outer timeout");
assertEqual(record?.auth?.mockProvider?.mode, "timeout", "media scenario mock timeout mode");
assertEqual(record?.thresholds?.mediaTimeoutObserved, 1, "media timeout threshold");
assertEqual(record?.thresholds?.providerRequestCountMin, 1, "media provider request threshold");
}));
checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => {
assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema");
assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id");
@ -278,6 +291,7 @@ export async function runSelfCheck(flags = {}) {
checks.push(soakTrendEvaluationCheck());
checks.push(mcpBridgeEvidenceEvaluationCheck());
checks.push(browserAutomationEvidenceEvaluationCheck());
checks.push(mediaUnderstandingEvidenceEvaluationCheck());
checks.push(await jsonCommandCheck(
"dry-run-state-lifecycle-json",
`node bin/kova.mjs run --target runtime:stable --scenario fresh-install --state missing-plugin-index --report-dir ${quoteShell(tmp)} --json`,
@ -2443,6 +2457,116 @@ function browserAutomationEvidenceEvaluationCheck() {
}
}
/**
 * Self-check that runs synthetic media-understanding timeout helper output
 * through evaluateRecord twice: once with healthy evidence (expects PASS and
 * the media measurements to be populated) and once with failing evidence and
 * zero provider requests (expects FAIL with the matching violations).
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   A PASS check result, or a FAIL result carrying the first error's message.
 */
function mediaUnderstandingEvidenceEvaluationCheck() {
  const passResult = {
    id: "media-understanding-evidence-evaluation",
    status: "PASS",
    command: "evaluate synthetic media understanding timeout evidence",
    durationMs: 0
  };
  const helperCommand = "node support/media-understanding-timeout.mjs --env kova-self-check --artifact-dir /tmp/kova";

  // Builders return fresh objects so the two evaluations never share inputs.
  const evaluationOptions = () => ({ surface: { thresholds: {} }, targetPlan: { kind: "npm" } });
  const runningMetrics = () => ({ service: { gatewayState: "running" }, logs: zeroLogMetrics() });
  const timeoutPhase = (payload) => ({
    id: "media-timeout",
    results: [{
      command: helperCommand,
      status: 0,
      timedOut: false,
      durationMs: 1600,
      stdout: JSON.stringify(payload),
      stderr: ""
    }],
    metrics: runningMetrics()
  });

  try {
    // Healthy run: the provider timeout surfaced as a bounded command failure.
    const smoke = {
      schemaVersion: "kova.mediaUnderstandingTimeout.v1",
      ok: true,
      durationMs: 1600,
      mediaDescribeMs: 1250,
      mediaTimeoutObserved: true,
      mediaCommandTimedOut: false,
      mediaCommandStatus: 1,
      mediaStatusAfterTimeoutMs: 180,
      gatewayStatusWorks: true,
      errors: []
    };
    const record = {
      scenario: "media-understanding-timeout",
      status: "PASS",
      providerEvidence: { requestCount: 1 },
      phases: [timeoutPhase(smoke)],
      finalMetrics: runningMetrics()
    };
    evaluateRecord(record, {
      id: "media-understanding-timeout",
      thresholds: {
        mediaDescribeMs: 10000,
        mediaTimeoutObserved: 1,
        mediaStatusAfterTimeoutMs: 10000,
        providerRequestCountMin: 1
      }
    }, evaluationOptions());

    assertEqual(record.status, "PASS", "media understanding record status");
    const expectedMeasurements = [
      ["mediaDescribeMs", 1250, "media describe ms"],
      ["mediaTimeoutObserved", true, "media timeout observed"],
      ["mediaCommandTimedOut", false, "media command did not hit outer timeout"],
      ["mediaStatusAfterTimeoutMs", 180, "post-media status ms"],
      ["mediaGatewayStatusWorks", true, "gateway status after media timeout"]
    ];
    for (const [metric, expected, label] of expectedMeasurements) {
      assertEqual(record.measurements[metric], expected, label);
    }

    // Failing run: no timeout observed, gateway status broken, provider never called.
    const failed = {
      ...record,
      status: "PASS",
      providerEvidence: { requestCount: 0 },
      violations: [],
      measurements: undefined,
      phases: [timeoutPhase({
        ...smoke,
        ok: false,
        mediaTimeoutObserved: false,
        gatewayStatusWorks: false,
        errors: ["media timeout not observed"]
      })]
    };
    evaluateRecord(failed, {
      id: "media-understanding-timeout",
      thresholds: {
        mediaTimeoutObserved: 1,
        providerRequestCountMin: 1
      }
    }, evaluationOptions());

    assertEqual(failed.status, "FAIL", "media failure status");
    const expectedViolations = [
      ["mediaTimeoutObserved", "media timeout observed violation"],
      ["providerRequestCountMin", "media provider request count violation"]
    ];
    for (const [metric, label] of expectedViolations) {
      const reported = failed.violations.some((violation) => violation.metric === metric);
      assertEqual(reported, true, label);
    }

    return passResult;
  } catch (error) {
    return { ...passResult, status: "FAIL", message: error.message };
  }
}
function agentColdWarmEvaluationCheck() {
try {
const coldCommand = "ocm @kova -- agent --local --agent main --session-id kova-agent-cold-warm --message hi --json";

View File

@ -23,7 +23,8 @@
"dashboard",
"tui",
"mcp-runtime",
"browser-automation"
"browser-automation",
"media-understanding"
],
"incompatibleSurfaces": [
"upgrade-existing-user"

View File

@ -0,0 +1,105 @@
#!/usr/bin/env node
import fs from "node:fs";
import path from "node:path";
// Entry point: rewrite the OpenClaw config so image understanding uses the
// openai/gpt-5.5 model with the timeout requested via --timeout-ms.
const cliOptions = parseArgs(process.argv.slice(2));
const timeoutMs = positiveInteger(cliOptions.timeoutMs ?? "1200", "--timeout-ms");
// The config stores whole seconds: round up and never go below one second.
const timeoutSeconds = Math.max(1, Math.ceil(timeoutMs / 1000));

// Explicit env overrides win; OPENCLAW_HOME is only required when no state dir is given.
const stateDir = process.env.OPENCLAW_STATE_DIR || path.join(requiredEnv("OPENCLAW_HOME"), ".openclaw");
const configPath = process.env.OPENCLAW_CONFIG_PATH || path.join(stateDir, "openclaw.json");

// Best effort: a missing or unparsable config file is treated as an empty config.
const readConfigOrEmpty = (file) => {
  try {
    return JSON.parse(fs.readFileSync(file, "utf8"));
  } catch {
    return {};
  }
};
const config = readConfigOrEmpty(configPath);

// Register gpt-5.5 under the openai provider, replacing any previous entry with that id.
const existingProviders = config.models?.providers || {};
const openaiProvider = existingProviders.openai || {};
const retainedModels = Array.isArray(openaiProvider.models)
  ? openaiProvider.models.filter((model) => model?.id !== "gpt-5.5")
  : [];
const imageModel = {
  id: "gpt-5.5",
  name: "gpt-5.5",
  api: "openai-responses",
  reasoning: false,
  input: ["text", "image"],
  contextWindow: 128000,
  contextTokens: 96000,
  maxTokens: 4096
};
config.models = {
  ...(config.models || {}),
  mode: "merge",
  providers: {
    ...existingProviders,
    openai: {
      ...openaiProvider,
      models: [...retainedModels, imageModel]
    }
  }
};

// Enable the image media tool and point it at that model with the same timeout.
const existingMedia = config.tools?.media || {};
const existingImage = existingMedia.image || {};
config.tools = {
  ...(config.tools || {}),
  media: {
    ...existingMedia,
    image: {
      ...existingImage,
      enabled: true,
      timeoutSeconds,
      models: [
        {
          provider: "openai",
          model: "gpt-5.5",
          capabilities: ["image"],
          timeoutSeconds
        }
      ]
    }
  }
};

fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");
// Print the path that was written so the caller can capture it.
console.log(configPath);
/**
 * Parses `--flag value` pairs from the command line.
 *
 * Flag names are normalized by dropping the leading "--" and every "-", so
 * "--timeout-ms" is stored as "timeoutms". Only the timeout flag is returned;
 * other well-formed flags are accepted and ignored.
 *
 * @param {string[]} args - Arguments after the script path.
 * @returns {{timeoutMs: (string|undefined)}} Raw (string) value of --timeout-ms, if given.
 * @throws {Error} If an argument is not a "--" flag, or a flag has no value
 *   (missing, empty, or itself starting with "--").
 */
function parseArgs(args) {
  const values = new Map();
  let cursor = 0;
  while (cursor < args.length) {
    const flag = args[cursor];
    if (!flag.startsWith("--")) {
      throw new Error(`unexpected argument: ${flag}`);
    }
    const value = args[cursor + 1];
    const hasValue = Boolean(value) && !value.startsWith("--");
    if (!hasValue) {
      throw new Error(`${flag} requires a value`);
    }
    values.set(flag.slice(2).replaceAll("-", ""), value);
    // Step over both the flag and the value it consumed.
    cursor += 2;
  }
  return {
    timeoutMs: values.get("timeoutms")
  };
}
/**
 * Converts a flag value to a number and requires it to be a positive integer.
 *
 * @param {unknown} value - Raw value; converted with Number().
 * @param {string} flag - Flag name used in the error message.
 * @returns {number} The converted value.
 * @throws {Error} If the converted value is not an integer greater than zero.
 */
function positiveInteger(value, flag) {
  const parsed = Number(value);
  const isValid = Number.isInteger(parsed) && parsed > 0;
  if (isValid) {
    return parsed;
  }
  throw new Error(`${flag} must be a positive integer`);
}
/**
 * Reads an environment variable that must be set to a non-empty value.
 *
 * @param {string} name - Environment variable name.
 * @returns {string} The variable's value.
 * @throws {Error} If the variable is unset or empty.
 */
function requiredEnv(name) {
  const { [name]: value } = process.env;
  if (value) {
    return value;
  }
  throw new Error(`${name} is required`);
}

View File

@ -0,0 +1,220 @@
#!/usr/bin/env node
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
// Main flow of the media-understanding timeout smoke check:
//   1. validate arguments and refuse to run against an env whose name is not "kova-*",
//   2. write a small PNG fixture into the artifact directory,
//   3. configure the env through the sibling configure script,
//   4. run `capability image describe` and expect it to fail with a timeout-like message,
//   5. run `status` afterwards to confirm the command path still responds,
//   6. write summary.json, print the summary as one JSON line, and exit 0 (ok) or 1.
const options = parseArgs(process.argv.slice(2));
const envName = requiredString(options.env, "--env");
const artifactDir = requiredString(options.artifactDir, "--artifact-dir");
// Defaults: 1200 ms for the timeout handed to the tool, 10000 ms as the budget within
// which the describe command must finish.
const timeoutMs = positiveInteger(options.timeoutMs ?? "1200", "--timeout-ms");
const maxCommandMs = positiveInteger(options.maxCommandMs ?? "10000", "--max-command-ms");
// Output text that counts as evidence that the describe command reported a timeout.
const expectedPattern = /timeout|timed out|abort|aborted|deadline/i;
// Safety guard: only env names of the form "kova-<alnum...>" are accepted.
if (!/^kova-[a-z0-9][a-z0-9-]*$/i.test(envName)) {
failUsage(`refusing to run media understanding smoke against non-Kova env: ${JSON.stringify(envName)}`);
}
await fs.mkdir(artifactDir, { recursive: true });
const mediaDir = path.join(artifactDir, "media-understanding");
await fs.mkdir(mediaDir, { recursive: true });
// Image fixture decoded from an embedded base64 PNG.
const imagePath = path.join(mediaDir, "kova-timeout.png");
await fs.writeFile(imagePath, Buffer.from(
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=",
"base64"
));
// Run the configure script inside the env, passing the same timeout. A configure failure
// does not stop the flow; it is recorded in the summary and forces ok=false.
const configure = await run("ocm", [
"env",
"exec",
envName,
"--",
"node",
supportPath("configure-openclaw-media-understanding.mjs"),
"--timeout-ms",
String(timeoutMs)
], { timeoutMs: 15000 });
// The reported start/finish timestamps bracket only the describe command.
const startedAtEpochMs = Date.now();
const startedAt = new Date(startedAtEpochMs).toISOString();
// The outer kill timer is maxCommandMs + 3000 so a command that overruns the budget is
// still captured (and later rejected by the duration check) before this harness kills it.
const describe = await run("ocm", [
`@${envName}`,
"--",
"capability",
"image",
"describe",
"--file",
imagePath,
"--model",
"openai/gpt-5.5",
"--prompt",
"Reply with exact ASCII text KOVA_AGENT_OK only.",
"--timeout-ms",
String(timeoutMs),
"--json"
], { timeoutMs: maxCommandMs + 3000 });
const finishedAtEpochMs = Date.now();
const status = await run("ocm", [`@${envName}`, "--", "status"], { timeoutMs: 15000 });
// A timeout counts as "observed" only when the command itself failed (non-zero exit),
// was not killed by this harness, finished within the budget, and its stdout/stderr
// mention a timeout-like condition.
const timeoutObserved = describe.status !== 0 &&
describe.timedOut !== true &&
describe.durationMs <= maxCommandMs &&
expectedPattern.test(`${describe.stdout}\n${describe.stderr}`);
const statusWorks = status.status === 0 && status.timedOut !== true;
const ok = configure.status === 0 && timeoutObserved && statusWorks;
// NOTE(review): this summary exposes `gatewayStatusWorks`, while the scenario definition
// lists a required metric named `mediaGatewayStatusWorks` — confirm the collector maps
// this field to that metric.
const summary = {
schemaVersion: "kova.mediaUnderstandingTimeout.v1",
ok,
env: envName,
imagePath,
timeoutMs,
maxCommandMs,
startedAt,
startedAtEpochMs,
finishedAt: new Date(finishedAtEpochMs).toISOString(),
finishedAtEpochMs,
durationMs: finishedAtEpochMs - startedAtEpochMs,
mediaDescribeMs: describe.durationMs,
mediaTimeoutObserved: timeoutObserved,
mediaCommandTimedOut: describe.timedOut === true,
mediaCommandStatus: describe.status,
mediaStatusAfterTimeoutMs: status.durationMs,
gatewayStatusWorks: statusWorks,
configureStatus: configure.status,
// One entry per failed check; the array is empty when ok is true.
errors: [
...(configure.status === 0 ? [] : [`configure failed: ${snippet(configure.stderr || configure.stdout)}`]),
...(timeoutObserved ? [] : [`media timeout not observed: status=${describe.status} timedOut=${describe.timedOut} duration=${describe.durationMs} stderr=${snippet(describe.stderr)}`]),
...(statusWorks ? [] : [`status after media timeout failed: status=${status.status} stderr=${snippet(status.stderr || status.stdout)}`])
],
commands: {
configure: compactCommand(configure),
describe: compactCommand(describe),
status: compactCommand(status)
}
};
// Persist a pretty-printed copy next to the fixture and emit a single-line copy on stdout.
await fs.writeFile(path.join(mediaDir, "summary.json"), `${JSON.stringify(summary, null, 2)}\n`, "utf8");
process.stdout.write(`${JSON.stringify(summary)}\n`);
process.exit(ok ? 0 : 1);
/**
 * Spawn a command without a shell, capture its output, and enforce a wall-clock timeout.
 *
 * The returned promise never rejects: spawn failures and timeouts are reported through
 * the resolved result object instead.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable verbatim.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` defaults to 30000.
 * @returns {Promise<{
 *   command: string,
 *   status: number,
 *   signal: string | null,
 *   timedOut: boolean,
 *   durationMs: number,
 *   stdout: string,
 *   stderr: string
 * }>} `status` is 124 when the timeout fired, 127 when the child emitted "error"
 *   (for example the executable could not be spawned), otherwise the exit code
 *   (1 when the exit code is null).
 */
function run(command, args, options = {}) {
  const started = Date.now();
  return new Promise((resolve) => {
    const child = spawn(command, args, {
      env: process.env,
      shell: false,
      stdio: ["ignore", "pipe", "pipe"]
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const timer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGTERM");
      // Escalate to SIGKILL if the child ignores SIGTERM; unref so this timer alone
      // never keeps the harness process alive.
      setTimeout(() => child.kill("SIGKILL"), 3000).unref();
    }, options.timeoutMs ?? 30000);
    // Decode through the stream's own UTF-8 decoder so a multi-byte character that is
    // split across two chunks is reassembled instead of turning into U+FFFD characters
    // (which is what per-chunk `chunk.toString()` produces).
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", (error) => {
      clearTimeout(timer);
      resolve({
        command: [command, ...args].join(" "),
        status: 127,
        signal: null,
        timedOut,
        durationMs: Date.now() - started,
        stdout: "",
        stderr: error.message
      });
    });
    // If "close" follows an "error", the second resolve() is a no-op because a promise
    // settles only once.
    child.on("close", (status, signal) => {
      clearTimeout(timer);
      resolve({
        command: [command, ...args].join(" "),
        status: timedOut ? 124 : (status ?? 1),
        signal,
        timedOut,
        durationMs: Date.now() - started,
        stdout: truncate(stdout),
        stderr: truncate(stderr)
      });
    });
  });
}
/**
 * Build the trimmed-down record of a command result that is stored in the summary.
 *
 * @param {object} result - A result object as produced by `run`.
 * @returns {object} The same fields, with stdout and stderr capped at 1200 characters
 *   via `truncate`.
 */
function compactCommand(result) {
  const { command, status, signal, timedOut, durationMs } = result;
  const stdout = truncate(result.stdout, 1200);
  const stderr = truncate(result.stderr, 1200);
  return { command, status, signal, timedOut, durationMs, stdout, stderr };
}
/**
 * Resolve a file name relative to the directory that contains this script.
 *
 * @param {string} file - File name (or relative path) of a sibling support script.
 * @returns {string} Path joined onto this module's directory.
 */
function supportPath(file) {
  const scriptFile = fileURLToPath(import.meta.url);
  const scriptDir = path.dirname(scriptFile);
  return path.join(scriptDir, file);
}
/**
 * Parse `--flag value` pairs for the smoke script.
 *
 * Flag names are normalized by dropping the leading "--" and turning every "-" into "_".
 * Malformed input is reported through `failUsage`.
 *
 * @param {string[]} args - Raw arguments (typically `process.argv.slice(2)`).
 * @returns {{
 *   env: string | undefined,
 *   artifactDir: string | undefined,
 *   timeoutMs: string | undefined,
 *   maxCommandMs: string | undefined
 * }} Raw string values of the recognized flags; other flags are ignored.
 */
function parseArgs(args) {
  const collected = {};
  // Each iteration consumes one flag plus its value, hence the step of two.
  for (let cursor = 0; cursor < args.length; cursor += 2) {
    const flag = args[cursor];
    if (!flag.startsWith("--")) {
      failUsage(`unexpected positional argument ${JSON.stringify(flag)}`);
    }
    const next = args[cursor + 1];
    if (next === undefined || next.startsWith("--")) {
      failUsage(`${flag} requires a value`);
    }
    const normalized = flag.slice(2).replaceAll("-", "_");
    collected[normalized] = next;
  }
  const {
    env,
    artifact_dir: artifactDir,
    timeout_ms: timeoutMs,
    max_command_ms: maxCommandMs
  } = collected;
  return { env, artifactDir, timeoutMs, maxCommandMs };
}
/**
 * Require a flag value to be a non-empty string, reporting a usage error otherwise.
 *
 * @param {unknown} value - Value taken from the parsed arguments.
 * @param {string} flag - Flag name used in the usage message.
 * @returns {unknown} The value, unchanged.
 */
function requiredString(value, flag) {
  const present = typeof value === "string" && value.length > 0;
  if (!present) {
    failUsage(`${flag} is required`);
  }
  return value;
}
/**
 * Coerce a flag value to a number and require an integer greater than zero, reporting a
 * usage error otherwise.
 *
 * @param {unknown} value - Raw value to convert with `Number()`.
 * @param {string} flag - Flag name used in the usage message.
 * @returns {number} The converted number.
 */
function positiveInteger(value, flag) {
  const parsed = Number(value);
  const valid = Number.isInteger(parsed) && parsed > 0;
  if (!valid) {
    failUsage(`${flag} must be a positive integer`);
  }
  return parsed;
}
/**
 * Stringify a value and cap it at `limit` characters.
 *
 * @param {unknown} value - Value to stringify; null and undefined become "".
 * @param {number} [limit=4000] - Maximum number of characters kept.
 * @returns {string} The text unchanged when it fits, otherwise its first `limit`
 *   characters followed by a "[truncated N chars]" line.
 */
function truncate(value, limit = 4000) {
  const text = String(value ?? "");
  const overflow = text.length - limit;
  return overflow <= 0
    ? text
    : `${text.slice(0, limit)}\n[truncated ${overflow} chars]`;
}
/**
 * Produce a short single-line excerpt of a value for error messages.
 *
 * @param {unknown} value - Value to stringify; null and undefined become "".
 * @returns {string} The text with whitespace runs collapsed to single spaces, trimmed,
 *   and capped at 500 characters via `truncate`.
 */
function snippet(value) {
  const raw = String(value ?? "");
  const singleLine = raw.replace(/\s+/g, " ").trim();
  return truncate(singleLine, 500);
}
/**
 * Report a usage error and terminate the process.
 *
 * Writes the message and then the usage line to stderr, each followed by a newline,
 * and exits with code 2.
 *
 * @param {string} message - Description of what was wrong with the invocation.
 */
function failUsage(message) {
  const usage = "usage: media-understanding-timeout.mjs --env <kova-env> --artifact-dir <dir> [--timeout-ms <ms>] [--max-command-ms <ms>]";
  for (const line of [message, usage]) {
    process.stderr.write(`${line}\n`);
  }
  process.exit(2);
}

View File

@ -0,0 +1,33 @@
{
"id": "media-understanding",
"title": "Media Understanding",
"ownerArea": "media-understanding",
"description": "Exercise OpenClaw media understanding through the packaged capability CLI and verify provider timeouts do not stall the gateway or command path.",
"requiredStates": ["fresh"],
"targetKinds": ["npm", "channel", "runtime", "local-build"],
"requiredMetrics": [
"mediaDescribeMs",
"mediaTimeoutObserved",
"mediaStatusAfterTimeoutMs",
"mediaGatewayStatusWorks",
"providerRequestCountMin",
"peakRssMb"
],
"processRoles": ["gateway", "command-tree", "mock-provider"],
"thresholds": {
"mediaDescribeMs": 10000,
"mediaTimeoutObserved": 1,
"mediaStatusAfterTimeoutMs": 10000,
"providerRequestCountMin": 1,
"peakRssMb": 900
},
"roleThresholds": {
"gateway": { "peakRssMb": 800, "maxCpuPercent": 250 },
"command-tree": { "peakRssMb": 900, "maxCpuPercent": 300 },
"mock-provider": { "peakRssMb": 300, "maxCpuPercent": 150 }
},
"diagnostics": {
"timelineRequiredForSourceBuild": true,
"expectedSpans": ["media-understanding.image", "provider.request"]
}
}