feat: harden diagnostic timeline collectors

This commit is contained in:
Shakker 2026-04-30 10:11:04 +01:00
parent aea91cfb02
commit f504e84b61
No known key found for this signature in database
10 changed files with 552 additions and 5 deletions

View File

@ -164,8 +164,8 @@ Current metrics include:
functions, slowest OpenClaw span, event-loop delay, runtime dependency
staging, and provider/model timing
- OpenClaw diagnostics timeline availability, event count, parse errors,
slowest spans, repeated spans, event-loop max, provider request max, and child
process failures
slowest spans, repeated spans, open spans, key span summaries, event-loop max,
provider request max, and child process failures
- runtime dependency staging grouped by bundled plugin when OpenClaw emits
`runtimeDeps.stage` spans with `pluginId` attributes
@ -179,6 +179,27 @@ JSONL timeline under the run artifacts and summarizes it in `metrics.timeline`.
If OpenClaw does not emit it, the collector reports `INFO` and the scenario can
still complete.
Diagnostic source-build runs can make the timeline mandatory through the active
profile. In that mode, a missing timeline fails the scenario because, without it,
Kova cannot inspect OpenClaw internals. NPM/release runs treat a missing timeline
as informational unless the active profile explicitly requires it.
Timeline-derived measurements include:
- `openclawOpenSpanCount`: number of `span.start` events without a matching
`span.end` or `span.error`
- `openclawOpenRequiredSpanCount`: open spans that match required diagnostics
for the surface/profile
- `openclawOpenSpans`: compact open-span evidence with name, age, phase,
span id, parent span id, plugin id, provider, and operation when available
- `openclawKeySpans`: compact summaries for OpenClaw's required operational
spans: `gateway.startup`, `gateway.ready`, `config.normalize`,
`plugins.metadata.scan`, `runtimeDeps.stage`, `providers.load`,
`models.catalog`, `agent.turn`, and `agent.cleanup`
Open required spans are failures for diagnostic source-build runs because they
usually mean OpenClaw started a critical operation and never reported completion.
## Run Receipt
`kova run --json` prints a receipt instead of text paths:

View File

@ -0,0 +1,4 @@
{"schemaVersion":"openclaw.diagnostics.v1","type":"span.start","timestamp":"2026-04-29T15:30:00.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"gateway.startup","spanId":"1"}
{"schemaVersion":"openclaw.diagnostics.v1","type":"span.end","timestamp":"2026-04-29T15:30:02.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"gateway.startup","spanId":"1","durationMs":2000}
{"schemaVersion":"openclaw.diagnostics.v1","type":"span.start","timestamp":"2026-04-29T15:30:03.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"runtimeDeps.stage","spanId":"2","attributes":{"pluginId":"browser"}}
{"schemaVersion":"openclaw.diagnostics.v1","type":"eventLoop.sample","timestamp":"2026-04-29T15:30:08.000Z","name":"eventLoop","p95Ms":120,"maxMs":700,"activeSpanName":"runtimeDeps.stage"}

71
profiles/diagnostic.json Normal file
View File

@ -0,0 +1,71 @@
{
"id": "diagnostic",
"title": "Source-Built Diagnostics",
"objective": "Run release-shaped local OpenClaw builds with timeline diagnostics enabled so Kova can attribute startup, plugin, provider, agent, event-loop, CPU, and heap behavior to OpenClaw phases.",
"targetKinds": ["local-build"],
"diagnostics": {
"timelineRequired": true,
"timelineRequiredForTargetKinds": ["local-build"],
"requiredKeySpans": [
"gateway.startup",
"gateway.ready",
"config.normalize",
"plugins.metadata.scan",
"runtimeDeps.stage",
"providers.load",
"models.catalog",
"agent.turn",
"agent.cleanup"
]
},
"gate": {
"id": "openclaw-diagnostic",
"coverage": {
"surfaces": {
"blocking": ["release-runtime-startup", "gateway-performance", "bundled-runtime-deps", "agent-message"]
},
"states": {
"blocking": ["fresh", "missing-plugin-index", "many-bundled-plugins", "mock-openai-provider"]
},
"stateSurfaces": {
"blocking": [
"release-runtime-startup:fresh",
"gateway-performance:many-bundled-plugins",
"bundled-runtime-deps:missing-plugin-index",
"agent-message:mock-openai-provider"
]
},
"scenarios": {
"blocking": ["release-runtime-startup", "gateway-performance", "bundled-runtime-deps", "agent-message-latency"]
}
},
"blocking": [
{ "scenario": "release-runtime-startup", "state": "fresh" },
{ "scenario": "gateway-performance", "state": "many-bundled-plugins" },
{ "scenario": "bundled-runtime-deps", "state": "missing-plugin-index" },
{ "scenario": "agent-message-latency", "state": "mock-openai-provider" }
]
},
"entries": [
{
"scenario": "release-runtime-startup",
"state": "fresh",
"timeoutMs": 180000
},
{
"scenario": "gateway-performance",
"state": "many-bundled-plugins",
"timeoutMs": 180000
},
{
"scenario": "bundled-runtime-deps",
"state": "missing-plugin-index",
"timeoutMs": 180000
},
{
"scenario": "agent-message-latency",
"state": "mock-openai-provider",
"timeoutMs": 240000
}
]
}

View File

@ -3,6 +3,17 @@ import { join } from "node:path";
const SCHEMA_VERSION = "openclaw.diagnostics.v1";
export const TIMELINE_COLLECTOR_SCHEMA = "kova.timelineCollector.v1";
/**
 * Names of the OpenClaw spans that receive a dedicated per-name summary.
 *
 * `summarizeKeySpans` and `emptyKeySpans` emit exactly one entry per name in
 * this list, in this order.
 */
export const KEY_OPENCLAW_SPANS = [
// Gateway lifecycle.
"gateway.startup",
"gateway.ready",
// Configuration, plugin discovery and runtime dependency staging.
"config.normalize",
"plugins.metadata.scan",
"runtimeDeps.stage",
// Provider and model loading.
"providers.load",
"models.catalog",
// Agent activity.
"agent.turn",
"agent.cleanup"
];
export async function collectTimelineMetrics(artifactDir) {
const startedAt = Date.now();
@ -35,6 +46,8 @@ export async function collectTimelineMetrics(artifactDir) {
slowestSpans: timeline.slowestSpans,
spanTotals: timeline.spanTotals,
repeatedSpans: timeline.repeatedSpans,
openSpans: timeline.openSpans,
keySpans: timeline.keySpans,
runtimeDeps: timeline.runtimeDeps,
eventLoop: timeline.eventLoop,
providers: timeline.providers,
@ -86,11 +99,13 @@ export function parseTimelineText(text) {
}
export function summarizeTimeline(events, parseErrors = []) {
const spanStarts = events.filter((event) => event.type === "span.start");
const spanEvents = events.filter((event) => event.type === "span.end" || event.type === "span.error");
const eventLoopSamples = events.filter((event) => event.type === "eventLoop.sample");
const providerRequests = events.filter((event) => event.type === "provider.request");
const childProcesses = events.filter((event) => event.type === "childProcess.exit");
const spanTotals = summarizeSpans(spanEvents);
const openSpans = summarizeOpenSpans({ starts: spanStarts, terminals: spanEvents, events });
const runtimeDeps = summarizeRuntimeDeps(spanEvents);
const slowestSpans = spanEvents
.filter((event) => typeof event.durationMs === "number")
@ -104,6 +119,7 @@ export function summarizeTimeline(events, parseErrors = []) {
eventCount: events.length,
parseErrorCount: parseErrors.length,
parseErrors: parseErrors.slice(0, 20),
spanStartCount: spanStarts.length,
spanCount: spanEvents.length,
slowestSpans,
spanTotals,
@ -111,6 +127,9 @@ export function summarizeTimeline(events, parseErrors = []) {
.filter((span) => span.count > 1)
.toSorted((left, right) => (right.totalDurationMs - left.totalDurationMs) || (right.count - left.count))
.slice(0, 10),
openSpanCount: openSpans.length,
openSpans,
keySpans: summarizeKeySpans({ spanEvents, openSpans }),
runtimeDeps,
eventLoop: summarizeEventLoop(eventLoopSamples),
providers: summarizeTimedCollection(providerRequests),
@ -130,6 +149,9 @@ function emptyTimeline(extra = {}) {
slowestSpans: [],
spanTotals: {},
repeatedSpans: [],
openSpanCount: 0,
openSpans: [],
keySpans: emptyKeySpans(),
runtimeDeps: {
count: 0,
totalDurationMs: 0,
@ -254,6 +276,75 @@ function summarizeRuntimeDeps(events) {
};
}
function summarizeOpenSpans({ starts, terminals, events }) {
const terminalKeys = new Set(terminals.map(spanIdentity).filter(Boolean));
const terminalNames = countNames(terminals);
const latestTimestamp = latestEventTimestamp(events);
const open = [];
for (const start of starts) {
const key = spanIdentity(start);
if (key && terminalKeys.has(key)) {
continue;
}
if (!key && (terminalNames.get(start.name) ?? 0) > 0) {
terminalNames.set(start.name, terminalNames.get(start.name) - 1);
continue;
}
open.push({
type: start.type,
name: start.name,
spanId: start.spanId ?? null,
parentSpanId: start.parentSpanId ?? null,
timestamp: start.timestamp ?? null,
ageMs: spanAgeMs(start, latestTimestamp),
phase: start.phase ?? null,
provider: start.provider ?? start.attributes?.provider ?? null,
operation: start.operation ?? start.attributes?.operation ?? null,
pluginId: start.pluginId ?? start.attributes?.pluginId ?? null
});
}
return open.toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1)).slice(0, 25);
}
function summarizeKeySpans({ spanEvents, openSpans }) {
const byName = {};
for (const name of KEY_OPENCLAW_SPANS) {
const spans = spanEvents.filter((event) => event.name === name);
const open = openSpans.filter((event) => event.name === name);
const durations = spans.map((event) => event.durationMs).filter(isNumber);
const slowest = spans
.filter((event) => typeof event.durationMs === "number")
.toSorted((left, right) => right.durationMs - left.durationMs)
.at(0);
byName[name] = {
name,
count: spans.length,
errorCount: spans.filter((event) => event.type === "span.error").length,
openCount: open.length,
totalDurationMs: round(durations.reduce((total, value) => total + value, 0)),
maxDurationMs: maxOrNull(durations),
slowest: slowest ? compactTimedEvent(slowest) : null,
open: open.slice(0, 5)
};
}
return byName;
}
/**
 * Builds the key-span summary used when no timeline data is available: one
 * zeroed entry per name in KEY_OPENCLAW_SPANS.
 *
 * @returns {Object<string, object>} Zeroed summaries keyed by span name.
 */
function emptyKeySpans() {
  const entries = [];
  for (const name of KEY_OPENCLAW_SPANS) {
    // A fresh object (and a fresh `open` array) per name so entries never share state.
    const blank = {
      name,
      count: 0,
      errorCount: 0,
      openCount: 0,
      totalDurationMs: 0,
      maxDurationMs: null,
      slowest: null,
      open: []
    };
    entries.push([name, blank]);
  }
  return Object.fromEntries(entries);
}
function summarizeEventLoop(samples) {
const p95Values = samples.map((sample) => numberOrNull(sample.p95Ms)).filter(isNumber);
const p99Values = samples.map((sample) => numberOrNull(sample.p99Ms)).filter(isNumber);
@ -303,6 +394,8 @@ function compactTimedEvent(event) {
return {
type: event.type,
name: event.name,
spanId: event.spanId ?? null,
parentSpanId: event.parentSpanId ?? null,
durationMs: event.durationMs ?? null,
timestamp: event.timestamp ?? null,
phase: event.phase ?? null,
@ -316,6 +409,36 @@ function compactTimedEvent(event) {
};
}
/**
 * Derives the matching key for a span event from its `spanId`.
 *
 * @param {object} event - Timeline event.
 * @returns {string|null} `id:<spanId>` when the event carries a non-empty span
 *   id, otherwise null.
 */
function spanIdentity(event) {
  const id = event.spanId;
  // `== null` covers both undefined and null.
  if (id == null) {
    return null;
  }
  // An id that stringifies to "" is treated as absent.
  return String(id).length > 0 ? `id:${id}` : null;
}
/**
 * Tallies how many events carry each `name`.
 *
 * @param {object[]} events - Timeline events.
 * @returns {Map<*, number>} Occurrence count keyed by event name, in first-seen order.
 */
function countNames(events) {
  return events.reduce((tally, { name }) => {
    const seenSoFar = tally.get(name) ?? 0;
    return tally.set(name, seenSoFar + 1);
  }, new Map());
}
/**
 * Finds the newest parseable `timestamp` among the given events.
 *
 * Done as a single pass instead of `Math.max(...times)`: spreading passes one
 * call argument per event and throws a RangeError once a timeline is large
 * enough to exceed the engine's argument limit.
 *
 * @param {object[]} events - Timeline events; `timestamp` may be missing or unparseable.
 * @returns {number|null} Epoch milliseconds of the newest timestamp, or null
 *   when no event has a parseable one.
 */
function latestEventTimestamp(events) {
  let latest = null;
  for (const event of events) {
    // A missing timestamp parses as "" which yields NaN and is skipped below.
    const time = Date.parse(event.timestamp ?? "");
    if (!Number.isFinite(time)) {
      continue;
    }
    if (latest === null || time > latest) {
      latest = time;
    }
  }
  return latest;
}
/**
 * Computes how long a span has been open relative to a reference time.
 *
 * @param {object} event - Span start event; its `timestamp` is parsed with Date.parse.
 * @param {number|null} latestTimestamp - Reference time in epoch milliseconds.
 * @returns {number|null} Milliseconds between the span start and the reference
 *   time, or null when the start is unparseable, the reference is null, or the
 *   reference precedes the start.
 */
function spanAgeMs(event, latestTimestamp) {
  const startedAt = Date.parse(event.timestamp ?? "");
  const measurable =
    Number.isFinite(startedAt) &&
    latestTimestamp !== null &&
    !(latestTimestamp < startedAt);
  return measurable ? latestTimestamp - startedAt : null;
}
/**
 * Returns the largest value in a list, or null for an empty list.
 *
 * Folds with Math.max instead of `Math.max(...values)`: spreading passes one
 * call argument per element and throws a RangeError on very large arrays.
 * Folding keeps Math.max's coercion and NaN behaviour.
 *
 * @param {number[]} values - Values to compare.
 * @returns {number|null} Maximum value, or null when `values` is empty.
 */
function maxOrNull(values) {
  if (values.length === 0) {
    return null;
  }
  return values.reduce((largest, value) => Math.max(largest, value), -Infinity);
}

View File

@ -31,6 +31,9 @@ export function evaluateRecord(record, scenario, options = {}) {
const diagnosticReportBytes = countDiagnosticReportMetric(record, "artifactBytes");
const openclawDiagnostics = collectOpenClawDiagnostics(record);
const timelineSummary = collectTimelineSummary(record);
const timelineRequirement = timelineRequirementFor(options);
const requiredOpenSpans = requiredTimelineSpans(options);
const openRequiredSpans = timelineSummary.openSpans.filter((span) => requiredOpenSpans.has(span.name));
const runtimeDepsStagingMs = maxNullable(openclawDiagnostics.runtimeDepsStagingMs, timelineSummary.runtimeDepsStageMaxMs);
const eventLoopDelayMs = maxNullable(openclawDiagnostics.eventLoopDelayMs, timelineSummary.eventLoopMaxMs);
const providerModelTimingMs = maxNullable(openclawDiagnostics.providerModelTimingMs, timelineSummary.providerRequestMaxMs);
@ -239,6 +242,16 @@ export function evaluateRecord(record, scenario, options = {}) {
}
const allowedTimelineParseErrors = typeof thresholds.openclawTimelineParseErrors === "number" ? thresholds.openclawTimelineParseErrors : 0;
if (timelineRequirement.required && !timelineSummary.available) {
violations.push({
kind: "diagnostics",
metric: "openclawTimelineAvailable",
expected: "available",
actual: false,
message: `OpenClaw diagnostics timeline was required for ${timelineRequirement.reason} but was not emitted`
});
}
if (timelineSummary.available && timelineSummary.parseErrorCount > allowedTimelineParseErrors) {
violations.push({
kind: "diagnostics",
@ -249,6 +262,17 @@ export function evaluateRecord(record, scenario, options = {}) {
});
}
if (openRequiredSpans.length > 0) {
const slowestOpen = openRequiredSpans[0];
violations.push({
kind: "diagnostics",
metric: "openclawOpenRequiredSpanCount",
expected: "0",
actual: openRequiredSpans.length,
message: `${openRequiredSpans.length} required OpenClaw diagnostics span(s) were left open; slowest ${slowestOpen.name}${slowestOpen.ageMs !== null ? ` age ${slowestOpen.ageMs}ms` : ""}`
});
}
if (agentResponseOk === false) {
violations.push({
kind: "agent",
@ -327,6 +351,10 @@ export function evaluateRecord(record, scenario, options = {}) {
openclawSlowestSpanName: timelineSummary.slowestSpanName,
openclawSlowestSpanMs: timelineSummary.slowestSpanMs,
openclawRepeatedSpanCount: timelineSummary.repeatedSpanCount,
openclawOpenSpanCount: timelineSummary.openSpanCount,
openclawOpenRequiredSpanCount: openRequiredSpans.length,
openclawOpenSpans: timelineSummary.openSpans,
openclawKeySpans: timelineSummary.keySpans,
openclawEventLoopMaxMs: timelineSummary.eventLoopMaxMs,
openclawProviderRequestMaxMs: timelineSummary.providerRequestMaxMs,
openclawChildProcessFailedCount: timelineSummary.childProcessFailedCount,
@ -359,6 +387,32 @@ export function evaluateRecord(record, scenario, options = {}) {
return record;
}
/**
 * Decides whether a diagnostics timeline is mandatory for this evaluation.
 *
 * Nothing is required unless the profile sets `diagnostics.timelineRequired`
 * to true. When it does, the timeline is required if either:
 * - the profile's `timelineRequiredForTargetKinds` is empty or contains the
 *   target kind, or
 * - the surface sets `diagnostics.timelineRequiredForSourceBuild` and the
 *   target kind is "local-build".
 *
 * @param {object} options - Evaluator options (`targetPlan`, `profile`, `surface`).
 * @returns {{required: boolean, reason: (string|null)}} Requirement flag plus a
 *   human-readable reason when required.
 */
function timelineRequirementFor(options) {
  const kind = options.targetPlan?.kind ?? null;
  const diagnostics = options.profile?.diagnostics ?? {};

  if (diagnostics.timelineRequired === true) {
    const scopedKinds = diagnostics.timelineRequiredForTargetKinds ?? [];
    const profileApplies = scopedKinds.length === 0 || scopedKinds.includes(kind);
    if (profileApplies) {
      return {
        required: true,
        reason: `profile '${options.profile?.id ?? "unknown"}' on target kind '${kind ?? "unknown"}'`
      };
    }

    const surfaceApplies =
      options.surface?.diagnostics?.timelineRequiredForSourceBuild === true &&
      kind === "local-build";
    if (surfaceApplies) {
      return {
        required: true,
        reason: `surface '${options.surface.id}' source-build diagnostics`
      };
    }
  }

  return { required: false, reason: null };
}
/**
 * Collects the span names that must not be left open: the surface's
 * `diagnostics.expectedSpans` followed by the profile's
 * `diagnostics.requiredKeySpans`.
 *
 * @param {object} options - Evaluator options (`surface`, `profile`).
 * @returns {Set<string>} Union of both lists; empty when neither is set.
 */
function requiredTimelineSpans(options) {
  const surfaceSpans = options.surface?.diagnostics?.expectedSpans ?? [];
  const profileSpans = options.profile?.diagnostics?.requiredKeySpans ?? [];
  const required = new Set();
  for (const name of surfaceSpans) {
    required.add(name);
  }
  for (const name of profileSpans) {
    required.add(name);
  }
  return required;
}
function maxDurationWhere(results, predicate) {
const durations = results
.filter((result) => predicate(result.command))
@ -756,12 +810,18 @@ function collectTimelineSummary(record) {
let repeatedSpanCount = 0;
let runtimeDepsStageMaxMs = null;
let slowestRuntimeDepsPlugin = null;
let openSpanCount = 0;
let openSpans = [];
const keySpans = {};
for (const timeline of timelines) {
eventCount = Math.max(eventCount, timeline.eventCount ?? 0);
parseErrorCount = Math.max(parseErrorCount, timeline.parseErrorCount ?? 0);
childProcessFailedCount = Math.max(childProcessFailedCount, timeline.childProcesses?.failedCount ?? 0);
repeatedSpanCount = Math.max(repeatedSpanCount, timeline.repeatedSpans?.length ?? 0);
openSpanCount = Math.max(openSpanCount, timeline.openSpanCount ?? timeline.openSpans?.length ?? 0);
openSpans = mergeOpenSpans(openSpans, timeline.openSpans ?? []);
mergeKeySpans(keySpans, timeline.keySpans ?? {});
eventLoopMaxMs = maxNullable(eventLoopMaxMs, timeline.eventLoop?.maxMs);
providerRequestMaxMs = maxNullable(providerRequestMaxMs, timeline.providers?.maxDurationMs);
runtimeDepsStageMaxMs = maxNullable(
@ -791,6 +851,9 @@ function collectTimelineSummary(record) {
slowestSpanName: slowestSpan?.name ?? null,
slowestSpanMs: slowestSpan?.durationMs ?? null,
repeatedSpanCount,
openSpanCount,
openSpans,
keySpans,
eventLoopMaxMs,
providerRequestMaxMs,
childProcessFailedCount,
@ -799,6 +862,38 @@ function collectTimelineSummary(record) {
};
}
function mergeOpenSpans(current, candidate) {
return [...current, ...candidate]
.toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1))
.slice(0, 25);
}
function mergeKeySpans(target, source) {
for (const [name, summary] of Object.entries(source)) {
const existing = target[name] ?? {
name,
count: 0,
errorCount: 0,
openCount: 0,
totalDurationMs: 0,
maxDurationMs: null,
slowest: null,
open: []
};
existing.count += summary.count ?? 0;
existing.errorCount += summary.errorCount ?? 0;
existing.openCount += summary.openCount ?? 0;
existing.totalDurationMs = roundNumber(existing.totalDurationMs + (summary.totalDurationMs ?? 0));
existing.maxDurationMs = maxNullable(existing.maxDurationMs, summary.maxDurationMs);
if (summary.slowest?.durationMs !== undefined &&
(!existing.slowest || summary.slowest.durationMs > existing.slowest.durationMs)) {
existing.slowest = summary.slowest;
}
existing.open = mergeOpenSpans(existing.open, summary.open ?? []).slice(0, 5);
target[name] = existing;
}
}
function collectCpuPercentMax(record) {
const values = [];
for (const phase of record.phases ?? []) {
@ -823,6 +918,10 @@ function maxNullable(left, right) {
return left === null ? right : Math.max(left, right);
}
/**
 * Rounds a number to two decimal places.
 *
 * @param {number} value - Number to round.
 * @returns {number} `value` scaled by 100, rounded with Math.round, scaled back.
 */
function roundNumber(value) {
  const hundredths = Math.round(value * 100);
  return hundredths / 100;
}
function mergeRoles(left, right) {
const roles = new Set(`${left ?? ""},${right ?? ""}`.split(",").filter(Boolean));
return [...roles].join(",");
@ -962,6 +1061,15 @@ function buildDiagnosticCorrelation({
durationMs: timelineSummary.slowestSpanMs
});
}
if (timelineSummary.openSpans.length > 0) {
const span = timelineSummary.openSpans[0];
findings.push({
kind: "openclaw-open-span",
summary: `Open OpenClaw span: ${span.name}${span.ageMs !== null ? ` age ${span.ageMs}ms` : ""}`,
span: span.name,
ageMs: span.ageMs
});
}
if (eventLoopDelayMs !== null) {
findings.push({
kind: "event-loop",

View File

@ -154,6 +154,7 @@ async function matrixCommand(flags) {
const profile = await loadProfile(required(flags.profile, "--profile"));
const target = required(flags.target, "--target");
const targetPlan = resolveTarget(target, "target");
validateProfileTarget(profile, targetPlan);
if (flags.from) {
resolveTarget(flags.from, "from");
}
@ -268,6 +269,7 @@ async function matrixRun(flags) {
const profile = await loadProfile(required(flags.profile, "--profile"));
const target = required(flags.target, "--target");
const targetPlan = resolveTarget(target, "target");
validateProfileTarget(profile, targetPlan);
const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;
const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo());
const controls = matrixControlSummary(flags, targetPlan);
@ -284,6 +286,7 @@ async function matrixRun(flags) {
const context = {
target,
targetPlan,
profile,
from: flags.from,
fromPlan,
state: entry.state,
@ -606,6 +609,8 @@ function profileSummary(profile) {
title: profile.title,
objective: profile.objective,
entryCount: profile.entries.length,
targetKinds: profile.targetKinds ?? null,
diagnostics: profile.diagnostics ?? null,
gate: profile.gate ? {
id: profile.gate.id ?? `${profile.id}-gate`,
blockingCount: Array.isArray(profile.gate.blocking) ? profile.gate.blocking.length : profile.entries.length,
@ -614,6 +619,16 @@ function profileSummary(profile) {
};
}
/**
 * Rejects a target whose kind the profile does not allow.
 *
 * A profile without `targetKinds`, or with an empty list, accepts any target.
 *
 * @param {object} profile - Loaded profile (`id`, optional `targetKinds`).
 * @param {object} targetPlan - Resolved target; only `kind` is read.
 * @returns {void}
 * @throws {Error} When `targetKinds` is non-empty and excludes `targetPlan.kind`.
 */
function validateProfileTarget(profile, targetPlan) {
  const allowedKinds = profile.targetKinds ?? [];
  const unrestricted = allowedKinds.length === 0;
  if (unrestricted || allowedKinds.includes(targetPlan.kind)) {
    return;
  }
  throw new Error(`profile '${profile.id}' requires target kind ${allowedKinds.join(", ")}, got ${targetPlan.kind}`);
}
async function cleanupCommand(flags) {
const [subcommand] = flags._;
if (subcommand !== "envs") {

View File

@ -22,6 +22,8 @@ export function validateProfileShape(profile, sourceName = "profile") {
requireString(profile, "title", errors);
requireString(profile, "objective", errors);
requireArray(profile, "entries", errors);
validateStringArray(profile.targetKinds, "targetKinds", errors, { optional: true });
validateDiagnostics(profile.diagnostics, "diagnostics", errors);
validateEntries(profile.entries, errors);
if (profile.gate !== undefined) {
@ -31,6 +33,21 @@ export function validateProfileShape(profile, sourceName = "profile") {
assertNoShapeErrors(errors, sourceName);
}
/**
 * Checks the shape of a profile's optional `diagnostics` section and appends
 * any problems to `errors`.
 *
 * @param {*} diagnostics - Value of the diagnostics section; undefined means "not set".
 * @param {string} prefix - Path used at the start of every error message.
 * @param {string[]} errors - Accumulator for validation messages; mutated.
 * @returns {void}
 */
function validateDiagnostics(diagnostics, prefix, errors) {
  if (diagnostics === undefined) {
    return;
  }

  const isPlainObject =
    Boolean(diagnostics) && typeof diagnostics === "object" && !Array.isArray(diagnostics);
  if (!isPlainObject) {
    errors.push(`${prefix} must be an object when set`);
    return;
  }

  const { timelineRequired } = diagnostics;
  if (timelineRequired !== undefined && typeof timelineRequired !== "boolean") {
    errors.push(`${prefix}.timelineRequired must be boolean when set`);
  }

  // Both list-valued fields are optional arrays of strings.
  for (const field of ["timelineRequiredForTargetKinds", "requiredKeySpans"]) {
    validateStringArray(diagnostics[field], `${prefix}.${field}`, errors, { optional: true });
  }
}
function validateEntries(entries, errors) {
if (!Array.isArray(entries)) {
return;

View File

@ -103,6 +103,19 @@ export function renderMarkdownReport(report) {
lines.push(`- Event-loop delay mentions: ${record.measurements.eventLoopDelayMentions ?? "unknown"}`);
lines.push(`- OpenClaw timeline: ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"} (${record.measurements.openclawTimelineEventCount ?? 0} events, ${record.measurements.openclawTimelineParseErrors ?? 0} parse errors)`);
lines.push(`- Slowest OpenClaw span: ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"} ms`);
lines.push(`- Open OpenClaw spans: ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required)`);
if (record.measurements.openclawOpenSpans?.length > 0) {
const span = record.measurements.openclawOpenSpans[0];
lines.push(`- Slowest open span: ${span.name}${span.ageMs !== null ? ` ${span.ageMs} ms` : ""}`);
}
if (record.measurements.openclawKeySpans) {
const keySpanText = compactKeySpans(record.measurements.openclawKeySpans).slice(0, 5)
.map((span) => `${span.name} max ${span.maxDurationMs ?? "?"}ms open ${span.openCount ?? 0}`)
.join("; ");
if (keySpanText) {
lines.push(`- Key OpenClaw spans: ${keySpanText}`);
}
}
lines.push(`- OpenClaw event-loop max: ${record.measurements.openclawEventLoopMaxMs ?? "unknown"} ms`);
lines.push(`- OpenClaw provider request max: ${record.measurements.openclawProviderRequestMaxMs ?? "unknown"} ms`);
lines.push(`- Structured event-loop delay: ${record.measurements.eventLoopDelayMs ?? "unknown"} ms`);
@ -462,6 +475,10 @@ function summarizeMeasurements(measurements) {
openclawTimelineAvailable: measurements.openclawTimelineAvailable ?? null,
openclawSlowestSpanName: measurements.openclawSlowestSpanName ?? null,
openclawSlowestSpanMs: measurements.openclawSlowestSpanMs ?? null,
openclawOpenSpanCount: measurements.openclawOpenSpanCount ?? null,
openclawOpenRequiredSpanCount: measurements.openclawOpenRequiredSpanCount ?? null,
openclawOpenSpans: measurements.openclawOpenSpans ?? null,
openclawKeySpans: measurements.openclawKeySpans ?? null,
nodeCpuProfileCount: measurements.nodeCpuProfileCount ?? null,
nodeHeapProfileCount: measurements.nodeHeapProfileCount ?? null,
nodeTraceEventCount: measurements.nodeTraceEventCount ?? null,
@ -540,7 +557,7 @@ export function renderPasteSummary(report) {
const roleText = compactRolePeaks(record.measurements).slice(0, 4)
.map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`)
.join(", ") || "unknown";
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}.`);
lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? 
"unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}.`);
}
} else if (record.violations?.length > 0) {
lines.push("Violations:");
@ -635,6 +652,10 @@ function briefEvidence(measurements, violations) {
if (measurements.pluginLoadFailures !== null && measurements.pluginLoadFailures !== undefined) {
items.push(`pluginLoadFailures: ${measurements.pluginLoadFailures}`);
}
if (measurements.openclawOpenRequiredSpanCount > 0) {
const span = measurements.openclawOpenSpans?.[0];
items.push(`openRequiredSpans: ${measurements.openclawOpenRequiredSpanCount}${span ? `, slowest ${span.name}` : ""}`);
}
for (const finding of measurements.diagnosticCorrelation?.findings?.slice(0, 3) ?? []) {
items.push(finding.summary);
}
@ -646,6 +667,12 @@ function briefEvidence(measurements, violations) {
return items.slice(0, 8);
}
/**
 * Selects the key spans worth reporting and orders them for display.
 *
 * @param {Object<string, object>|null|undefined} keySpans - Summaries keyed by span name.
 * @returns {object[]} Summaries with at least one recorded or open span, sorted
 *   by `maxDurationMs` descending and then by `openCount` descending.
 */
function compactKeySpans(keySpans) {
  const observed = [];
  for (const span of Object.values(keySpans ?? {})) {
    const hasActivity = (span.count ?? 0) > 0 || (span.openCount ?? 0) > 0;
    if (hasActivity) {
      observed.push(span);
    }
  }

  const byDurationThenOpen = (left, right) => {
    const durationGap = (right.maxDurationMs ?? 0) - (left.maxDurationMs ?? 0);
    // Fall through to the open count only when the durations tie.
    if (durationGap) {
      return durationGap;
    }
    return (right.openCount ?? 0) - (left.openCount ?? 0);
  };

  // `observed` is a fresh local array, so sorting it in place is safe.
  return observed.sort(byDurationThenOpen);
}
function compactRolePeaks(measurements) {
const byRole = new Map();
for (const role of measurements?.resourceTopRolesByRss ?? []) {

View File

@ -512,7 +512,9 @@ function runScenarioCommand(command, context, envName, artifactDir, phaseId, com
/**
 * Builds the options object handed to the record evaluator for one scenario.
 *
 * The superseded single-property `surface` line is removed: keeping it next to
 * its replacement put two `surface` properties in the literal with no comma
 * between them, which does not parse.
 *
 * @param {object} context - Run context (`surfacesById`, `targetPlan`, `profile`).
 * @param {object} scenario - Scenario definition; `surface` selects the surface entry.
 * @returns {{surface: *, targetPlan: *, profile: *}} Each field is null when
 *   the context does not provide it.
 */
function evaluatorContext(context, scenario) {
  return {
    surface: context.surfacesById?.[scenario.surface] ?? null,
    targetPlan: context.targetPlan ?? null,
    profile: context.profile ?? null
  };
}

View File

@ -11,7 +11,7 @@ import { validateStateShape } from "./registries/states.mjs";
import { validateRegistryReferences } from "./registries/validate.mjs";
import { assertSafeScenarioCommand } from "./safety.mjs";
import { parseTimelineText } from "./collectors/timeline.mjs";
import { renderReportSummary } from "./report.mjs";
import { renderPasteSummary, renderReportSummary } from "./report.mjs";
export async function runSelfCheck(flags = {}) {
const checks = [];
@ -51,6 +51,17 @@ export async function runSelfCheck(flags = {}) {
assertEqual(data.entries.length, 1, "matrix include filter count");
assertEqual(data.controls?.requestedParallel, 2, "matrix requested parallel");
}));
checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => {
assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema");
assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id");
assertEqual(data.profile?.diagnostics?.timelineRequired, true, "diagnostic timeline required");
assertArrayNotEmpty(data.entries, "diagnostic entries");
}));
checks.push(await failingCommandCheck(
"diagnostic-profile-rejects-non-local-build",
"node bin/kova.mjs matrix plan --profile diagnostic --target runtime:stable --json",
"profile 'diagnostic' requires target kind local-build"
));
checks.push(await failingCommandCheck(
"invalid-parallel-rejected",
"node bin/kova.mjs matrix plan --profile smoke --target runtime:stable --parallel nope --json",
@ -67,6 +78,8 @@ export async function runSelfCheck(flags = {}) {
assertArray(data.envs, "cleanup envs");
}));
checks.push(await diagnosticsTimelineCheck());
checks.push(await diagnosticsOpenSpanCheck());
checks.push(diagnosticsTimelineEvaluationCheck());
checks.push(readinessClassificationCheck());
checks.push(await resourceRoleAttributionCheck(tmp));
checks.push(roleThresholdEvaluationCheck());
@ -504,6 +517,7 @@ async function diagnosticsTimelineCheck() {
assertEqual(timeline.eventLoop.maxMs, 214, "event loop max");
assertEqual(timeline.providers.maxDurationMs, 1220, "provider duration");
assertEqual(timeline.childProcesses.failedCount, 1, "child process failures");
assertEqual(timeline.keySpans["gateway.startup"].maxDurationMs, 2450, "gateway startup key span");
return {
id: "diagnostics-timeline-parser",
status: "PASS",
@ -521,6 +535,151 @@ async function diagnosticsTimelineCheck() {
}
}
/**
 * Self-check for open-span handling in the diagnostics timeline parser.
 *
 * Reads the `timeline-open-span.jsonl` fixture, parses it with
 * `parseTimelineText`, and asserts that a single open `runtimeDeps.stage`
 * span is reported both in `openSpans` and in the `keySpans` summary.
 *
 * @returns {Promise<{id: string, status: string, command: string, durationMs: number, message?: string}>}
 *   A PASS result when every expectation holds; otherwise a FAIL result whose
 *   `message` is taken from the first error raised while reading, parsing,
 *   or asserting.
 */
async function diagnosticsOpenSpanCheck() {
  const outcome = {
    id: "diagnostics-open-span-parser",
    status: "PASS",
    command: "parse fixtures/diagnostics/timeline-open-span.jsonl",
    durationMs: 0
  };

  try {
    const fixtureText = await readFile("fixtures/diagnostics/timeline-open-span.jsonl", "utf8");
    const parsed = parseTimelineText(fixtureText);

    // Each actual value is read lazily so a missing field only surfaces when
    // its own expectation is reached, after the earlier ones have been checked.
    const expectations = [
      [() => parsed.available, true, "open timeline available"],
      [() => parsed.openSpanCount, 1, "open span count"],
      [() => parsed.openSpans[0]?.name, "runtimeDeps.stage", "open span name"],
      [() => parsed.openSpans[0]?.ageMs, 5000, "open span age"],
      [() => parsed.keySpans["runtimeDeps.stage"].openCount, 1, "key open span count"]
    ];
    for (const [readActual, expected, label] of expectations) {
      assertEqual(readActual(), expected, label);
    }

    return outcome;
  } catch (failure) {
    return { ...outcome, status: "FAIL", message: failure.message };
  }
}
/**
 * Self-check for how diagnostic timeline evidence is evaluated and reported.
 *
 * Runs three synthetic stages in order:
 *   1. A record whose timeline is unavailable is evaluated under the
 *      `diagnostic` profile for a `local-build` target and is expected to
 *      become FAIL with an `openclawTimelineAvailable` violation.
 *   2. A record whose timeline has a `runtimeDeps.stage` span that starts but
 *      never ends is expected to become FAIL with an
 *      `openclawOpenRequiredSpanCount` measurement of 1 and a matching violation.
 *   3. The second record is rendered through `renderReportSummary`
 *      (structured) and `renderPasteSummary`, and the open-span evidence is
 *      expected to appear in both outputs.
 *
 * @returns {{id: string, status: string, command: string, durationMs: number, message?: string}}
 *   A PASS result when every expectation holds; otherwise a FAIL result whose
 *   `message` is taken from the first error raised.
 */
function diagnosticsTimelineEvaluationCheck() {
  const outcome = {
    id: "diagnostics-timeline-evaluation",
    status: "PASS",
    command: "evaluate synthetic diagnostic timeline records",
    durationMs: 0
  };

  // Evaluates a record as a diagnostic local-build run; every argument object
  // is built fresh per call so the two evaluations share no state.
  const evaluateAsDiagnostic = (record, surfaceId, profileDiagnostics) => {
    evaluateRecord(record, { thresholds: {} }, {
      targetPlan: { kind: "local-build" },
      profile: { id: "diagnostic", diagnostics: profileDiagnostics },
      surface: {
        id: surfaceId,
        diagnostics: { expectedSpans: ["runtimeDeps.stage"] },
        thresholds: {}
      }
    });
  };

  const hasViolation = (record, metric) =>
    record.violations.some((entry) => entry.metric === metric);

  try {
    // Stage 1: a required timeline that is missing must fail the record.
    const missingTimelineRecord = {
      scenario: "diagnostic-missing-timeline",
      status: "PASS",
      phases: [],
      finalMetrics: {
        service: { gatewayState: "running" },
        logs: zeroLogMetrics(),
        timeline: {
          available: false,
          eventCount: 0,
          parseErrorCount: 0,
          openSpanCount: 0,
          openSpans: [],
          keySpans: {},
          runtimeDeps: {},
          eventLoop: {},
          providers: {},
          childProcesses: {}
        }
      }
    };
    evaluateAsDiagnostic(missingTimelineRecord, "release-runtime-startup", {
      timelineRequired: true,
      timelineRequiredForTargetKinds: ["local-build"]
    });
    assertEqual(missingTimelineRecord.status, "FAIL", "missing diagnostic timeline status");
    assertEqual(
      hasViolation(missingTimelineRecord, "openclawTimelineAvailable"),
      true,
      "missing diagnostic timeline violation"
    );

    // Stage 2: a required span that starts but never ends must fail the record.
    const openSpanTimelineLines = [
      '{"type":"span.start","timestamp":"2026-04-29T15:30:00.000Z","name":"runtimeDeps.stage","spanId":"1"}',
      '{"type":"eventLoop.sample","timestamp":"2026-04-29T15:30:06.000Z","name":"eventLoop","maxMs":400}'
    ];
    const openSpanRecord = {
      scenario: "diagnostic-open-span",
      status: "PASS",
      phases: [],
      finalMetrics: {
        service: { gatewayState: "running" },
        logs: zeroLogMetrics(),
        timeline: parseTimelineText(openSpanTimelineLines.join("\n"))
      }
    };
    evaluateAsDiagnostic(openSpanRecord, "bundled-runtime-deps", { timelineRequired: true });
    assertEqual(openSpanRecord.status, "FAIL", "open required span status");
    assertEqual(
      openSpanRecord.measurements.openclawOpenRequiredSpanCount,
      1,
      "open required span measurement"
    );
    assertEqual(
      hasViolation(openSpanRecord, "openclawOpenRequiredSpanCount"),
      true,
      "open required span violation"
    );

    // Stage 3: the open-span evidence must survive both report renderers.
    const structuredReport = renderReportSummary({
      schemaVersion: "kova.report.v1",
      generatedAt: "2026-04-29T15:30:10.000Z",
      runId: "self-check-diagnostics",
      summary: { total: 1, statuses: { FAIL: 1 } },
      records: [openSpanRecord]
    }, { structured: true });
    const reportedMeasurements = structuredReport.scenarios[0]?.measurements;
    assertEqual(
      reportedMeasurements?.openclawOpenRequiredSpanCount,
      1,
      "structured report open span evidence"
    );
    assertEqual(
      reportedMeasurements?.openclawOpenSpans?.[0]?.name,
      "runtimeDeps.stage",
      "structured report open span name"
    );

    const pasteSummary = renderPasteSummary({
      runId: "self-check-diagnostics",
      target: "local-build:/tmp/openclaw",
      mode: "self-check",
      records: [openSpanRecord]
    });
    assertEqual(
      pasteSummary.includes("openRequiredSpans: 1"),
      true,
      "brief evidence includes open required spans"
    );

    return outcome;
  } catch (failure) {
    return { ...outcome, status: "FAIL", message: failure.message };
  }
}
function readinessClassificationCheck() {
try {
const record = {