feat: harden diagnostic timeline collectors

2026-04-30 10:11:04 +01:00 · 2026-04-30 10:11:04 +01:00 · f504e84b61
commit f504e84b61
parent aea91cfb02
10 changed files with 552 additions and 5 deletions
--- a/docs/REPORT_SCHEMA.md
+++ b/docs/REPORT_SCHEMA.md
@ -164,8 +164,8 @@ Current metrics include:
  functions, slowest OpenClaw span, event-loop delay, runtime dependency
  staging, and provider/model timing
 - OpenClaw diagnostics timeline availability, event count, parse errors,
-  slowest spans, repeated spans, event-loop max, provider request max, and child
-  process failures
+  slowest spans, repeated spans, open spans, key span summaries, event-loop max,
+  provider request max, and child process failures
 - runtime dependency staging grouped by bundled plugin when OpenClaw emits
  `runtimeDeps.stage` spans with `pluginId` attributes

@ -179,6 +179,27 @@ JSONL timeline under the run artifacts and summarizes it in `metrics.timeline`.
 If OpenClaw does not emit it, the collector reports `INFO` and the scenario can
 still complete.

+Diagnostic source-build runs can make the timeline mandatory through the active
+profile. In that mode, missing timeline evidence fails the scenario because Kova
+cannot inspect OpenClaw internals. NPM/release runs keep missing timelines as
+informational unless the active profile explicitly requires them.
+
+Timeline-derived measurements include:
+
+- `openclawOpenSpanCount`: number of `span.start` events without a matching
+  `span.end` or `span.error`
+- `openclawOpenRequiredSpanCount`: open spans that match required diagnostics
+  for the surface/profile
+- `openclawOpenSpans`: compact open-span evidence with name, age, phase,
+  span id, parent span id, plugin id, provider, and operation when available
+- `openclawKeySpans`: compact summaries for OpenClaw's required operational
+  spans: `gateway.startup`, `gateway.ready`, `config.normalize`,
+  `plugins.metadata.scan`, `runtimeDeps.stage`, `providers.load`,
+  `models.catalog`, `agent.turn`, and `agent.cleanup`
+
+Open required spans are failures for diagnostic source-build runs because they
+usually mean OpenClaw started a critical operation and never reported completion.
+
 ## Run Receipt

 `kova run --json` prints a receipt instead of text paths:
--- a/fixtures/diagnostics/timeline-open-span.jsonl
+++ b/fixtures/diagnostics/timeline-open-span.jsonl
@ -0,0 +1,4 @@
+{"schemaVersion":"openclaw.diagnostics.v1","type":"span.start","timestamp":"2026-04-29T15:30:00.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"gateway.startup","spanId":"1"}
+{"schemaVersion":"openclaw.diagnostics.v1","type":"span.end","timestamp":"2026-04-29T15:30:02.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"gateway.startup","spanId":"1","durationMs":2000}
+{"schemaVersion":"openclaw.diagnostics.v1","type":"span.start","timestamp":"2026-04-29T15:30:03.000Z","runId":"kova-fixture","envName":"kova-fixture-env","pid":100,"phase":"startup","name":"runtimeDeps.stage","spanId":"2","attributes":{"pluginId":"browser"}}
+{"schemaVersion":"openclaw.diagnostics.v1","type":"eventLoop.sample","timestamp":"2026-04-29T15:30:08.000Z","name":"eventLoop","p95Ms":120,"maxMs":700,"activeSpanName":"runtimeDeps.stage"}
--- a/profiles/diagnostic.json
+++ b/profiles/diagnostic.json
@ -0,0 +1,71 @@
+{
+  "id": "diagnostic",
+  "title": "Source-Built Diagnostics",
+  "objective": "Run release-shaped local OpenClaw builds with timeline diagnostics enabled so Kova can attribute startup, plugin, provider, agent, event-loop, CPU, and heap behavior to OpenClaw phases.",
+  "targetKinds": ["local-build"],
+  "diagnostics": {
+    "timelineRequired": true,
+    "timelineRequiredForTargetKinds": ["local-build"],
+    "requiredKeySpans": [
+      "gateway.startup",
+      "gateway.ready",
+      "config.normalize",
+      "plugins.metadata.scan",
+      "runtimeDeps.stage",
+      "providers.load",
+      "models.catalog",
+      "agent.turn",
+      "agent.cleanup"
+    ]
+  },
+  "gate": {
+    "id": "openclaw-diagnostic",
+    "coverage": {
+      "surfaces": {
+        "blocking": ["release-runtime-startup", "gateway-performance", "bundled-runtime-deps", "agent-message"]
+      },
+      "states": {
+        "blocking": ["fresh", "missing-plugin-index", "many-bundled-plugins", "mock-openai-provider"]
+      },
+      "stateSurfaces": {
+        "blocking": [
+          "release-runtime-startup:fresh",
+          "gateway-performance:many-bundled-plugins",
+          "bundled-runtime-deps:missing-plugin-index",
+          "agent-message:mock-openai-provider"
+        ]
+      },
+      "scenarios": {
+        "blocking": ["release-runtime-startup", "gateway-performance", "bundled-runtime-deps", "agent-message-latency"]
+      }
+    },
+    "blocking": [
+      { "scenario": "release-runtime-startup", "state": "fresh" },
+      { "scenario": "gateway-performance", "state": "many-bundled-plugins" },
+      { "scenario": "bundled-runtime-deps", "state": "missing-plugin-index" },
+      { "scenario": "agent-message-latency", "state": "mock-openai-provider" }
+    ]
+  },
+  "entries": [
+    {
+      "scenario": "release-runtime-startup",
+      "state": "fresh",
+      "timeoutMs": 180000
+    },
+    {
+      "scenario": "gateway-performance",
+      "state": "many-bundled-plugins",
+      "timeoutMs": 180000
+    },
+    {
+      "scenario": "bundled-runtime-deps",
+      "state": "missing-plugin-index",
+      "timeoutMs": 180000
+    },
+    {
+      "scenario": "agent-message-latency",
+      "state": "mock-openai-provider",
+      "timeoutMs": 240000
+    }
+  ]
+}
--- a/src/collectors/timeline.mjs
+++ b/src/collectors/timeline.mjs
@ -3,6 +3,17 @@ import { join } from "node:path";

 const SCHEMA_VERSION = "openclaw.diagnostics.v1";
 export const TIMELINE_COLLECTOR_SCHEMA = "kova.timelineCollector.v1";
+// Span names that always receive an entry in the timeline `keySpans` summary,
+// whether or not the timeline contains any events for them.
+export const KEY_OPENCLAW_SPANS = [
+  "gateway.startup",
+  "gateway.ready",
+  "config.normalize",
+  "plugins.metadata.scan",
+  "runtimeDeps.stage",
+  "providers.load",
+  "models.catalog",
+  "agent.turn",
+  "agent.cleanup"
+];

 export async function collectTimelineMetrics(artifactDir) {
  const startedAt = Date.now();
@ -35,6 +46,8 @@ export async function collectTimelineMetrics(artifactDir) {
    slowestSpans: timeline.slowestSpans,
    spanTotals: timeline.spanTotals,
    repeatedSpans: timeline.repeatedSpans,
+    openSpans: timeline.openSpans,
+    keySpans: timeline.keySpans,
    runtimeDeps: timeline.runtimeDeps,
    eventLoop: timeline.eventLoop,
    providers: timeline.providers,
@ -86,11 +99,13 @@ export function parseTimelineText(text) {
 }

 export function summarizeTimeline(events, parseErrors = []) {
+  const spanStarts = events.filter((event) => event.type === "span.start");
  const spanEvents = events.filter((event) => event.type === "span.end" || event.type === "span.error");
  const eventLoopSamples = events.filter((event) => event.type === "eventLoop.sample");
  const providerRequests = events.filter((event) => event.type === "provider.request");
  const childProcesses = events.filter((event) => event.type === "childProcess.exit");
  const spanTotals = summarizeSpans(spanEvents);
+  const openSpans = summarizeOpenSpans({ starts: spanStarts, terminals: spanEvents, events });
  const runtimeDeps = summarizeRuntimeDeps(spanEvents);
  const slowestSpans = spanEvents
    .filter((event) => typeof event.durationMs === "number")
@ -104,6 +119,7 @@ export function summarizeTimeline(events, parseErrors = []) {
    eventCount: events.length,
    parseErrorCount: parseErrors.length,
    parseErrors: parseErrors.slice(0, 20),
+    spanStartCount: spanStarts.length,
    spanCount: spanEvents.length,
    slowestSpans,
    spanTotals,
@ -111,6 +127,9 @@ export function summarizeTimeline(events, parseErrors = []) {
      .filter((span) => span.count > 1)
      .toSorted((left, right) => (right.totalDurationMs - left.totalDurationMs) || (right.count - left.count))
      .slice(0, 10),
+    openSpanCount: openSpans.length,
+    openSpans,
+    keySpans: summarizeKeySpans({ spanEvents, openSpans }),
    runtimeDeps,
    eventLoop: summarizeEventLoop(eventLoopSamples),
    providers: summarizeTimedCollection(providerRequests),
@ -130,6 +149,9 @@ function emptyTimeline(extra = {}) {
    slowestSpans: [],
    spanTotals: {},
    repeatedSpans: [],
+    openSpanCount: 0,
+    openSpans: [],
+    keySpans: emptyKeySpans(),
    runtimeDeps: {
      count: 0,
      totalDurationMs: 0,
@ -254,6 +276,75 @@ function summarizeRuntimeDeps(events) {
  };
 }

+function summarizeOpenSpans({ starts, terminals, events }) {
+  const terminalKeys = new Set(terminals.map(spanIdentity).filter(Boolean));
+  const terminalNames = countNames(terminals);
+  const latestTimestamp = latestEventTimestamp(events);
+  const open = [];
+
+  for (const start of starts) {
+    const key = spanIdentity(start);
+    if (key && terminalKeys.has(key)) {
+      continue;
+    }
+    if (!key && (terminalNames.get(start.name) ?? 0) > 0) {
+      terminalNames.set(start.name, terminalNames.get(start.name) - 1);
+      continue;
+    }
+    open.push({
+      type: start.type,
+      name: start.name,
+      spanId: start.spanId ?? null,
+      parentSpanId: start.parentSpanId ?? null,
+      timestamp: start.timestamp ?? null,
+      ageMs: spanAgeMs(start, latestTimestamp),
+      phase: start.phase ?? null,
+      provider: start.provider ?? start.attributes?.provider ?? null,
+      operation: start.operation ?? start.attributes?.operation ?? null,
+      pluginId: start.pluginId ?? start.attributes?.pluginId ?? null
+    });
+  }
+
+  return open.toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1)).slice(0, 25);
+}
+
+function summarizeKeySpans({ spanEvents, openSpans }) {
+  const byName = {};
+  for (const name of KEY_OPENCLAW_SPANS) {
+    const spans = spanEvents.filter((event) => event.name === name);
+    const open = openSpans.filter((event) => event.name === name);
+    const durations = spans.map((event) => event.durationMs).filter(isNumber);
+    const slowest = spans
+      .filter((event) => typeof event.durationMs === "number")
+      .toSorted((left, right) => right.durationMs - left.durationMs)
+      .at(0);
+    byName[name] = {
+      name,
+      count: spans.length,
+      errorCount: spans.filter((event) => event.type === "span.error").length,
+      openCount: open.length,
+      totalDurationMs: round(durations.reduce((total, value) => total + value, 0)),
+      maxDurationMs: maxOrNull(durations),
+      slowest: slowest ? compactTimedEvent(slowest) : null,
+      open: open.slice(0, 5)
+    };
+  }
+  return byName;
+}
+
+function emptyKeySpans() {
+  return Object.fromEntries(KEY_OPENCLAW_SPANS.map((name) => [name, {
+    name,
+    count: 0,
+    errorCount: 0,
+    openCount: 0,
+    totalDurationMs: 0,
+    maxDurationMs: null,
+    slowest: null,
+    open: []
+  }]));
+}
+
 function summarizeEventLoop(samples) {
  const p95Values = samples.map((sample) => numberOrNull(sample.p95Ms)).filter(isNumber);
  const p99Values = samples.map((sample) => numberOrNull(sample.p99Ms)).filter(isNumber);
@ -303,6 +394,8 @@ function compactTimedEvent(event) {
  return {
    type: event.type,
    name: event.name,
+    spanId: event.spanId ?? null,
+    parentSpanId: event.parentSpanId ?? null,
    durationMs: event.durationMs ?? null,
    timestamp: event.timestamp ?? null,
    phase: event.phase ?? null,
@ -316,6 +409,36 @@ function compactTimedEvent(event) {
  };
 }

+function spanIdentity(event) {
+  if (event.spanId !== undefined && event.spanId !== null && String(event.spanId).length > 0) {
+    return `id:${event.spanId}`;
+  }
+  return null;
+}
+
+function countNames(events) {
+  const counts = new Map();
+  for (const event of events) {
+    counts.set(event.name, (counts.get(event.name) ?? 0) + 1);
+  }
+  return counts;
+}
+
+function latestEventTimestamp(events) {
+  const times = events
+    .map((event) => Date.parse(event.timestamp ?? ""))
+    .filter((time) => Number.isFinite(time));
+  return times.length === 0 ? null : Math.max(...times);
+}
+
+function spanAgeMs(event, latestTimestamp) {
+  const start = Date.parse(event.timestamp ?? "");
+  if (!Number.isFinite(start) || latestTimestamp === null || latestTimestamp < start) {
+    return null;
+  }
+  return latestTimestamp - start;
+}
+
 function maxOrNull(values) {
  return values.length === 0 ? null : Math.max(...values);
 }
--- a/src/evaluator.mjs
+++ b/src/evaluator.mjs
@ -31,6 +31,9 @@ export function evaluateRecord(record, scenario, options = {}) {
  const diagnosticReportBytes = countDiagnosticReportMetric(record, "artifactBytes");
  const openclawDiagnostics = collectOpenClawDiagnostics(record);
  const timelineSummary = collectTimelineSummary(record);
+  const timelineRequirement = timelineRequirementFor(options);
+  const requiredOpenSpans = requiredTimelineSpans(options);
+  const openRequiredSpans = timelineSummary.openSpans.filter((span) => requiredOpenSpans.has(span.name));
  const runtimeDepsStagingMs = maxNullable(openclawDiagnostics.runtimeDepsStagingMs, timelineSummary.runtimeDepsStageMaxMs);
  const eventLoopDelayMs = maxNullable(openclawDiagnostics.eventLoopDelayMs, timelineSummary.eventLoopMaxMs);
  const providerModelTimingMs = maxNullable(openclawDiagnostics.providerModelTimingMs, timelineSummary.providerRequestMaxMs);
@ -239,6 +242,16 @@ export function evaluateRecord(record, scenario, options = {}) {
  }

  const allowedTimelineParseErrors = typeof thresholds.openclawTimelineParseErrors === "number" ? thresholds.openclawTimelineParseErrors : 0;
+  if (timelineRequirement.required && !timelineSummary.available) {
+    violations.push({
+      kind: "diagnostics",
+      metric: "openclawTimelineAvailable",
+      expected: "available",
+      actual: false,
+      message: `OpenClaw diagnostics timeline was required for ${timelineRequirement.reason} but was not emitted`
+    });
+  }
+
  if (timelineSummary.available && timelineSummary.parseErrorCount > allowedTimelineParseErrors) {
    violations.push({
      kind: "diagnostics",
@ -249,6 +262,17 @@ export function evaluateRecord(record, scenario, options = {}) {
    });
  }

+  if (openRequiredSpans.length > 0) {
+    const slowestOpen = openRequiredSpans[0];
+    violations.push({
+      kind: "diagnostics",
+      metric: "openclawOpenRequiredSpanCount",
+      expected: "0",
+      actual: openRequiredSpans.length,
+      message: `${openRequiredSpans.length} required OpenClaw diagnostics span(s) were left open; slowest ${slowestOpen.name}${slowestOpen.ageMs !== null ? ` age ${slowestOpen.ageMs}ms` : ""}`
+    });
+  }
+
  if (agentResponseOk === false) {
    violations.push({
      kind: "agent",
@ -327,6 +351,10 @@ export function evaluateRecord(record, scenario, options = {}) {
    openclawSlowestSpanName: timelineSummary.slowestSpanName,
    openclawSlowestSpanMs: timelineSummary.slowestSpanMs,
    openclawRepeatedSpanCount: timelineSummary.repeatedSpanCount,
+    openclawOpenSpanCount: timelineSummary.openSpanCount,
+    openclawOpenRequiredSpanCount: openRequiredSpans.length,
+    openclawOpenSpans: timelineSummary.openSpans,
+    openclawKeySpans: timelineSummary.keySpans,
    openclawEventLoopMaxMs: timelineSummary.eventLoopMaxMs,
    openclawProviderRequestMaxMs: timelineSummary.providerRequestMaxMs,
    openclawChildProcessFailedCount: timelineSummary.childProcessFailedCount,
@ -359,6 +387,32 @@ export function evaluateRecord(record, scenario, options = {}) {
  return record;
 }

+function timelineRequirementFor(options) {
+  const targetKind = options.targetPlan?.kind ?? null;
+  const profileDiagnostics = options.profile?.diagnostics ?? {};
+  const requiredForTargetKinds = profileDiagnostics.timelineRequiredForTargetKinds ?? [];
+  if (profileDiagnostics.timelineRequired === true && (requiredForTargetKinds.length === 0 || requiredForTargetKinds.includes(targetKind))) {
+    return {
+      required: true,
+      reason: `profile '${options.profile?.id ?? "unknown"}' on target kind '${targetKind ?? "unknown"}'`
+    };
+  }
+  if (options.surface?.diagnostics?.timelineRequiredForSourceBuild === true && targetKind === "local-build" && profileDiagnostics.timelineRequired === true) {
+    return {
+      required: true,
+      reason: `surface '${options.surface.id}' source-build diagnostics`
+    };
+  }
+  return { required: false, reason: null };
+}
+
+function requiredTimelineSpans(options) {
+  return new Set([
+    ...(options.surface?.diagnostics?.expectedSpans ?? []),
+    ...(options.profile?.diagnostics?.requiredKeySpans ?? [])
+  ]);
+}
+
 function maxDurationWhere(results, predicate) {
  const durations = results
    .filter((result) => predicate(result.command))
@ -756,12 +810,18 @@ function collectTimelineSummary(record) {
  let repeatedSpanCount = 0;
  let runtimeDepsStageMaxMs = null;
  let slowestRuntimeDepsPlugin = null;
+  let openSpanCount = 0;
+  let openSpans = [];
+  const keySpans = {};

  for (const timeline of timelines) {
    eventCount = Math.max(eventCount, timeline.eventCount ?? 0);
    parseErrorCount = Math.max(parseErrorCount, timeline.parseErrorCount ?? 0);
    childProcessFailedCount = Math.max(childProcessFailedCount, timeline.childProcesses?.failedCount ?? 0);
    repeatedSpanCount = Math.max(repeatedSpanCount, timeline.repeatedSpans?.length ?? 0);
+    openSpanCount = Math.max(openSpanCount, timeline.openSpanCount ?? timeline.openSpans?.length ?? 0);
+    openSpans = mergeOpenSpans(openSpans, timeline.openSpans ?? []);
+    mergeKeySpans(keySpans, timeline.keySpans ?? {});
    eventLoopMaxMs = maxNullable(eventLoopMaxMs, timeline.eventLoop?.maxMs);
    providerRequestMaxMs = maxNullable(providerRequestMaxMs, timeline.providers?.maxDurationMs);
    runtimeDepsStageMaxMs = maxNullable(
@ -791,6 +851,9 @@ function collectTimelineSummary(record) {
    slowestSpanName: slowestSpan?.name ?? null,
    slowestSpanMs: slowestSpan?.durationMs ?? null,
    repeatedSpanCount,
+    openSpanCount,
+    openSpans,
+    keySpans,
    eventLoopMaxMs,
    providerRequestMaxMs,
    childProcessFailedCount,
@ -799,6 +862,38 @@ function collectTimelineSummary(record) {
  };
 }

+function mergeOpenSpans(current, candidate) {
+  return [...current, ...candidate]
+    .toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1))
+    .slice(0, 25);
+}
+
+function mergeKeySpans(target, source) {
+  for (const [name, summary] of Object.entries(source)) {
+    const existing = target[name] ?? {
+      name,
+      count: 0,
+      errorCount: 0,
+      openCount: 0,
+      totalDurationMs: 0,
+      maxDurationMs: null,
+      slowest: null,
+      open: []
+    };
+    existing.count += summary.count ?? 0;
+    existing.errorCount += summary.errorCount ?? 0;
+    existing.openCount += summary.openCount ?? 0;
+    existing.totalDurationMs = roundNumber(existing.totalDurationMs + (summary.totalDurationMs ?? 0));
+    existing.maxDurationMs = maxNullable(existing.maxDurationMs, summary.maxDurationMs);
+    if (summary.slowest?.durationMs !== undefined &&
+      (!existing.slowest || summary.slowest.durationMs > existing.slowest.durationMs)) {
+      existing.slowest = summary.slowest;
+    }
+    existing.open = mergeOpenSpans(existing.open, summary.open ?? []).slice(0, 5);
+    target[name] = existing;
+  }
+}
+
 function collectCpuPercentMax(record) {
  const values = [];
  for (const phase of record.phases ?? []) {
@ -823,6 +918,10 @@ function maxNullable(left, right) {
  return left === null ? right : Math.max(left, right);
 }

+function roundNumber(value) {
+  return Math.round(value * 100) / 100;
+}
+
 function mergeRoles(left, right) {
  const roles = new Set(`${left ?? ""},${right ?? ""}`.split(",").filter(Boolean));
  return [...roles].join(",");
@ -962,6 +1061,15 @@ function buildDiagnosticCorrelation({
      durationMs: timelineSummary.slowestSpanMs
    });
  }
+  if (timelineSummary.openSpans.length > 0) {
+    const span = timelineSummary.openSpans[0];
+    findings.push({
+      kind: "openclaw-open-span",
+      summary: `Open OpenClaw span: ${span.name}${span.ageMs !== null ? ` age ${span.ageMs}ms` : ""}`,
+      span: span.name,
+      ageMs: span.ageMs
+    });
+  }
  if (eventLoopDelayMs !== null) {
    findings.push({
      kind: "event-loop",
--- a/src/main.mjs
+++ b/src/main.mjs
@ -154,6 +154,7 @@ async function matrixCommand(flags) {
    const profile = await loadProfile(required(flags.profile, "--profile"));
    const target = required(flags.target, "--target");
    const targetPlan = resolveTarget(target, "target");
+    validateProfileTarget(profile, targetPlan);
    if (flags.from) {
      resolveTarget(flags.from, "from");
    }
@ -268,6 +269,7 @@ async function matrixRun(flags) {
  const profile = await loadProfile(required(flags.profile, "--profile"));
  const target = required(flags.target, "--target");
  const targetPlan = resolveTarget(target, "target");
+  validateProfileTarget(profile, targetPlan);
  const fromPlan = flags.from ? resolveTarget(flags.from, "from") : null;
  const entries = applyMatrixControls(await expandProfile(profile), flags, platformInfo());
  const controls = matrixControlSummary(flags, targetPlan);
@ -284,6 +286,7 @@ async function matrixRun(flags) {
    const context = {
      target,
      targetPlan,
+      profile,
      from: flags.from,
      fromPlan,
      state: entry.state,
@ -606,6 +609,8 @@ function profileSummary(profile) {
    title: profile.title,
    objective: profile.objective,
    entryCount: profile.entries.length,
+    targetKinds: profile.targetKinds ?? null,
+    diagnostics: profile.diagnostics ?? null,
    gate: profile.gate ? {
      id: profile.gate.id ?? `${profile.id}-gate`,
      blockingCount: Array.isArray(profile.gate.blocking) ? profile.gate.blocking.length : profile.entries.length,
@ -614,6 +619,16 @@ function profileSummary(profile) {
  };
 }

+function validateProfileTarget(profile, targetPlan) {
+  const targetKinds = profile.targetKinds ?? [];
+  if (targetKinds.length === 0) {
+    return;
+  }
+  if (!targetKinds.includes(targetPlan.kind)) {
+    throw new Error(`profile '${profile.id}' requires target kind ${targetKinds.join(", ")}, got ${targetPlan.kind}`);
+  }
+}
+
 async function cleanupCommand(flags) {
  const [subcommand] = flags._;
  if (subcommand !== "envs") {
--- a/src/registries/profiles.mjs
+++ b/src/registries/profiles.mjs
@ -22,6 +22,8 @@ export function validateProfileShape(profile, sourceName = "profile") {
  requireString(profile, "title", errors);
  requireString(profile, "objective", errors);
  requireArray(profile, "entries", errors);
+  validateStringArray(profile.targetKinds, "targetKinds", errors, { optional: true });
+  validateDiagnostics(profile.diagnostics, "diagnostics", errors);
  validateEntries(profile.entries, errors);

  if (profile.gate !== undefined) {
@ -31,6 +33,21 @@ export function validateProfileShape(profile, sourceName = "profile") {
  assertNoShapeErrors(errors, sourceName);
 }

+/**
+ * Check the shape of a profile's optional `diagnostics` section.
+ *
+ * Problems are appended to `errors`; nothing is thrown here.
+ *
+ * @param {*} diagnostics - Value of the profile's `diagnostics` field.
+ * @param {string} prefix - Field path used at the start of each error message.
+ * @param {string[]} errors - Collector that receives error messages.
+ * @returns {void}
+ */
+function validateDiagnostics(diagnostics, prefix, errors) {
+  if (diagnostics === undefined) {
+    return;
+  }
+
+  const isPlainObject = Boolean(diagnostics)
+    && typeof diagnostics === "object"
+    && !Array.isArray(diagnostics);
+  if (!isPlainObject) {
+    errors.push(`${prefix} must be an object when set`);
+    return;
+  }
+
+  const { timelineRequired } = diagnostics;
+  if (timelineRequired !== undefined && typeof timelineRequired !== "boolean") {
+    errors.push(`${prefix}.timelineRequired must be boolean when set`);
+  }
+
+  // Both list fields are optional and are validated by the same helper.
+  for (const field of ["timelineRequiredForTargetKinds", "requiredKeySpans"]) {
+    validateStringArray(diagnostics[field], `${prefix}.${field}`, errors, { optional: true });
+  }
+}
+
 function validateEntries(entries, errors) {
  if (!Array.isArray(entries)) {
    return;
--- a/src/report.mjs
+++ b/src/report.mjs
@ -103,6 +103,19 @@ export function renderMarkdownReport(report) {
      lines.push(`- Event-loop delay mentions: ${record.measurements.eventLoopDelayMentions ?? "unknown"}`);
      lines.push(`- OpenClaw timeline: ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"} (${record.measurements.openclawTimelineEventCount ?? 0} events, ${record.measurements.openclawTimelineParseErrors ?? 0} parse errors)`);
      lines.push(`- Slowest OpenClaw span: ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"} ms`);
+      lines.push(`- Open OpenClaw spans: ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required)`);
+      if (record.measurements.openclawOpenSpans?.length > 0) {
+        const span = record.measurements.openclawOpenSpans[0];
+        lines.push(`- Slowest open span: ${span.name}${span.ageMs !== null ? ` ${span.ageMs} ms` : ""}`);
+      }
+      if (record.measurements.openclawKeySpans) {
+        const keySpanText = compactKeySpans(record.measurements.openclawKeySpans).slice(0, 5)
+          .map((span) => `${span.name} max ${span.maxDurationMs ?? "?"}ms open ${span.openCount ?? 0}`)
+          .join("; ");
+        if (keySpanText) {
+          lines.push(`- Key OpenClaw spans: ${keySpanText}`);
+        }
+      }
      lines.push(`- OpenClaw event-loop max: ${record.measurements.openclawEventLoopMaxMs ?? "unknown"} ms`);
      lines.push(`- OpenClaw provider request max: ${record.measurements.openclawProviderRequestMaxMs ?? "unknown"} ms`);
      lines.push(`- Structured event-loop delay: ${record.measurements.eventLoopDelayMs ?? "unknown"} ms`);
@ -462,6 +475,10 @@ function summarizeMeasurements(measurements) {
    openclawTimelineAvailable: measurements.openclawTimelineAvailable ?? null,
    openclawSlowestSpanName: measurements.openclawSlowestSpanName ?? null,
    openclawSlowestSpanMs: measurements.openclawSlowestSpanMs ?? null,
+    openclawOpenSpanCount: measurements.openclawOpenSpanCount ?? null,
+    openclawOpenRequiredSpanCount: measurements.openclawOpenRequiredSpanCount ?? null,
+    openclawOpenSpans: measurements.openclawOpenSpans ?? null,
+    openclawKeySpans: measurements.openclawKeySpans ?? null,
    nodeCpuProfileCount: measurements.nodeCpuProfileCount ?? null,
    nodeHeapProfileCount: measurements.nodeHeapProfileCount ?? null,
    nodeTraceEventCount: measurements.nodeTraceEventCount ?? null,
@ -540,7 +557,7 @@ export function renderPasteSummary(report) {
        const roleText = compactRolePeaks(record.measurements).slice(0, 4)
          .map((role) => `${role.role} ${role.peakRssMb ?? "?"}MB/${role.maxCpuPercent ?? "?"}%`)
          .join(", ") || "unknown";
-        lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? "unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}.`);
+        lines.push(`Measurements: cold ready ${record.measurements.coldReadyMs ?? "unknown"}ms; warm ready ${record.measurements.warmReadyMs ?? "unknown"}ms; listening ${record.measurements.timeToListeningMs ?? "unknown"}ms; health ready ${record.measurements.timeToHealthReadyMs ?? "unknown"}ms; readiness ${record.measurements.readinessClassification ?? "unknown"}; peak RSS ${record.measurements.peakRssMb ?? "unknown"} MB; max CPU ${record.measurements.cpuPercentMax ?? "unknown"}%; role peaks ${roleText}; samples ${record.measurements.resourceSampleCount ?? "unknown"}; final gateway ${record.measurements.finalGatewayState ?? "unknown"}; health failures ${record.measurements.healthFailures ?? "unknown"}; health p95 ${record.measurements.healthP95Ms ?? "unknown"}ms; missing deps ${record.measurements.missingDependencyErrors ?? "unknown"}; plugin load failures ${record.measurements.pluginLoadFailures ?? "unknown"}; restarts ${record.measurements.gatewayRestartCount ?? "unknown"}; agent turn ${record.measurements.agentTurnMs ?? "not-run"}ms; provider/model timeouts ${record.measurements.providerTimeoutMentions ?? "unknown"}; event-loop signals ${record.measurements.eventLoopDelayMentions ?? "unknown"}; timeline ${record.measurements.openclawTimelineAvailable ? "available" : "unavailable"}; slowest span ${record.measurements.openclawSlowestSpanName ?? "unknown"} ${record.measurements.openclawSlowestSpanMs ?? "unknown"}ms; open spans ${record.measurements.openclawOpenSpanCount ?? "unknown"} (${record.measurements.openclawOpenRequiredSpanCount ?? "unknown"} required); node profiles ${record.measurements.nodeCpuProfileCount ?? "unknown"}/${record.measurements.nodeHeapProfileCount ?? "unknown"}/${record.measurements.nodeTraceEventCount ?? "unknown"}; top CPU ${record.measurements.nodeProfileTopFunction ?? "unknown"} ${record.measurements.nodeProfileTopFunctionMs ?? "unknown"}ms; top heap ${record.measurements.nodeHeapTopFunction ?? 
"unknown"} ${record.measurements.nodeHeapTopFunctionMb ?? "unknown"}MB; runtime deps staging ${record.measurements.runtimeDepsStagingMs ?? "unknown"}ms${runtimeDepsPlugin}.`);
      }
    } else if (record.violations?.length > 0) {
      lines.push("Violations:");
@ -635,6 +652,10 @@ function briefEvidence(measurements, violations) {
  if (measurements.pluginLoadFailures !== null && measurements.pluginLoadFailures !== undefined) {
    items.push(`pluginLoadFailures: ${measurements.pluginLoadFailures}`);
  }
+  if (measurements.openclawOpenRequiredSpanCount > 0) {
+    const span = measurements.openclawOpenSpans?.[0];
+    items.push(`openRequiredSpans: ${measurements.openclawOpenRequiredSpanCount}${span ? `, slowest ${span.name}` : ""}`);
+  }
  for (const finding of measurements.diagnosticCorrelation?.findings?.slice(0, 3) ?? []) {
    items.push(finding.summary);
  }
@ -646,6 +667,12 @@ function briefEvidence(measurements, violations) {
  return items.slice(0, 8);
 }

+function compactKeySpans(keySpans) {
+  return Object.values(keySpans ?? {})
+    .filter((span) => (span.count ?? 0) > 0 || (span.openCount ?? 0) > 0)
+    .toSorted((left, right) => (right.maxDurationMs ?? 0) - (left.maxDurationMs ?? 0) || (right.openCount ?? 0) - (left.openCount ?? 0));
+}
+
 function compactRolePeaks(measurements) {
  const byRole = new Map();
  for (const role of measurements?.resourceTopRolesByRss ?? []) {
--- a/src/runner.mjs
+++ b/src/runner.mjs
@ -512,7 +512,9 @@ function runScenarioCommand(command, context, envName, artifactDir, phaseId, com

 function evaluatorContext(context, scenario) {
  return {
-    surface: context.surfacesById?.[scenario.surface] ?? null
+    surface: context.surfacesById?.[scenario.surface] ?? null, // surface looked up by the scenario's surface id; null when absent
+    targetPlan: context.targetPlan ?? null, // passed through from the run context; null when the context has none
+    profile: context.profile ?? null // passed through from the run context; null when the context has none
  };
 }

--- a/src/selfcheck.mjs
+++ b/src/selfcheck.mjs
@ -11,7 +11,7 @@ import { validateStateShape } from "./registries/states.mjs";
 import { validateRegistryReferences } from "./registries/validate.mjs";
 import { assertSafeScenarioCommand } from "./safety.mjs";
 import { parseTimelineText } from "./collectors/timeline.mjs";
-import { renderReportSummary } from "./report.mjs";
+import { renderPasteSummary, renderReportSummary } from "./report.mjs";

 export async function runSelfCheck(flags = {}) {
  const checks = [];
@ -51,6 +51,17 @@ export async function runSelfCheck(flags = {}) {
      assertEqual(data.entries.length, 1, "matrix include filter count");
      assertEqual(data.controls?.requestedParallel, 2, "matrix requested parallel");
    }));
+    checks.push(await jsonCommandCheck("diagnostic-profile-plan-json", "node bin/kova.mjs matrix plan --profile diagnostic --target local-build:/tmp/openclaw --include scenario:release-runtime-startup --json", (data) => {
+      assertEqual(data.schemaVersion, "kova.matrix.plan.v1", "diagnostic matrix plan schema");
+      assertEqual(data.profile?.id, "diagnostic", "diagnostic profile id");
+      assertEqual(data.profile?.diagnostics?.timelineRequired, true, "diagnostic timeline required");
+      assertArrayNotEmpty(data.entries, "diagnostic entries");
+    }));
+    checks.push(await failingCommandCheck(
+      "diagnostic-profile-rejects-non-local-build",
+      "node bin/kova.mjs matrix plan --profile diagnostic --target runtime:stable --json",
+      "profile 'diagnostic' requires target kind local-build"
+    ));
    checks.push(await failingCommandCheck(
      "invalid-parallel-rejected",
      "node bin/kova.mjs matrix plan --profile smoke --target runtime:stable --parallel nope --json",
@ -67,6 +78,8 @@ export async function runSelfCheck(flags = {}) {
      assertArray(data.envs, "cleanup envs");
    }));
    checks.push(await diagnosticsTimelineCheck());
+    checks.push(await diagnosticsOpenSpanCheck());
+    checks.push(diagnosticsTimelineEvaluationCheck());
    checks.push(readinessClassificationCheck());
    checks.push(await resourceRoleAttributionCheck(tmp));
    checks.push(roleThresholdEvaluationCheck());
@ -504,6 +517,7 @@ async function diagnosticsTimelineCheck() {
    assertEqual(timeline.eventLoop.maxMs, 214, "event loop max");
    assertEqual(timeline.providers.maxDurationMs, 1220, "provider duration");
    assertEqual(timeline.childProcesses.failedCount, 1, "child process failures");
+    assertEqual(timeline.keySpans["gateway.startup"].maxDurationMs, 2450, "gateway startup key span");
    return {
      id: "diagnostics-timeline-parser",
      status: "PASS",
@ -521,6 +535,151 @@ async function diagnosticsTimelineCheck() {
  }
 }

+async function diagnosticsOpenSpanCheck() { // Self-check: parses the open-span fixture and resolves to a PASS or FAIL check record.
+  try {
+    const text = await readFile("fixtures/diagnostics/timeline-open-span.jsonl", "utf8");
+    const timeline = parseTimelineText(text);
+    assertEqual(timeline.available, true, "open timeline available");
+    assertEqual(timeline.openSpanCount, 1, "open span count"); // the fixture is expected to leave exactly one span open
+    assertEqual(timeline.openSpans[0]?.name, "runtimeDeps.stage", "open span name");
+    assertEqual(timeline.openSpans[0]?.ageMs, 5000, "open span age");
+    assertEqual(timeline.keySpans["runtimeDeps.stage"].openCount, 1, "key open span count"); // the open span must also be counted in the per-name key span summary
+    return {
+      id: "diagnostics-open-span-parser",
+      status: "PASS",
+      command: "parse fixtures/diagnostics/timeline-open-span.jsonl",
+      durationMs: 0 // no timing is measured for this check
+    };
+  } catch (error) { // a read failure or failed assertion is reported as a FAIL record instead of propagating
+    return {
+      id: "diagnostics-open-span-parser",
+      status: "FAIL",
+      command: "parse fixtures/diagnostics/timeline-open-span.jsonl",
+      durationMs: 0,
+      message: error.message
+    };
+  }
+}
+
+function diagnosticsTimelineEvaluationCheck() { // Self-check: evaluates two synthetic records (missing timeline, open required span) and returns a PASS or FAIL check record.
+  try {
+    const missingTimelineRecord = {
+      scenario: "diagnostic-missing-timeline",
+      status: "PASS", // starts as PASS; the evaluation below is expected to turn it into FAIL
+      phases: [],
+      finalMetrics: {
+        service: { gatewayState: "running" },
+        logs: zeroLogMetrics(),
+        timeline: {
+          available: false, // case 1: no timeline was collected
+          eventCount: 0,
+          parseErrorCount: 0,
+          openSpanCount: 0,
+          openSpans: [],
+          keySpans: {},
+          runtimeDeps: {},
+          eventLoop: {},
+          providers: {},
+          childProcesses: {}
+        }
+      }
+    };
+    evaluateRecord(missingTimelineRecord, { thresholds: {} }, { // return value unused; status and violations are read back from the record itself
+      targetPlan: { kind: "local-build" },
+      profile: {
+        id: "diagnostic",
+        diagnostics: {
+          timelineRequired: true,
+          timelineRequiredForTargetKinds: ["local-build"]
+        }
+      },
+      surface: {
+        id: "release-runtime-startup",
+        diagnostics: { expectedSpans: ["runtimeDeps.stage"] },
+        thresholds: {}
+      }
+    });
+    assertEqual(missingTimelineRecord.status, "FAIL", "missing diagnostic timeline status");
+    assertEqual(
+      missingTimelineRecord.violations.some((violation) => violation.metric === "openclawTimelineAvailable"),
+      true,
+      "missing diagnostic timeline violation"
+    );
+
+    const openSpanRecord = {
+      scenario: "diagnostic-open-span",
+      status: "PASS", // case 2: also starts as PASS and is expected to end as FAIL
+      phases: [],
+      finalMetrics: {
+        service: { gatewayState: "running" },
+        logs: zeroLogMetrics(),
+        timeline: parseTimelineText([
+          "{\"type\":\"span.start\",\"timestamp\":\"2026-04-29T15:30:00.000Z\",\"name\":\"runtimeDeps.stage\",\"spanId\":\"1\"}", // span.start with no matching end event in this input
+          "{\"type\":\"eventLoop.sample\",\"timestamp\":\"2026-04-29T15:30:06.000Z\",\"name\":\"eventLoop\",\"maxMs\":400}" // later event, six seconds after the span start
+        ].join("\n"))
+      }
+    };
+    evaluateRecord(openSpanRecord, { thresholds: {} }, {
+      targetPlan: { kind: "local-build" },
+      profile: { id: "diagnostic", diagnostics: { timelineRequired: true } },
+      surface: {
+        id: "bundled-runtime-deps",
+        diagnostics: { expectedSpans: ["runtimeDeps.stage"] }, // the open span's name is listed as expected for this surface
+        thresholds: {}
+      }
+    });
+    assertEqual(openSpanRecord.status, "FAIL", "open required span status");
+    assertEqual(openSpanRecord.measurements.openclawOpenRequiredSpanCount, 1, "open required span measurement");
+    assertEqual(
+      openSpanRecord.violations.some((violation) => violation.metric === "openclawOpenRequiredSpanCount"),
+      true,
+      "open required span violation"
+    );
+    const reportSummary = renderReportSummary({ // the evaluated record is fed through the report renderer to check that open-span evidence survives
+      schemaVersion: "kova.report.v1",
+      generatedAt: "2026-04-29T15:30:10.000Z",
+      runId: "self-check-diagnostics",
+      summary: { total: 1, statuses: { FAIL: 1 } },
+      records: [openSpanRecord]
+    }, { structured: true }); // the result is inspected as an object (scenarios[0].measurements) below
+    assertEqual(
+      reportSummary.scenarios[0]?.measurements?.openclawOpenRequiredSpanCount,
+      1,
+      "structured report open span evidence"
+    );
+    assertEqual(
+      reportSummary.scenarios[0]?.measurements?.openclawOpenSpans?.[0]?.name,
+      "runtimeDeps.stage",
+      "structured report open span name"
+    );
+    assertEqual(
+      renderPasteSummary({
+        runId: "self-check-diagnostics",
+        target: "local-build:/tmp/openclaw",
+        mode: "self-check",
+        records: [openSpanRecord]
+      }).includes("openRequiredSpans: 1"), // the paste summary text must mention the open required span count
+      true,
+      "brief evidence includes open required spans"
+    );
+
+    return {
+      id: "diagnostics-timeline-evaluation",
+      status: "PASS",
+      command: "evaluate synthetic diagnostic timeline records",
+      durationMs: 0 // no timing is measured for this check
+    };
+  } catch (error) { // any failed assertion or thrown error above is reported as a FAIL record instead of propagating
+    return {
+      id: "diagnostics-timeline-evaluation",
+      status: "FAIL",
+      command: "evaluate synthetic diagnostic timeline records",
+      durationMs: 0,
+      message: error.message
+    };
+  }
+}
+
 function readinessClassificationCheck() {
  try {
    const record = {