From a995487433583edea6ea97fd9e08a3c8e1163c5a Mon Sep 17 00:00:00 2001
From: Shakker <shakkerdroid@gmail.com>
Date: Thu, 7 May 2026 10:32:11 +0100
Subject: [PATCH] fix: separate gateway measurement scope

---
 src/evaluator.mjs                           | 127 ++++++++++++++++--
 src/measurement-contract.mjs                |  68 ++++++++++
 src/reporting/compare.mjs                   |   4 +
 src/reporting/report.mjs                    |  11 +-
 src/runner.mjs                              |  53 +++++++-
 src/selfcheck.mjs                           | 141 +++++++++++++++++++-
 support/run-dashboard-session-send-turn.mjs |   4 +
 surfaces/dashboard-session-send-turn.json   |   1 +
 8 files changed, 389 insertions(+), 20 deletions(-)
 create mode 100644 src/measurement-contract.mjs

diff --git a/src/evaluator.mjs b/src/evaluator.mjs
index 794171e..8013333 100644
--- a/src/evaluator.mjs
+++ b/src/evaluator.mjs
@@ -11,6 +11,7 @@ import { computeProviderTurnAttribution } from "./collectors/provider.mjs";
 import { summarizeRuntimeDepsLogs } from "./collectors/logs.mjs";
 import { buildHealthMeasurement, healthReadinessClassification } from "./health.mjs";
 import { resolveThresholdPolicy } from "./evaluation/thresholds.mjs";
+import { measuredProductPhase, measurementScopeForPhase, normalizeMeasurementScope } from "./measurement-contract.mjs";
 import {
   checkAggregateThreshold,
   checkDuration,
@@ -30,13 +31,18 @@ export function evaluateRecord(record, scenario, options = {}) {
   const roleThresholds = thresholdPolicy.roleThresholds;
   const violations = [];
   const allResults = collectResults(record);
-  const measuredResults = collectResults(record, { excludePhaseIds: ["target-setup"] });
+  const measurementScopeSummary = summarizeMeasurementScopes(record);
+  const measuredResults = collectResults(record, { productOnly: true });
   const resourceSummary = collectResourceSummary(measuredResults);
-  const peakRssMb = maxNullable(
-    collectPeakRss(record, { excludePhaseIds: ["target-setup"] }),
+  const primaryResourceRole = options.surface?.resourcePrimaryRole ?? null;
+  const primaryRoleResources = primaryResourceRole ? resourceSummary.byRole[primaryResourceRole] : null;
+  const peakTrackedRssMb = maxNullable(
+    collectPeakRss(record, { productOnly: true }),
     resourceSummary.peakTotalRssMb
   );
-  const cpuPercentMax = maxNullable(collectCpuPercentMax(record), resourceSummary.maxTotalCpuPercent);
+  const cpuPercentMaxTracked = maxNullable(collectCpuPercentMax(record, { productOnly: true }), resourceSummary.maxTotalCpuPercent);
+  const peakRssMb = typeof primaryRoleResources?.peakRssMb === "number" ? primaryRoleResources.peakRssMb : peakTrackedRssMb;
+  const cpuPercentMax = typeof primaryRoleResources?.maxCpuPercent === "number" ? primaryRoleResources.maxCpuPercent : cpuPercentMaxTracked;
   const missingDependencyErrors = countMissingDependencyErrors(allResults) + countLogMetric(record, "missingDependencyErrors");
   const pluginLoadFailures = countLogMetric(record, "pluginLoadFailures");
   const metadataScanMentions = countLogMetric(record, "metadataScanMentions");
@@ -67,6 +73,7 @@ export function evaluateRecord(record, scenario, options = {}) {
   const timelineRequirement = timelineRequirementFor(options);
   const requiredOpenSpans = requiredTimelineSpans(options);
   const openRequiredSpans = timelineSummary.openSpans.filter((span) => requiredOpenSpans.has(span.name));
+  const missingRequiredSpans = missingTimelineSpans(timelineSummary, requiredOpenSpans);
   const runtimeDepsStagingMs = maxNullable(
     openclawDiagnostics.runtimeDepsStagingMs,
     timelineSummary.runtimeDepsStageMaxMs,
@@ -719,6 +726,18 @@ export function evaluateRecord(record, scenario, options = {}) {
     });
   }
 
+  if (timelineSummary.available && missingRequiredSpans.length > 0) {
+    violations.push({
+      kind: "diagnostics",
+      metric: "openclawMissingRequiredSpanCount",
+      expected: "0",
+      actual: missingRequiredSpans.length,
+      message: `${missingRequiredSpans.length} required OpenClaw diagnostics span(s) were not observed: ${missingRequiredSpans.slice(0, 5).join(", ")}`
+    });
+  }
+
+  checkGatewaySessionTransport(violations, agentTurns, scenario);
+
   if (agentResponseOk === false) {
     violations.push({
       kind: "agent",
@@ -737,6 +756,11 @@ export function evaluateRecord(record, scenario, options = {}) {
   record.measurements = {
     peakRssMb,
     cpuPercentMax,
+    measurementScopeSummary,
+    resourceMeasurementScope: "product",
+    resourcePrimaryRole: primaryResourceRole,
+    resourcePeakTrackedRssMb: peakTrackedRssMb,
+    resourceCpuPercentMaxTracked: cpuPercentMaxTracked,
     coldReadyMs,
     warmReadyMs,
     upgradeMs,
@@ -917,6 +941,8 @@ export function evaluateRecord(record, scenario, options = {}) {
     openclawRepeatedSpanCount: timelineSummary.repeatedSpanCount,
     openclawOpenSpanCount: timelineSummary.openSpanCount,
     openclawOpenRequiredSpanCount: openRequiredSpans.length,
+    openclawMissingRequiredSpanCount: missingRequiredSpans.length,
+    openclawMissingRequiredSpans: missingRequiredSpans,
     openclawOpenSpans: timelineSummary.openSpans,
     openclawKeySpans: timelineSummary.keySpans,
     openclawEventLoopMaxMs: timelineSummary.eventLoopMaxMs,
@@ -1089,6 +1115,29 @@ function preferredPreProviderAttributionSummary(...summaries) {
   return summaries.find((summary) => summary?.count > 0) ?? summaries[0];
 }
 
+function checkGatewaySessionTransport(violations, agentTurns, scenario) {
+  if (scenario.id !== "dashboard-session-send-turn") {
+    return;
+  }
+  for (const turn of agentTurns) {
+    if (!turn.gatewaySession) {
+      continue;
+    }
+    const transport = turn.gatewaySession.gatewayTransportKind;
+    if (transport === "direct-gateway-rpc") {
+      continue;
+    }
+    violations.push({
+      kind: "harness",
+      metric: "gatewayTransport.kind",
+      expected: "direct-gateway-rpc",
+      actual: transport ?? "unknown",
+      phaseId: turn.phaseId,
+      message: `dashboard session benchmark used ${transport ?? "unknown"} transport; direct Gateway RPC is required for Gateway product measurement${turn.gatewaySession.gatewayTransportFallbackReason ? ` (${turn.gatewaySession.gatewayTransportFallbackReason})` : ""}`
+    });
+  }
+}
+
 function extractGatewaySessionTurn(result) {
   if (!result?.command?.includes("run-dashboard-session-send-turn.mjs")) {
     return null;
@@ -1115,6 +1164,9 @@ function extractGatewaySessionTurn(result) {
     minAssistantCount: numberOrNull(payload.minAssistantCount),
     sessionKey: payload.sessionKey ?? null,
     runId: payload.runId ?? null,
+    gatewayTransportKind: payload.gatewayTransport?.kind ?? null,
+    gatewayTransportFallbackReason: payload.gatewayTransport?.fallbackReason ?? null,
+    gatewayTransportFallbackUsed: typeof payload.gatewayTransport?.kind === "string" && payload.gatewayTransport.kind !== "direct-gateway-rpc",
     activeStartedAtEpochMs,
     activeFinishedAtEpochMs,
     activeTurnMs,
@@ -2017,6 +2069,26 @@ function requiredTimelineSpans(options) {
   ]);
 }
 
+function missingTimelineSpans(timelineSummary, requiredSpans) {
+  return [...requiredSpans].filter((name) => !timelineSpanObserved(timelineSummary, name));
+}
+
+function timelineSpanObserved(timelineSummary, name) {
+  const exact = timelineSummary.keySpans?.[name] ?? timelineSummary.spanTotals?.[name];
+  if ((exact?.count ?? 0) > 0 || (exact?.openCount ?? 0) > 0) {
+    return true;
+  }
+  if ((timelineSummary.openSpans ?? []).some((span) => span.name === name)) {
+    return true;
+  }
+  if (name === "gateway.chat_send" || name === "auto_reply" || name === "reply" || name === "models.catalog") {
+    return Object.entries(timelineSummary.spanTotals ?? {}).some(([spanName, summary]) =>
+      spanName === name || (spanName.startsWith(`${name}.`) && (summary.count ?? 0) > 0)
+    );
+  }
+  return false;
+}
+
 function maxDurationWhere(results, predicate) {
   const durations = results
     .filter((result) => predicate(result.command))
@@ -2515,6 +2587,28 @@ function healthFailureCount(samples) {
   return samples.filter((sample) => sample && !sample.ok).length;
 }
 
+function summarizeMeasurementScopes(record) { // Tallies the record's phases and command results per measurement scope (product / harness / cleanup).
+  const phases = { product: 0, harness: 0, cleanup: 0 };
+  const results = { product: 0, harness: 0, cleanup: 0 };
+  for (const phase of record.phases ?? []) {
+    const phaseScope = measurementScopeForPhase(phase);
+    phases[phaseScope] += 1;
+    for (const result of phase.results ?? []) {
+      const resultScope = result.measurementScope ? normalizeMeasurementScope(result.measurementScope, phase.id) : phaseScope; // a truthy result-level scope is normalized (phase id as fallback) and used instead of the phase scope
+      results[resultScope] += 1;
+    }
+  }
+  return {
+    schemaVersion: "kova.measurementScopeSummary.v1",
+    productPhaseCount: phases.product,
+    harnessPhaseCount: phases.harness,
+    cleanupPhaseCount: phases.cleanup,
+    productCommandCount: results.product, // "command" counts are counts of phase.results entries
+    harnessCommandCount: results.harness,
+    cleanupCommandCount: results.cleanup
+  };
+}
+
 function collectResults(record, options = {}) {
   const excludePhaseIds = new Set(options.excludePhaseIds ?? []);
   const results = [];
@@ -2522,6 +2616,9 @@ function collectResults(record, options = {}) {
     if (excludePhaseIds.has(phase.id)) {
       continue;
     }
+    if (options.productOnly === true && !measuredProductPhase(phase)) {
+      continue;
+    }
     for (const result of phase.results ?? []) {
       results.push(result);
     }
@@ -2546,6 +2643,9 @@ function collectPeakRss(record, options = {}) {
     if (excludePhaseIds.has(phase.id)) {
       continue;
     }
+    if (options.productOnly === true && !measuredProductPhase(phase)) {
+      continue;
+    }
     const rss = phase.metrics?.process?.rssMb;
     if (typeof rss === "number") {
       peak = peak === null ? rss : Math.max(peak, rss);
@@ -2720,8 +2820,8 @@ function collectTimelineSummary(record) {
   let repeatedSpanCount = 0;
   let runtimeDepsStageMaxMs = null;
   let slowestRuntimeDepsPlugin = null;
-  let openSpanCount = 0;
-  let openSpans = [];
+  let latestOpenSpanCount = 0;
+  let latestOpenSpans = [];
   let latestEventCount = -1;
   let events = [];
   let turnAttributionEvents = [];
@@ -2734,6 +2834,10 @@ function collectTimelineSummary(record) {
       latestEventCount = timeline.eventCount ?? 0;
       events = timeline.events;
       turnAttributionEvents = Array.isArray(timeline.turnAttributionEvents) ? timeline.turnAttributionEvents : [];
+      latestOpenSpanCount = timeline.openSpanCount ?? timeline.openSpans?.length ?? 0;
+      latestOpenSpans = [...(timeline.openSpans ?? [])]
+        .toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1))
+        .slice(0, 25);
     }
     for (const artifact of timeline.artifacts ?? []) {
       artifacts.add(artifact);
@@ -2742,8 +2846,6 @@ function collectTimelineSummary(record) {
     parseErrorCount = Math.max(parseErrorCount, timeline.parseErrorCount ?? 0);
     childProcessFailedCount = Math.max(childProcessFailedCount, timeline.childProcesses?.failedCount ?? 0);
     repeatedSpanCount = Math.max(repeatedSpanCount, timeline.repeatedSpans?.length ?? 0);
-    openSpanCount = Math.max(openSpanCount, timeline.openSpanCount ?? timeline.openSpans?.length ?? 0);
-    openSpans = mergeOpenSpans(openSpans, timeline.openSpans ?? []);
     mergeKeySpans(keySpans, timeline.keySpans ?? {});
     mergeSpanTotals(spanTotals, timeline.spanTotals ?? {});
     eventLoopMaxMs = maxNullable(eventLoopMaxMs, timeline.eventLoop?.maxMs);
@@ -2775,8 +2877,8 @@ function collectTimelineSummary(record) {
     slowestSpanName: slowestSpan?.name ?? null,
     slowestSpanMs: slowestSpan?.durationMs ?? null,
     repeatedSpanCount,
-    openSpanCount,
-    openSpans,
+    openSpanCount: latestOpenSpanCount,
+    openSpans: latestOpenSpans,
     artifacts: [...artifacts],
     timelineArtifacts: [...artifacts],
     events,
@@ -2847,9 +2949,12 @@ function mergeKeySpans(target, source) {
   }
 }
 
-function collectCpuPercentMax(record) {
+function collectCpuPercentMax(record, options = {}) {
   const values = [];
   for (const phase of record.phases ?? []) {
+    if (options.productOnly === true && !measuredProductPhase(phase)) {
+      continue;
+    }
     const cpu = phase.metrics?.process?.cpuPercent;
     if (typeof cpu === "number") {
       values.push(cpu);
diff --git a/src/measurement-contract.mjs b/src/measurement-contract.mjs
new file mode 100644
index 0000000..430db34
--- /dev/null
+++ b/src/measurement-contract.mjs
@@ -0,0 +1,68 @@
+export const MEASUREMENT_SCOPES = new Set(["product", "harness", "cleanup"]); // the only scope values accepted as-is by the functions in this module
+
+export function normalizeMeasurementScope(value, phaseId = null) {
+  if (MEASUREMENT_SCOPES.has(value)) {
+    return value;
+  }
+  if (phaseId === "target-setup" || phaseId === "auth-prepare" || phaseId === "auth-setup" || phaseId === "prepare" || phaseId?.startsWith("state-")) {
+    return "harness";
+  }
+  if (phaseId === "cleanup" || phaseId === "auth-cleanup" || phaseId === "env-cleanup") {
+    return "cleanup";
+  }
+  return "product";
+}
+
+export function measuredProductPhase(phase) { // True when measurementScopeForPhase resolves the phase to "product".
+  return measurementScopeForPhase(phase) === "product";
+}
+
+export function measurementScopeForPhase(phase) { // Resolves a phase object to one of the MEASUREMENT_SCOPES values.
+  if (MEASUREMENT_SCOPES.has(phase?.measurementScope)) {
+    return phase.measurementScope; // a valid explicit scope on the phase is used as-is
+  }
+  if (phase?.id === "provision" && (phase.commands ?? []).some((command) => /(?:^|\s)--no-service(?:\s|$)/.test(command))) { // provision phase where any command carries a whitespace-delimited --no-service flag
+    return "harness";
+  }
+  return normalizeMeasurementScope(phase?.measurementScope, phase?.id); // scope is not a known value here, so this resolves via the phase-id rules
+}
+
+export function driverKindForCommand(command) { // Classifies a command by the driver it appears to use; rules are checked top to bottom and the first match wins.
+  const text = String(command ?? ""); // null/undefined become "", everything else is stringified
+  if (text.includes("run-dashboard-session-send-turn.mjs")) {
+    return "gateway-rpc";
+  }
+  if (text.includes("run-openai-compatible-turn.mjs")) {
+    return "gateway-http";
+  }
+  if (text.includes("run-tui-message-turn.mjs")) {
+    return "gateway-rpc";
+  }
+  if (/\bocm\s+@[^ ]+\s+--\s+agent\b/.test(text)) { // shape: ocm @<name> -- agent ...
+    return text.includes("--local") ? "openclaw-cli-local" : "openclaw-cli-gateway"; // plain substring test: any "--local" occurrence selects the local variant
+  }
+  if (/\bocm\s+@[^ ]+\s+--\s+gateway\s+call\b/.test(text)) { // shape: ocm @<name> -- gateway call ...
+    return "gateway-rpc-via-cli";
+  }
+  if (/\bocm\b/.test(text)) { // any other command containing the word ocm
+    return "ocm";
+  }
+  if (/\bnode\b/.test(text)) { // any remaining command containing the word node
+    return "kova-helper";
+  }
+  return "unknown";
+}
+
+export function phaseDriverKind(phase, commands = phase?.commands ?? []) {
+  if (phase?.driverKind) {
+    return phase.driverKind;
+  }
+  const kinds = new Set(commands.map(driverKindForCommand));
+  if (kinds.size === 1) {
+    return [...kinds][0];
+  }
+  if (kinds.size === 0) {
+    return "none";
+  }
+  return "mixed";
+}
diff --git a/src/reporting/compare.mjs b/src/reporting/compare.mjs
index 65b16ca..2553508 100644
--- a/src/reporting/compare.mjs
+++ b/src/reporting/compare.mjs
@@ -44,6 +44,8 @@ const defaultThresholds = {
   heapSnapshotBytes: 50 * 1024 * 1024,
   resourcePeakCommandTreeRssMb: 100,
   resourcePeakGatewayRssMb: 100,
+  resourcePeakTrackedRssMb: 100,
+  resourceCpuPercentMaxTracked: 25,
   openclawTimelineParseErrors: 0,
   openclawSlowestSpanMs: 5000,
   openclawEventLoopMaxMs: 250,
@@ -457,6 +459,8 @@ function metricDeltas(baseline, current) {
     "nodeProfileTopFunctionMs",
     "heapSnapshotBytes",
     "resourceSampleCount",
+    "resourcePeakTrackedRssMb",
+    "resourceCpuPercentMaxTracked",
     "resourcePeakCommandTreeRssMb",
     "resourcePeakGatewayRssMb",
     "openclawTimelineEventCount",
diff --git a/src/reporting/report.mjs b/src/reporting/report.mjs
index 15273de..7d84045 100644
--- a/src/reporting/report.mjs
+++ b/src/reporting/report.mjs
@@ -237,7 +237,9 @@ export function renderMarkdownReport(report) {
           const expectedFailure = turn.expectedFailure ? "; expected failure observed " + turn.expectedFailureObserved : "";
           lines.push(`  - ${turn.label}: total ${turn.totalTurnMs ?? "unknown"} ms; pre-provider ${turn.preProviderMs ?? "unknown"} ms; provider ${turn.providerFinalMs ?? "unknown"} ms; post-provider ${turn.postProviderMs ?? "unknown"} ms; route ${route}; status ${status}; issue ${issue}; response ${turn.responseOk}; leaks ${turn.processLeakCount ?? "unknown"}${providerTiming}${expectedFailure}`);
           if (turn.gatewaySession) {
-            lines.push(`    - gateway session: create ${turn.gatewaySession.createSession}; session create ${turn.gatewaySession.sessionCreateDurationMs ?? "n/a"} ms; send ${turn.gatewaySession.sendDurationMs ?? "unknown"} ms; first assistant ${turn.gatewaySession.timeToFirstAssistantMs ?? "unknown"} ms; matched assistant ${turn.gatewaySession.timeToMatchedAssistantMs ?? "unknown"} ms; polls ${turn.gatewaySession.historyPollCount ?? "unknown"} (${turn.gatewaySession.historyErrorCount ?? "unknown"} errors)`);
+            const transport = turn.gatewaySession.gatewayTransportKind ?? "unknown";
+            const fallback = turn.gatewaySession.gatewayTransportFallbackReason ? `; fallback ${turn.gatewaySession.gatewayTransportFallbackReason}` : "";
+            lines.push(`    - gateway session: transport ${transport}${fallback}; create ${turn.gatewaySession.createSession}; session create ${turn.gatewaySession.sessionCreateDurationMs ?? "n/a"} ms; send ${turn.gatewaySession.sendDurationMs ?? "unknown"} ms; first assistant ${turn.gatewaySession.timeToFirstAssistantMs ?? "unknown"} ms; matched assistant ${turn.gatewaySession.timeToMatchedAssistantMs ?? "unknown"} ms; polls ${turn.gatewaySession.historyPollCount ?? "unknown"} (${turn.gatewaySession.historyErrorCount ?? "unknown"} errors)`);
           }
           if (turn.turnDiagnostics) {
             lines.push(`    - active window: metadata scans ${turn.metadataScanCount ?? "unknown"} (${turn.metadataScanTotalMs ?? "unknown"} ms total, max ${turn.metadataScanMaxMs ?? "unknown"} ms); event-loop samples ${turn.turnDiagnostics.eventLoop?.sampleCount ?? "unknown"} max ${turn.eventLoopMaxMs ?? "unknown"} ms`);
@@ -703,6 +705,11 @@ function summarizeMeasurements(measurements) {
   return {
     peakRssMb: measurements.peakRssMb ?? null,
     cpuPercentMax: measurements.cpuPercentMax ?? null,
+    measurementScopeSummary: measurements.measurementScopeSummary ?? null,
+    resourceMeasurementScope: measurements.resourceMeasurementScope ?? null,
+    resourcePrimaryRole: measurements.resourcePrimaryRole ?? null,
+    resourcePeakTrackedRssMb: measurements.resourcePeakTrackedRssMb ?? null,
+    resourceCpuPercentMaxTracked: measurements.resourceCpuPercentMaxTracked ?? null,
     health: measurements.health ?? null,
     missingDependencyErrors: measurements.missingDependencyErrors ?? null,
     pluginLoadFailures: measurements.pluginLoadFailures ?? null,
@@ -719,6 +726,8 @@ function summarizeMeasurements(measurements) {
     openclawSlowestSpanMs: measurements.openclawSlowestSpanMs ?? null,
     openclawOpenSpanCount: measurements.openclawOpenSpanCount ?? null,
     openclawOpenRequiredSpanCount: measurements.openclawOpenRequiredSpanCount ?? null,
+    openclawMissingRequiredSpanCount: measurements.openclawMissingRequiredSpanCount ?? null,
+    openclawMissingRequiredSpans: measurements.openclawMissingRequiredSpans ?? null,
     openclawOpenSpans: measurements.openclawOpenSpans ?? null,
     openclawKeySpans: measurements.openclawKeySpans ?? null,
     providerRequestCount: measurements.providerRequestCount ?? null,
diff --git a/src/runner.mjs b/src/runner.mjs
index d6bc1e8..5f48659 100644
--- a/src/runner.mjs
+++ b/src/runner.mjs
@@ -14,6 +14,7 @@ import { collectEnvMetrics, collectNodeProfileMetrics } from "./metrics.mjs";
 import { collectorArtifactDirs, prepareCollectorArtifactDirs } from "./collectors/artifacts.mjs";
 import { collectProviderEvidence } from "./collectors/provider.mjs";
 import { evaluateRecord } from "./evaluator.mjs";
+import { driverKindForCommand, measurementScopeForPhase, normalizeMeasurementScope, phaseDriverKind } from "./measurement-contract.mjs";
 import { artifactsDir } from "./paths.mjs";
 import { repoRoot } from "./paths.mjs";
 import { assertKovaEnvName, assertSafeScenarioCommand } from "./safety.mjs";
@@ -82,6 +83,8 @@ export async function executeScenario(scenario, context) {
         id: "target-setup",
         title: "Target Runtime Setup",
         intent: "Prepare the target OpenClaw runtime selector for the scenario.",
+        measurementScope: "harness",
+        driverKind: "ocm",
         commands: setupResults.map((result) => result.command),
         evidence: [],
         results: setupResults
@@ -143,6 +146,8 @@ export async function executeScenario(scenario, context) {
           title: phase.title,
           intent: phase.intent,
           healthScope: phase.healthScope,
+          measurementScope: phaseMeasurementScope(phase),
+          driverKind: phaseDriverKind(phase, commands),
           expectedAgentFailure: phase.expectedAgentFailure === true,
           commands,
           evidence: phase.evidence ?? [],
@@ -332,7 +337,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
 
   const authPreparePhase = buildAuthPreparePhase(authPolicy, artifactDir);
   if (authPreparePhase) {
-    phases.push(authPreparePhase);
+    phases.push(withPhaseContract(authPreparePhase, "harness"));
   }
 
   const preparePhase = buildStateLifecyclePhase(context, envName, scenario, "prepare", context.state?.prepare ?? [], artifactDir);
@@ -344,20 +349,23 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
     if (phase.id === "cleanup") {
       continue;
     }
+    const commands = materializeScenarioPhaseCommands(phase, context, envName, artifactDir);
     phases.push({
       id: phase.id,
       title: phase.title,
       intent: phase.intent,
       healthScope: phase.healthScope,
+      measurementScope: phaseMeasurementScope(phase),
+      driverKind: phaseDriverKind(phase, commands),
       expectedAgentFailure: phase.expectedAgentFailure === true,
-      commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir),
+      commands,
       evidence: phase.evidence ?? []
     });
 
     if (phaseSupportsAuthSetup(phase, authPolicy) && !phases.some((planned) => planned.id === "auth-setup")) {
       const authSetupPhase = buildAuthSetupPhase(authPolicy, envName, artifactDir);
       if (authSetupPhase) {
-        phases.push(authSetupPhase);
+        phases.push(withPhaseContract(authSetupPhase, "harness"));
       }
     }
 
@@ -378,7 +386,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
   if (!context.keepEnv) {
     const authCleanupPhase = buildAuthCleanupPhase(authPolicy, artifactDir);
     if (authCleanupPhase) {
-      phases.push(authCleanupPhase);
+      phases.push(withPhaseContract(authCleanupPhase, "cleanup"));
     }
     const cleanupPhase = buildStateLifecyclePhase(context, envName, scenario, "cleanup", context.state?.cleanup ?? [], artifactDir);
     if (cleanupPhase) {
@@ -388,6 +396,8 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
       id: "env-cleanup",
       title: "Environment Cleanup",
       intent: "Destroy the disposable Kova env after the scenario finishes.",
+      measurementScope: "cleanup",
+      driverKind: "ocm",
       commands: [ocmEnvDestroy(envName)],
       evidence: ["temporary env destroyed"]
     });
@@ -405,6 +415,8 @@ function buildTargetSetupPhase(context, envName) {
     id: "target-setup",
     title: "Target Runtime Setup",
     intent: "Prepare the target OpenClaw runtime selector for the scenario.",
+    measurementScope: "harness",
+    driverKind: "ocm",
     commands: [targetSetupCommand(context.targetPlan)],
     evidence: [`local-build runtime ${context.targetPlan.runtimeName}`, `kova env ${envName}`]
   };
@@ -426,6 +438,8 @@ function buildStateLifecyclePhase(context, envName, scenario, kind, steps, artif
     id: kind,
     title: stateLifecycleTitle(context.state?.id, kind, phaseId),
     intent: stateLifecycleIntent(context.state?.id, kind, phaseId),
+    measurementScope: normalizeMeasurementScope(null, kind),
+    driverKind: phaseDriverKind(null, commands),
     commands,
     evidence,
     scenario: scenario.id
@@ -459,6 +473,8 @@ async function executeStateLifecycleSteps(context, envName, scenario, kind, step
     id: kind,
     title: stateLifecycleTitle(context.state?.id, kind, phaseId),
     intent: stateLifecycleIntent(context.state?.id, kind, phaseId),
+    measurementScope: normalizeMeasurementScope(null, kind),
+    driverKind: phaseDriverKind(null, commands),
     commands,
     evidence,
     results,
@@ -476,6 +492,8 @@ async function executeAuthPhase(phase, context, envName, artifactDir, authPolicy
   }
   return {
     ...phase,
+    measurementScope: normalizeMeasurementScope(phase.measurementScope, phase.id),
+    driverKind: phaseDriverKind(phase),
     results,
     metrics: await collectEnvMetrics(envName, metricOptions(context, null, { id: phase.id }, artifactDir))
   };
@@ -569,7 +587,7 @@ async function executeTargetSetup(context, envName, artifactDir) {
   }
 
   const results = [
-    await runCommand(targetSetupCommand(context.targetPlan), {
+    tagCommandResult(await runCommand(targetSetupCommand(context.targetPlan), {
       timeoutMs: context.timeoutMs,
       env: { KOVA_ENV_NAME: envName },
       resourceSample: context.resourceSampling === false ? null : {
@@ -578,7 +596,7 @@ async function executeTargetSetup(context, envName, artifactDir) {
         processRoles: context.processRoles ?? [],
         artifactPath: join(collectorArtifactDirs(artifactDir).resourceSamples, "target-setup-1.jsonl")
       }
-    })
+    }), "target-setup")
   ];
   if (results.every((result) => result.status === 0) && context.targetSetup) {
     context.targetSetup.completed = true;
@@ -617,6 +635,7 @@ async function runScenarioCommand(command, context, envName, artifactDir, phaseI
       artifactPath: join(collectorArtifactDirs(artifactDir).resourceSamples, `${safeSegment(phaseId)}-${commandIndex + 1}.jsonl`)
     }
   });
+  tagCommandResult(result, phaseId);
   if (agentCommand) {
     await sleep(1000);
     const afterSnapshot = captureProcessSnapshot(snapshotOptions);
@@ -638,6 +657,28 @@ async function runScenarioCommand(command, context, envName, artifactDir, phaseI
   return result;
 }
 
+function phaseMeasurementScope(phase) { // Module-local alias: delegates directly to measurementScopeForPhase.
+  return measurementScopeForPhase(phase);
+}
+
+function withPhaseContract(phase, scope = null) {
+  return {
+    ...phase,
+    measurementScope: normalizeMeasurementScope(scope ?? phase.measurementScope, phase.id),
+    driverKind: phaseDriverKind(phase)
+  };
+}
+
+function tagCommandResult(result, phaseId) {
+  result.measurementScope = measurementScopeForPhase({
+    id: phaseId,
+    measurementScope: result.measurementScope,
+    commands: [result.command]
+  });
+  result.driverKind = driverKindForCommand(result.command);
+  return result;
+}
+
 function isAgentMessageCommand(command) {
   return (command.includes(" -- agent ") && command.includes("--message")) ||
     command.includes("run-concurrent-agent-turns.mjs") ||
diff --git a/src/selfcheck.mjs b/src/selfcheck.mjs
index c483ef9..b3a6b5f 100644
--- a/src/selfcheck.mjs
+++ b/src/selfcheck.mjs
@@ -608,6 +608,7 @@ function localBuildTargetSetupResourceExclusionCheck() {
       phases: [
         {
           id: "target-setup",
+          measurementScope: "harness",
           results: [{
             command: "ocm runtime build-local kova-local-test --repo /tmp/openclaw --force",
             status: 0,
@@ -619,8 +620,23 @@ function localBuildTargetSetupResourceExclusionCheck() {
             })
           }]
         },
+        {
+          id: "auth-prepare",
+          measurementScope: "harness",
+          results: [{
+            command: "node support/mock-openai-server.mjs",
+            status: 0,
+            durationMs: 500,
+            resourceSamples: syntheticResourceSamples({
+              peakRssMb: 1900,
+              maxCpuPercent: 320,
+              role: "mock-provider"
+            })
+          }]
+        },
         {
           id: "scenario-command",
+          measurementScope: "product",
           results: [{
             command: "ocm @kova-self-check -- status",
             status: 0,
@@ -630,6 +646,29 @@ function localBuildTargetSetupResourceExclusionCheck() {
               maxCpuPercent: 20,
               role: "gateway"
             })
+          }, {
+            command: "node support/kova-helper.mjs",
+            status: 0,
+            durationMs: 100,
+            resourceSamples: syntheticResourceSamples({
+              peakRssMb: 600,
+              maxCpuPercent: 30,
+              role: "command-tree"
+            })
+          }]
+        },
+        {
+          id: "auth-cleanup",
+          measurementScope: "cleanup",
+          results: [{
+            command: "kill $(cat mock/pid)",
+            status: 0,
+            durationMs: 50,
+            resourceSamples: syntheticResourceSamples({
+              peakRssMb: 1800,
+              maxCpuPercent: 300,
+              role: "mock-provider"
+            })
           }]
         }
       ],
@@ -638,14 +677,19 @@ function localBuildTargetSetupResourceExclusionCheck() {
         logs: zeroLogMetrics()
       }
     };
-    evaluateRecord(record, { thresholds: { peakRssMb: 900 } }, {
-      surface: { thresholds: {} },
+    evaluateRecord(record, { thresholds: { peakRssMb: 200 } }, {
+      surface: { thresholds: {}, resourcePrimaryRole: "gateway" },
       targetPlan: { kind: "local-build" }
     });
     assertEqual(record.status, "PASS", "local-build target setup resources ignored status");
     assertEqual(record.measurements.peakRssMb, 100, "local-build target setup resources ignored RSS");
+    assertEqual(record.measurements.resourcePeakTrackedRssMb, 600, "tracked product helper RSS retained separately");
+    assertEqual(record.measurements.resourcePrimaryRole, "gateway", "primary resource role retained");
     assertEqual(record.measurements.resourceByRole.gateway.peakRssMb, 100, "scenario role RSS retained");
     assertEqual(record.measurements.resourceByRole["build-tooling"], undefined, "target setup role excluded");
+    assertEqual(record.measurements.resourceByRole["mock-provider"], undefined, "harness auth resources excluded");
+    assertEqual(record.measurements.measurementScopeSummary.harnessCommandCount, 2, "harness command count");
+    assertEqual(record.measurements.measurementScopeSummary.cleanupCommandCount, 1, "cleanup command count");
     assertEqual(record.violations, undefined, "no-service local-build record has no gateway violation");
     return {
       id: "local-build-target-setup-resource-exclusion",
@@ -2125,6 +2169,7 @@ function gatewaySessionTurnEvaluationCheck() {
       minAssistantCount: 1,
       sessionKey: "kova-dashboard-session-send",
       runId: "cold-run",
+      gatewayTransport: { kind: "direct-gateway-rpc", fallbackReason: null },
       activeStartedAtEpochMs: base + 1000,
       activeFinishedAtEpochMs: base + 2500,
       activeTurnMs: 1500,
@@ -2150,6 +2195,7 @@ function gatewaySessionTurnEvaluationCheck() {
       minAssistantCount: 2,
       sessionKey: "kova-dashboard-session-send",
       runId: "warm-run",
+      gatewayTransport: { kind: "direct-gateway-rpc", fallbackReason: null },
       activeStartedAtEpochMs: base + 11000,
       activeFinishedAtEpochMs: base + 11800,
       activeTurnMs: 800,
@@ -2284,6 +2330,7 @@ function gatewaySessionTurnEvaluationCheck() {
     assertEqual(record.measurements.agentEventLoopMaxMs, 9, "active-window event-loop max");
     assertEqual(record.measurements.agentSessionPollCount, 5, "session polling total");
     assertEqual(record.measurements.agentTurns[1].gatewaySession.createSession, false, "warm turn reuses session");
+    assertEqual(record.measurements.agentTurns[0].gatewaySession.gatewayTransportKind, "direct-gateway-rpc", "dashboard turn direct Gateway transport");
 
     const rendered = renderMarkdownReport({
       generatedAt: "2026-05-01T00:00:00.000Z",
@@ -2295,8 +2342,57 @@ function gatewaySessionTurnEvaluationCheck() {
       summary: { statuses: { PASS: 1 } }
     });
     assertEqual(rendered.includes("gateway session:"), true, "markdown includes gateway session detail");
+    assertEqual(rendered.includes("transport direct-gateway-rpc"), true, "markdown includes direct Gateway transport");
     assertEqual(rendered.includes("active window:"), true, "markdown includes active turn diagnostics");
 
+    const fallbackPayload = {
+      ...coldPayload,
+      gatewayTransport: { kind: "shell", fallbackReason: "gateway-token-unavailable" }
+    };
+    const fallbackRecord = {
+      scenario: "dashboard-session-send-turn",
+      surface: "dashboard-session-send-turn",
+      title: "Gateway session shell fallback",
+      status: "PASS",
+      phases: [{
+        id: "cold-dashboard-session-turn",
+        title: "Cold Gateway Session Turn",
+        intent: "Synthetic shell fallback",
+        commands: ["node support/run-dashboard-session-send-turn.mjs --create-session true"],
+        evidence: [],
+        results: [{
+          command: "node support/run-dashboard-session-send-turn.mjs --create-session true",
+          status: 0,
+          timedOut: false,
+          startedAt: new Date(base).toISOString(),
+          startedAtEpochMs: base,
+          finishedAt: new Date(base + 5000).toISOString(),
+          finishedAtEpochMs: base + 5000,
+          durationMs: 5000,
+          stdout: JSON.stringify(fallbackPayload),
+          stderr: ""
+        }],
+        metrics: { logs: zeroLogMetrics(), health: { ok: true } }
+      }],
+      providerEvidence: {
+        available: true,
+        requestCount: 1,
+        requests: [record.providerEvidence.requests[0]]
+      },
+      finalMetrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() }
+    };
+    evaluateRecord(fallbackRecord, {
+      id: "dashboard-session-send-turn",
+      agent: { expectedText: "KOVA_AGENT_OK" },
+      thresholds: {}
+    }, { surface: { thresholds: {} }, targetPlan: { kind: "runtime" } });
+    assertEqual(fallbackRecord.status, "FAIL", "dashboard session shell fallback rejected");
+    assertEqual(
+      fallbackRecord.violations.some((violation) => violation.metric === "gatewayTransport.kind"),
+      true,
+      "dashboard session shell fallback violation"
+    );
+
     return {
       id: "gateway-session-turn-evaluation",
       status: "PASS",
@@ -4144,6 +4240,47 @@ function diagnosticsTimelineEvaluationCheck() {
       "missing diagnostic timeline violation"
     );
 
+    const missingSpanRecord = {
+      scenario: "diagnostic-missing-span",
+      status: "PASS",
+      phases: [],
+      finalMetrics: {
+        service: { gatewayState: "running" },
+        logs: zeroLogMetrics(),
+        timeline: {
+          available: true,
+          eventCount: 1,
+          parseErrorCount: 0,
+          openSpanCount: 0,
+          openSpans: [],
+          keySpans: {},
+          spanTotals: {
+            "gateway.startup": { count: 1, totalDurationMs: 100, maxDurationMs: 100 }
+          },
+          runtimeDeps: {},
+          eventLoop: {},
+          providers: {},
+          childProcesses: {}
+        }
+      }
+    };
+    evaluateRecord(missingSpanRecord, { thresholds: {} }, {
+      targetPlan: { kind: "local-build" },
+      profile: { id: "diagnostic", diagnostics: { timelineRequired: true } },
+      surface: {
+        id: "bundled-runtime-deps",
+        diagnostics: { expectedSpans: ["runtimeDeps.stage"] },
+        thresholds: {}
+      }
+    });
+    assertEqual(missingSpanRecord.status, "FAIL", "missing required span status");
+    assertEqual(missingSpanRecord.measurements.openclawMissingRequiredSpanCount, 1, "missing required span measurement");
+    assertEqual(
+      missingSpanRecord.violations.some((violation) => violation.metric === "openclawMissingRequiredSpanCount"),
+      true,
+      "missing required span violation"
+    );
+
     const openSpanRecord = {
       scenario: "diagnostic-open-span",
       status: "PASS",
diff --git a/support/run-dashboard-session-send-turn.mjs b/support/run-dashboard-session-send-turn.mjs
index 82776ab..79c6e73 100755
--- a/support/run-dashboard-session-send-turn.mjs
+++ b/support/run-dashboard-session-send-turn.mjs
@@ -24,7 +24,11 @@ try {
   const sessionKey = args["session-key"] ?? `kova-dashboard-${randomUUID()}`;
   const createSession = readBoolean(args["create-session"], true);
   const minAssistantCount = readPositiveInteger(args["min-assistant-count"], 1);
+  const allowShellFallback = readBoolean(args["allow-shell-fallback"], false);
   const gatewayTransport = await openDirectGatewayRpcClient(runtimeContext);
+  if (!gatewayTransport.client && !allowShellFallback) {
+    throw new Error(`direct Gateway RPC is required for dashboard-session-send-turn; fallback=${gatewayTransport.transport}; reason=${gatewayTransport.fallbackReason ?? "unknown"}`);
+  }
 
   try {
     let created = null;
diff --git a/surfaces/dashboard-session-send-turn.json b/surfaces/dashboard-session-send-turn.json
index c12b4c0..d14ac53 100644
--- a/surfaces/dashboard-session-send-turn.json
+++ b/surfaces/dashboard-session-send-turn.json
@@ -10,6 +10,7 @@
     "agent-process",
     "mock-provider"
   ],
+  "resourcePrimaryRole": "gateway",
   "thresholds": {
     "agentTurnMs": 45000,
     "coldAgentTurnMs": 45000,