fix: separate gateway measurement scope

This commit is contained in:
Shakker 2026-05-07 10:32:11 +01:00
parent 9d9ff32d32
commit a995487433
No known key found for this signature in database
8 changed files with 389 additions and 20 deletions

View File

@ -11,6 +11,7 @@ import { computeProviderTurnAttribution } from "./collectors/provider.mjs";
import { summarizeRuntimeDepsLogs } from "./collectors/logs.mjs";
import { buildHealthMeasurement, healthReadinessClassification } from "./health.mjs";
import { resolveThresholdPolicy } from "./evaluation/thresholds.mjs";
import { measuredProductPhase, measurementScopeForPhase, normalizeMeasurementScope } from "./measurement-contract.mjs";
import {
checkAggregateThreshold,
checkDuration,
@ -30,13 +31,18 @@ export function evaluateRecord(record, scenario, options = {}) {
const roleThresholds = thresholdPolicy.roleThresholds;
const violations = [];
const allResults = collectResults(record);
const measuredResults = collectResults(record, { excludePhaseIds: ["target-setup"] });
const measurementScopeSummary = summarizeMeasurementScopes(record);
const measuredResults = collectResults(record, { productOnly: true });
const resourceSummary = collectResourceSummary(measuredResults);
const peakRssMb = maxNullable(
collectPeakRss(record, { excludePhaseIds: ["target-setup"] }),
const primaryResourceRole = options.surface?.resourcePrimaryRole ?? null;
const primaryRoleResources = primaryResourceRole ? resourceSummary.byRole[primaryResourceRole] : null;
const peakTrackedRssMb = maxNullable(
collectPeakRss(record, { productOnly: true }),
resourceSummary.peakTotalRssMb
);
const cpuPercentMax = maxNullable(collectCpuPercentMax(record), resourceSummary.maxTotalCpuPercent);
const cpuPercentMaxTracked = maxNullable(collectCpuPercentMax(record, { productOnly: true }), resourceSummary.maxTotalCpuPercent);
const peakRssMb = typeof primaryRoleResources?.peakRssMb === "number" ? primaryRoleResources.peakRssMb : peakTrackedRssMb;
const cpuPercentMax = typeof primaryRoleResources?.maxCpuPercent === "number" ? primaryRoleResources.maxCpuPercent : cpuPercentMaxTracked;
const missingDependencyErrors = countMissingDependencyErrors(allResults) + countLogMetric(record, "missingDependencyErrors");
const pluginLoadFailures = countLogMetric(record, "pluginLoadFailures");
const metadataScanMentions = countLogMetric(record, "metadataScanMentions");
@ -67,6 +73,7 @@ export function evaluateRecord(record, scenario, options = {}) {
const timelineRequirement = timelineRequirementFor(options);
const requiredOpenSpans = requiredTimelineSpans(options);
const openRequiredSpans = timelineSummary.openSpans.filter((span) => requiredOpenSpans.has(span.name));
const missingRequiredSpans = missingTimelineSpans(timelineSummary, requiredOpenSpans);
const runtimeDepsStagingMs = maxNullable(
openclawDiagnostics.runtimeDepsStagingMs,
timelineSummary.runtimeDepsStageMaxMs,
@ -719,6 +726,18 @@ export function evaluateRecord(record, scenario, options = {}) {
});
}
if (timelineSummary.available && missingRequiredSpans.length > 0) {
violations.push({
kind: "diagnostics",
metric: "openclawMissingRequiredSpanCount",
expected: "0",
actual: missingRequiredSpans.length,
message: `${missingRequiredSpans.length} required OpenClaw diagnostics span(s) were not observed: ${missingRequiredSpans.slice(0, 5).join(", ")}`
});
}
checkGatewaySessionTransport(violations, agentTurns, scenario);
if (agentResponseOk === false) {
violations.push({
kind: "agent",
@ -737,6 +756,11 @@ export function evaluateRecord(record, scenario, options = {}) {
record.measurements = {
peakRssMb,
cpuPercentMax,
measurementScopeSummary,
resourceMeasurementScope: "product",
resourcePrimaryRole: primaryResourceRole,
resourcePeakTrackedRssMb: peakTrackedRssMb,
resourceCpuPercentMaxTracked: cpuPercentMaxTracked,
coldReadyMs,
warmReadyMs,
upgradeMs,
@ -917,6 +941,8 @@ export function evaluateRecord(record, scenario, options = {}) {
openclawRepeatedSpanCount: timelineSummary.repeatedSpanCount,
openclawOpenSpanCount: timelineSummary.openSpanCount,
openclawOpenRequiredSpanCount: openRequiredSpans.length,
openclawMissingRequiredSpanCount: missingRequiredSpans.length,
openclawMissingRequiredSpans: missingRequiredSpans,
openclawOpenSpans: timelineSummary.openSpans,
openclawKeySpans: timelineSummary.keySpans,
openclawEventLoopMaxMs: timelineSummary.eventLoopMaxMs,
@ -1089,6 +1115,29 @@ function preferredPreProviderAttributionSummary(...summaries) {
return summaries.find((summary) => summary?.count > 0) ?? summaries[0];
}
/**
 * Appends a harness violation for every dashboard-session turn that did not
 * report the direct Gateway RPC transport.
 *
 * Only the "dashboard-session-send-turn" scenario is checked; turns without a
 * `gatewaySession` payload are ignored.
 *
 * @param {object[]} violations - Violation list that is appended to in place.
 * @param {Iterable<object>} agentTurns - Evaluated agent turns.
 * @param {{ id: string }} scenario - Scenario being evaluated.
 * @returns {void}
 */
function checkGatewaySessionTransport(violations, agentTurns, scenario) {
  const requiredTransport = "direct-gateway-rpc";
  if (scenario.id === "dashboard-session-send-turn") {
    for (const turn of agentTurns) {
      const session = turn.gatewaySession;
      if (session && session.gatewayTransportKind !== requiredTransport) {
        // A missing transport kind is reported as "unknown" rather than null.
        const observed = session.gatewayTransportKind ?? "unknown";
        const fallbackReason = session.gatewayTransportFallbackReason;
        const reasonSuffix = fallbackReason ? ` (${fallbackReason})` : "";
        violations.push({
          kind: "harness",
          metric: "gatewayTransport.kind",
          expected: requiredTransport,
          actual: observed,
          phaseId: turn.phaseId,
          message: `dashboard session benchmark used ${observed} transport; direct Gateway RPC is required for Gateway product measurement${reasonSuffix}`
        });
      }
    }
  }
}
function extractGatewaySessionTurn(result) {
if (!result?.command?.includes("run-dashboard-session-send-turn.mjs")) {
return null;
@ -1115,6 +1164,9 @@ function extractGatewaySessionTurn(result) {
minAssistantCount: numberOrNull(payload.minAssistantCount),
sessionKey: payload.sessionKey ?? null,
runId: payload.runId ?? null,
gatewayTransportKind: payload.gatewayTransport?.kind ?? null,
gatewayTransportFallbackReason: payload.gatewayTransport?.fallbackReason ?? null,
gatewayTransportFallbackUsed: typeof payload.gatewayTransport?.kind === "string" && payload.gatewayTransport.kind !== "direct-gateway-rpc",
activeStartedAtEpochMs,
activeFinishedAtEpochMs,
activeTurnMs,
@ -2017,6 +2069,26 @@ function requiredTimelineSpans(options) {
]);
}
/**
 * Lists the required span names that were not observed in the timeline.
 *
 * @param {object} timelineSummary - Aggregated timeline summary.
 * @param {Iterable<string>} requiredSpans - Span names that must be present.
 * @returns {string[]} Required names for which `timelineSpanObserved` is false,
 *   in the iteration order of `requiredSpans`.
 */
function missingTimelineSpans(timelineSummary, requiredSpans) {
  const missing = [];
  for (const spanName of [...requiredSpans]) {
    const observed = timelineSpanObserved(timelineSummary, spanName);
    if (!observed) {
      missing.push(spanName);
    }
  }
  return missing;
}
/**
 * Decides whether a named diagnostics span was observed in the timeline.
 *
 * A span counts as observed when any of the following holds:
 *  - its `keySpans` or `spanTotals` entry has a positive `count` or `openCount`;
 *  - it appears by name in `openSpans`;
 *  - it is one of the span families ("gateway.chat_send", "auto_reply",
 *    "reply", "models.catalog") and a `spanTotals` entry named `<name>.<child>`
 *    has a positive `count`.
 *
 * An entry whose counts are all zero is never treated as an observation, and a
 * zero-count `keySpans` entry does not hide a populated `spanTotals` entry.
 *
 * @param {object} timelineSummary - Aggregated timeline summary.
 * @param {string} name - Span name to look for.
 * @returns {boolean} True when the span (or a child of a span family) was seen.
 */
function timelineSpanObserved(timelineSummary, name) {
  const hasActivity = (summary) => (summary?.count ?? 0) > 0 || (summary?.openCount ?? 0) > 0;

  // Consult both aggregates independently so an empty entry in one cannot mask the other.
  if (hasActivity(timelineSummary.keySpans?.[name]) || hasActivity(timelineSummary.spanTotals?.[name])) {
    return true;
  }
  if ((timelineSummary.openSpans ?? []).some((span) => span.name === name)) {
    return true;
  }

  const spanFamilies = ["gateway.chat_send", "auto_reply", "reply", "models.catalog"];
  if (!spanFamilies.includes(name)) {
    return false;
  }
  // The exact-name entry was already checked above; only child spans remain.
  const childPrefix = `${name}.`;
  return Object.entries(timelineSummary.spanTotals ?? {}).some(
    ([spanName, summary]) => spanName.startsWith(childPrefix) && (summary?.count ?? 0) > 0
  );
}
function maxDurationWhere(results, predicate) {
const durations = results
.filter((result) => predicate(result.command))
@ -2515,6 +2587,28 @@ function healthFailureCount(samples) {
return samples.filter((sample) => sample && !sample.ok).length;
}
/**
 * Counts phases and command results per measurement scope.
 *
 * Each phase is classified with `measurementScopeForPhase`. A result that
 * carries its own `measurementScope` is normalized with
 * `normalizeMeasurementScope`; otherwise it inherits the scope of its phase.
 *
 * @param {{ phases?: object[] }} record - Scenario record being evaluated.
 * @returns {object} A "kova.measurementScopeSummary.v1" summary with per-scope
 *   phase and command counts.
 */
function summarizeMeasurementScopes(record) {
  const newTally = () => ({ product: 0, harness: 0, cleanup: 0 });
  const phaseTally = newTally();
  const commandTally = newTally();

  for (const phase of record.phases ?? []) {
    const inheritedScope = measurementScopeForPhase(phase);
    phaseTally[inheritedScope] += 1;

    for (const result of phase.results ?? []) {
      let commandScope = inheritedScope;
      if (result.measurementScope) {
        commandScope = normalizeMeasurementScope(result.measurementScope, phase.id);
      }
      commandTally[commandScope] += 1;
    }
  }

  return {
    schemaVersion: "kova.measurementScopeSummary.v1",
    productPhaseCount: phaseTally.product,
    harnessPhaseCount: phaseTally.harness,
    cleanupPhaseCount: phaseTally.cleanup,
    productCommandCount: commandTally.product,
    harnessCommandCount: commandTally.harness,
    cleanupCommandCount: commandTally.cleanup
  };
}
function collectResults(record, options = {}) {
const excludePhaseIds = new Set(options.excludePhaseIds ?? []);
const results = [];
@ -2522,6 +2616,9 @@ function collectResults(record, options = {}) {
if (excludePhaseIds.has(phase.id)) {
continue;
}
if (options.productOnly === true && !measuredProductPhase(phase)) {
continue;
}
for (const result of phase.results ?? []) {
results.push(result);
}
@ -2546,6 +2643,9 @@ function collectPeakRss(record, options = {}) {
if (excludePhaseIds.has(phase.id)) {
continue;
}
if (options.productOnly === true && !measuredProductPhase(phase)) {
continue;
}
const rss = phase.metrics?.process?.rssMb;
if (typeof rss === "number") {
peak = peak === null ? rss : Math.max(peak, rss);
@ -2720,8 +2820,8 @@ function collectTimelineSummary(record) {
let repeatedSpanCount = 0;
let runtimeDepsStageMaxMs = null;
let slowestRuntimeDepsPlugin = null;
let openSpanCount = 0;
let openSpans = [];
let latestOpenSpanCount = 0;
let latestOpenSpans = [];
let latestEventCount = -1;
let events = [];
let turnAttributionEvents = [];
@ -2734,6 +2834,10 @@ function collectTimelineSummary(record) {
latestEventCount = timeline.eventCount ?? 0;
events = timeline.events;
turnAttributionEvents = Array.isArray(timeline.turnAttributionEvents) ? timeline.turnAttributionEvents : [];
latestOpenSpanCount = timeline.openSpanCount ?? timeline.openSpans?.length ?? 0;
latestOpenSpans = [...(timeline.openSpans ?? [])]
.toSorted((left, right) => (right.ageMs ?? -1) - (left.ageMs ?? -1))
.slice(0, 25);
}
for (const artifact of timeline.artifacts ?? []) {
artifacts.add(artifact);
@ -2742,8 +2846,6 @@ function collectTimelineSummary(record) {
parseErrorCount = Math.max(parseErrorCount, timeline.parseErrorCount ?? 0);
childProcessFailedCount = Math.max(childProcessFailedCount, timeline.childProcesses?.failedCount ?? 0);
repeatedSpanCount = Math.max(repeatedSpanCount, timeline.repeatedSpans?.length ?? 0);
openSpanCount = Math.max(openSpanCount, timeline.openSpanCount ?? timeline.openSpans?.length ?? 0);
openSpans = mergeOpenSpans(openSpans, timeline.openSpans ?? []);
mergeKeySpans(keySpans, timeline.keySpans ?? {});
mergeSpanTotals(spanTotals, timeline.spanTotals ?? {});
eventLoopMaxMs = maxNullable(eventLoopMaxMs, timeline.eventLoop?.maxMs);
@ -2775,8 +2877,8 @@ function collectTimelineSummary(record) {
slowestSpanName: slowestSpan?.name ?? null,
slowestSpanMs: slowestSpan?.durationMs ?? null,
repeatedSpanCount,
openSpanCount,
openSpans,
openSpanCount: latestOpenSpanCount,
openSpans: latestOpenSpans,
artifacts: [...artifacts],
timelineArtifacts: [...artifacts],
events,
@ -2847,9 +2949,12 @@ function mergeKeySpans(target, source) {
}
}
function collectCpuPercentMax(record) {
function collectCpuPercentMax(record, options = {}) {
const values = [];
for (const phase of record.phases ?? []) {
if (options.productOnly === true && !measuredProductPhase(phase)) {
continue;
}
const cpu = phase.metrics?.process?.cpuPercent;
if (typeof cpu === "number") {
values.push(cpu);

View File

@ -0,0 +1,68 @@
/** The measurement scopes a phase or command result may be assigned to. */
export const MEASUREMENT_SCOPES = new Set(["product", "harness", "cleanup"]);

/**
 * Resolves a measurement scope, falling back to a default derived from the phase id.
 *
 * @param {*} value - Candidate scope; returned unchanged when it is a known scope.
 * @param {string|null} [phaseId=null] - Phase id used to pick the default scope.
 * @returns {string} "harness" for setup/prepare/auth-setup style ids and ids
 *   starting with "state-", "cleanup" for cleanup ids, otherwise "product".
 */
export function normalizeMeasurementScope(value, phaseId = null) {
  if (MEASUREMENT_SCOPES.has(value)) {
    return value;
  }
  switch (phaseId) {
    case "target-setup":
    case "auth-prepare":
    case "auth-setup":
    case "prepare":
      return "harness";
    case "cleanup":
    case "auth-cleanup":
    case "env-cleanup":
      return "cleanup";
    default:
      return phaseId?.startsWith("state-") ? "harness" : "product";
  }
}
/**
 * Tells whether a phase is measured as product work.
 *
 * @param {object} phase - Phase descriptor.
 * @returns {boolean} True when `measurementScopeForPhase` yields "product".
 */
export function measuredProductPhase(phase) {
  const scope = measurementScopeForPhase(phase);
  return scope === "product";
}
/**
 * Determines the measurement scope of a phase.
 *
 * Resolution order:
 *  1. an explicit, known `phase.measurementScope`;
 *  2. "harness" for a "provision" phase with a command carrying the
 *     standalone `--no-service` flag;
 *  3. the id-based default from `normalizeMeasurementScope`.
 *
 * @param {object|null|undefined} phase - Phase descriptor.
 * @returns {string} The resolved measurement scope.
 */
export function measurementScopeForPhase(phase) {
  const declaredScope = phase?.measurementScope;
  if (MEASUREMENT_SCOPES.has(declaredScope)) {
    return declaredScope;
  }

  const phaseId = phase?.id;
  if (phaseId === "provision") {
    // Matches the flag only as a whole whitespace-delimited token.
    const noServiceFlag = /(?:^|\s)--no-service(?:\s|$)/;
    const phaseCommands = phase.commands ?? [];
    if (phaseCommands.some((command) => noServiceFlag.test(command))) {
      return "harness";
    }
  }

  return normalizeMeasurementScope(declaredScope, phaseId);
}
/**
 * Classifies a command line by the driver that executes it.
 *
 * Known helper scripts are recognized first, then `ocm @<env> -- agent` and
 * `ocm @<env> -- gateway call` invocations, then any other `ocm` command, then
 * any `node` command.
 *
 * @param {*} command - Command line; null/undefined is treated as an empty string.
 * @returns {string} One of "gateway-rpc", "gateway-http", "openclaw-cli-local",
 *   "openclaw-cli-gateway", "gateway-rpc-via-cli", "ocm", "kova-helper" or "unknown".
 */
export function driverKindForCommand(command) {
  const text = String(command ?? "");

  const helperScriptKinds = [
    ["run-dashboard-session-send-turn.mjs", "gateway-rpc"],
    ["run-openai-compatible-turn.mjs", "gateway-http"],
    ["run-tui-message-turn.mjs", "gateway-rpc"]
  ];
  const helperMatch = helperScriptKinds.find(([scriptName]) => text.includes(scriptName));
  if (helperMatch) {
    return helperMatch[1];
  }

  if (/\bocm\s+@[^ ]+\s+--\s+agent\b/.test(text)) {
    if (text.includes("--local")) {
      return "openclaw-cli-local";
    }
    return "openclaw-cli-gateway";
  }
  if (/\bocm\s+@[^ ]+\s+--\s+gateway\s+call\b/.test(text)) {
    return "gateway-rpc-via-cli";
  }

  // Generic fallbacks, most specific first.
  const genericKinds = [
    [/\bocm\b/, "ocm"],
    [/\bnode\b/, "kova-helper"]
  ];
  const genericMatch = genericKinds.find(([pattern]) => pattern.test(text));
  return genericMatch ? genericMatch[1] : "unknown";
}
/**
 * Resolves the driver kind for a phase.
 *
 * A truthy `phase.driverKind` wins. Otherwise every command is classified with
 * `driverKindForCommand` and the distinct kinds are reduced to a single label.
 *
 * @param {object|null|undefined} phase - Phase descriptor.
 * @param {string[]} [commands] - Commands to classify; defaults to `phase.commands`.
 * @returns {string} The declared kind, the single derived kind, "none" when
 *   there are no commands, or "mixed" when the commands disagree.
 */
export function phaseDriverKind(phase, commands = phase?.commands ?? []) {
  const declaredKind = phase?.driverKind;
  if (declaredKind) {
    return declaredKind;
  }

  const distinctKinds = [...new Set(commands.map((command) => driverKindForCommand(command)))];
  switch (distinctKinds.length) {
    case 0:
      return "none";
    case 1:
      return distinctKinds[0];
    default:
      return "mixed";
  }
}

View File

@ -44,6 +44,8 @@ const defaultThresholds = {
heapSnapshotBytes: 50 * 1024 * 1024,
resourcePeakCommandTreeRssMb: 100,
resourcePeakGatewayRssMb: 100,
resourcePeakTrackedRssMb: 100,
resourceCpuPercentMaxTracked: 25,
openclawTimelineParseErrors: 0,
openclawSlowestSpanMs: 5000,
openclawEventLoopMaxMs: 250,
@ -457,6 +459,8 @@ function metricDeltas(baseline, current) {
"nodeProfileTopFunctionMs",
"heapSnapshotBytes",
"resourceSampleCount",
"resourcePeakTrackedRssMb",
"resourceCpuPercentMaxTracked",
"resourcePeakCommandTreeRssMb",
"resourcePeakGatewayRssMb",
"openclawTimelineEventCount",

View File

@ -237,7 +237,9 @@ export function renderMarkdownReport(report) {
const expectedFailure = turn.expectedFailure ? "; expected failure observed " + turn.expectedFailureObserved : "";
lines.push(` - ${turn.label}: total ${turn.totalTurnMs ?? "unknown"} ms; pre-provider ${turn.preProviderMs ?? "unknown"} ms; provider ${turn.providerFinalMs ?? "unknown"} ms; post-provider ${turn.postProviderMs ?? "unknown"} ms; route ${route}; status ${status}; issue ${issue}; response ${turn.responseOk}; leaks ${turn.processLeakCount ?? "unknown"}${providerTiming}${expectedFailure}`);
if (turn.gatewaySession) {
lines.push(` - gateway session: create ${turn.gatewaySession.createSession}; session create ${turn.gatewaySession.sessionCreateDurationMs ?? "n/a"} ms; send ${turn.gatewaySession.sendDurationMs ?? "unknown"} ms; first assistant ${turn.gatewaySession.timeToFirstAssistantMs ?? "unknown"} ms; matched assistant ${turn.gatewaySession.timeToMatchedAssistantMs ?? "unknown"} ms; polls ${turn.gatewaySession.historyPollCount ?? "unknown"} (${turn.gatewaySession.historyErrorCount ?? "unknown"} errors)`);
const transport = turn.gatewaySession.gatewayTransportKind ?? "unknown";
const fallback = turn.gatewaySession.gatewayTransportFallbackReason ? `; fallback ${turn.gatewaySession.gatewayTransportFallbackReason}` : "";
lines.push(` - gateway session: transport ${transport}${fallback}; create ${turn.gatewaySession.createSession}; session create ${turn.gatewaySession.sessionCreateDurationMs ?? "n/a"} ms; send ${turn.gatewaySession.sendDurationMs ?? "unknown"} ms; first assistant ${turn.gatewaySession.timeToFirstAssistantMs ?? "unknown"} ms; matched assistant ${turn.gatewaySession.timeToMatchedAssistantMs ?? "unknown"} ms; polls ${turn.gatewaySession.historyPollCount ?? "unknown"} (${turn.gatewaySession.historyErrorCount ?? "unknown"} errors)`);
}
if (turn.turnDiagnostics) {
lines.push(` - active window: metadata scans ${turn.metadataScanCount ?? "unknown"} (${turn.metadataScanTotalMs ?? "unknown"} ms total, max ${turn.metadataScanMaxMs ?? "unknown"} ms); event-loop samples ${turn.turnDiagnostics.eventLoop?.sampleCount ?? "unknown"} max ${turn.eventLoopMaxMs ?? "unknown"} ms`);
@ -703,6 +705,11 @@ function summarizeMeasurements(measurements) {
return {
peakRssMb: measurements.peakRssMb ?? null,
cpuPercentMax: measurements.cpuPercentMax ?? null,
measurementScopeSummary: measurements.measurementScopeSummary ?? null,
resourceMeasurementScope: measurements.resourceMeasurementScope ?? null,
resourcePrimaryRole: measurements.resourcePrimaryRole ?? null,
resourcePeakTrackedRssMb: measurements.resourcePeakTrackedRssMb ?? null,
resourceCpuPercentMaxTracked: measurements.resourceCpuPercentMaxTracked ?? null,
health: measurements.health ?? null,
missingDependencyErrors: measurements.missingDependencyErrors ?? null,
pluginLoadFailures: measurements.pluginLoadFailures ?? null,
@ -719,6 +726,8 @@ function summarizeMeasurements(measurements) {
openclawSlowestSpanMs: measurements.openclawSlowestSpanMs ?? null,
openclawOpenSpanCount: measurements.openclawOpenSpanCount ?? null,
openclawOpenRequiredSpanCount: measurements.openclawOpenRequiredSpanCount ?? null,
openclawMissingRequiredSpanCount: measurements.openclawMissingRequiredSpanCount ?? null,
openclawMissingRequiredSpans: measurements.openclawMissingRequiredSpans ?? null,
openclawOpenSpans: measurements.openclawOpenSpans ?? null,
openclawKeySpans: measurements.openclawKeySpans ?? null,
providerRequestCount: measurements.providerRequestCount ?? null,

View File

@ -14,6 +14,7 @@ import { collectEnvMetrics, collectNodeProfileMetrics } from "./metrics.mjs";
import { collectorArtifactDirs, prepareCollectorArtifactDirs } from "./collectors/artifacts.mjs";
import { collectProviderEvidence } from "./collectors/provider.mjs";
import { evaluateRecord } from "./evaluator.mjs";
import { driverKindForCommand, measurementScopeForPhase, normalizeMeasurementScope, phaseDriverKind } from "./measurement-contract.mjs";
import { artifactsDir } from "./paths.mjs";
import { repoRoot } from "./paths.mjs";
import { assertKovaEnvName, assertSafeScenarioCommand } from "./safety.mjs";
@ -82,6 +83,8 @@ export async function executeScenario(scenario, context) {
id: "target-setup",
title: "Target Runtime Setup",
intent: "Prepare the target OpenClaw runtime selector for the scenario.",
measurementScope: "harness",
driverKind: "ocm",
commands: setupResults.map((result) => result.command),
evidence: [],
results: setupResults
@ -143,6 +146,8 @@ export async function executeScenario(scenario, context) {
title: phase.title,
intent: phase.intent,
healthScope: phase.healthScope,
measurementScope: phaseMeasurementScope(phase),
driverKind: phaseDriverKind(phase, commands),
expectedAgentFailure: phase.expectedAgentFailure === true,
commands,
evidence: phase.evidence ?? [],
@ -332,7 +337,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
const authPreparePhase = buildAuthPreparePhase(authPolicy, artifactDir);
if (authPreparePhase) {
phases.push(authPreparePhase);
phases.push(withPhaseContract(authPreparePhase, "harness"));
}
const preparePhase = buildStateLifecyclePhase(context, envName, scenario, "prepare", context.state?.prepare ?? [], artifactDir);
@ -344,20 +349,23 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
if (phase.id === "cleanup") {
continue;
}
const commands = materializeScenarioPhaseCommands(phase, context, envName, artifactDir);
phases.push({
id: phase.id,
title: phase.title,
intent: phase.intent,
healthScope: phase.healthScope,
measurementScope: phaseMeasurementScope(phase),
driverKind: phaseDriverKind(phase, commands),
expectedAgentFailure: phase.expectedAgentFailure === true,
commands: materializeScenarioPhaseCommands(phase, context, envName, artifactDir),
commands,
evidence: phase.evidence ?? []
});
if (phaseSupportsAuthSetup(phase, authPolicy) && !phases.some((planned) => planned.id === "auth-setup")) {
const authSetupPhase = buildAuthSetupPhase(authPolicy, envName, artifactDir);
if (authSetupPhase) {
phases.push(authSetupPhase);
phases.push(withPhaseContract(authSetupPhase, "harness"));
}
}
@ -378,7 +386,7 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
if (!context.keepEnv) {
const authCleanupPhase = buildAuthCleanupPhase(authPolicy, artifactDir);
if (authCleanupPhase) {
phases.push(authCleanupPhase);
phases.push(withPhaseContract(authCleanupPhase, "cleanup"));
}
const cleanupPhase = buildStateLifecyclePhase(context, envName, scenario, "cleanup", context.state?.cleanup ?? [], artifactDir);
if (cleanupPhase) {
@ -388,6 +396,8 @@ function buildPlannedPhases(scenario, context, envName, artifactDir, authPolicy)
id: "env-cleanup",
title: "Environment Cleanup",
intent: "Destroy the disposable Kova env after the scenario finishes.",
measurementScope: "cleanup",
driverKind: "ocm",
commands: [ocmEnvDestroy(envName)],
evidence: ["temporary env destroyed"]
});
@ -405,6 +415,8 @@ function buildTargetSetupPhase(context, envName) {
id: "target-setup",
title: "Target Runtime Setup",
intent: "Prepare the target OpenClaw runtime selector for the scenario.",
measurementScope: "harness",
driverKind: "ocm",
commands: [targetSetupCommand(context.targetPlan)],
evidence: [`local-build runtime ${context.targetPlan.runtimeName}`, `kova env ${envName}`]
};
@ -426,6 +438,8 @@ function buildStateLifecyclePhase(context, envName, scenario, kind, steps, artif
id: kind,
title: stateLifecycleTitle(context.state?.id, kind, phaseId),
intent: stateLifecycleIntent(context.state?.id, kind, phaseId),
measurementScope: normalizeMeasurementScope(null, kind),
driverKind: phaseDriverKind(null, commands),
commands,
evidence,
scenario: scenario.id
@ -459,6 +473,8 @@ async function executeStateLifecycleSteps(context, envName, scenario, kind, step
id: kind,
title: stateLifecycleTitle(context.state?.id, kind, phaseId),
intent: stateLifecycleIntent(context.state?.id, kind, phaseId),
measurementScope: normalizeMeasurementScope(null, kind),
driverKind: phaseDriverKind(null, commands),
commands,
evidence,
results,
@ -476,6 +492,8 @@ async function executeAuthPhase(phase, context, envName, artifactDir, authPolicy
}
return {
...phase,
measurementScope: normalizeMeasurementScope(phase.measurementScope, phase.id),
driverKind: phaseDriverKind(phase),
results,
metrics: await collectEnvMetrics(envName, metricOptions(context, null, { id: phase.id }, artifactDir))
};
@ -569,7 +587,7 @@ async function executeTargetSetup(context, envName, artifactDir) {
}
const results = [
await runCommand(targetSetupCommand(context.targetPlan), {
tagCommandResult(await runCommand(targetSetupCommand(context.targetPlan), {
timeoutMs: context.timeoutMs,
env: { KOVA_ENV_NAME: envName },
resourceSample: context.resourceSampling === false ? null : {
@ -578,7 +596,7 @@ async function executeTargetSetup(context, envName, artifactDir) {
processRoles: context.processRoles ?? [],
artifactPath: join(collectorArtifactDirs(artifactDir).resourceSamples, "target-setup-1.jsonl")
}
})
}), "target-setup")
];
if (results.every((result) => result.status === 0) && context.targetSetup) {
context.targetSetup.completed = true;
@ -617,6 +635,7 @@ async function runScenarioCommand(command, context, envName, artifactDir, phaseI
artifactPath: join(collectorArtifactDirs(artifactDir).resourceSamples, `${safeSegment(phaseId)}-${commandIndex + 1}.jsonl`)
}
});
tagCommandResult(result, phaseId);
if (agentCommand) {
await sleep(1000);
const afterSnapshot = captureProcessSnapshot(snapshotOptions);
@ -638,6 +657,28 @@ async function runScenarioCommand(command, context, envName, artifactDir, phaseI
return result;
}
/**
 * Resolves the measurement scope recorded for a scenario phase.
 *
 * @param {object} phase - Scenario phase descriptor.
 * @returns {string} Whatever `measurementScopeForPhase` reports for the phase.
 */
function phaseMeasurementScope(phase) {
  const resolvedScope = measurementScopeForPhase(phase);
  return resolvedScope;
}
/**
 * Returns a copy of a phase annotated with its measurement contract.
 *
 * @param {object} phase - Phase descriptor; it is not modified.
 * @param {string|null} [scope=null] - Preferred scope; when null/undefined the
 *   phase's own `measurementScope` is used as the candidate.
 * @returns {object} The phase properties plus `measurementScope` (normalized
 *   against the phase id) and `driverKind`.
 */
function withPhaseContract(phase, scope = null) {
  const candidateScope = scope ?? phase.measurementScope;
  const measurementScope = normalizeMeasurementScope(candidateScope, phase.id);
  const driverKind = phaseDriverKind(phase);
  return { ...phase, measurementScope, driverKind };
}
/**
 * Stamps a command result with its measurement scope and driver kind.
 *
 * The scope is resolved as if the command were the only command of a phase
 * with the given id; a scope already present on the result is passed along as
 * that phase's declared scope.
 *
 * @param {object} result - Command result; mutated in place.
 * @param {string} phaseId - Id of the phase that ran the command.
 * @returns {object} The same `result` object, for chaining.
 */
function tagCommandResult(result, phaseId) {
  const singleCommandPhase = {
    id: phaseId,
    measurementScope: result.measurementScope,
    commands: [result.command]
  };
  result.measurementScope = measurementScopeForPhase(singleCommandPhase);
  result.driverKind = driverKindForCommand(result.command);
  return result;
}
function isAgentMessageCommand(command) {
return (command.includes(" -- agent ") && command.includes("--message")) ||
command.includes("run-concurrent-agent-turns.mjs") ||

View File

@ -608,6 +608,7 @@ function localBuildTargetSetupResourceExclusionCheck() {
phases: [
{
id: "target-setup",
measurementScope: "harness",
results: [{
command: "ocm runtime build-local kova-local-test --repo /tmp/openclaw --force",
status: 0,
@ -619,8 +620,23 @@ function localBuildTargetSetupResourceExclusionCheck() {
})
}]
},
{
id: "auth-prepare",
measurementScope: "harness",
results: [{
command: "node support/mock-openai-server.mjs",
status: 0,
durationMs: 500,
resourceSamples: syntheticResourceSamples({
peakRssMb: 1900,
maxCpuPercent: 320,
role: "mock-provider"
})
}]
},
{
id: "scenario-command",
measurementScope: "product",
results: [{
command: "ocm @kova-self-check -- status",
status: 0,
@ -630,6 +646,29 @@ function localBuildTargetSetupResourceExclusionCheck() {
maxCpuPercent: 20,
role: "gateway"
})
}, {
command: "node support/kova-helper.mjs",
status: 0,
durationMs: 100,
resourceSamples: syntheticResourceSamples({
peakRssMb: 600,
maxCpuPercent: 30,
role: "command-tree"
})
}]
},
{
id: "auth-cleanup",
measurementScope: "cleanup",
results: [{
command: "kill $(cat mock/pid)",
status: 0,
durationMs: 50,
resourceSamples: syntheticResourceSamples({
peakRssMb: 1800,
maxCpuPercent: 300,
role: "mock-provider"
})
}]
}
],
@ -638,14 +677,19 @@ function localBuildTargetSetupResourceExclusionCheck() {
logs: zeroLogMetrics()
}
};
evaluateRecord(record, { thresholds: { peakRssMb: 900 } }, {
surface: { thresholds: {} },
evaluateRecord(record, { thresholds: { peakRssMb: 200 } }, {
surface: { thresholds: {}, resourcePrimaryRole: "gateway" },
targetPlan: { kind: "local-build" }
});
assertEqual(record.status, "PASS", "local-build target setup resources ignored status");
assertEqual(record.measurements.peakRssMb, 100, "local-build target setup resources ignored RSS");
assertEqual(record.measurements.resourcePeakTrackedRssMb, 600, "tracked product helper RSS retained separately");
assertEqual(record.measurements.resourcePrimaryRole, "gateway", "primary resource role retained");
assertEqual(record.measurements.resourceByRole.gateway.peakRssMb, 100, "scenario role RSS retained");
assertEqual(record.measurements.resourceByRole["build-tooling"], undefined, "target setup role excluded");
assertEqual(record.measurements.resourceByRole["mock-provider"], undefined, "harness auth resources excluded");
assertEqual(record.measurements.measurementScopeSummary.harnessCommandCount, 2, "harness command count");
assertEqual(record.measurements.measurementScopeSummary.cleanupCommandCount, 1, "cleanup command count");
assertEqual(record.violations, undefined, "no-service local-build record has no gateway violation");
return {
id: "local-build-target-setup-resource-exclusion",
@ -2125,6 +2169,7 @@ function gatewaySessionTurnEvaluationCheck() {
minAssistantCount: 1,
sessionKey: "kova-dashboard-session-send",
runId: "cold-run",
gatewayTransport: { kind: "direct-gateway-rpc", fallbackReason: null },
activeStartedAtEpochMs: base + 1000,
activeFinishedAtEpochMs: base + 2500,
activeTurnMs: 1500,
@ -2150,6 +2195,7 @@ function gatewaySessionTurnEvaluationCheck() {
minAssistantCount: 2,
sessionKey: "kova-dashboard-session-send",
runId: "warm-run",
gatewayTransport: { kind: "direct-gateway-rpc", fallbackReason: null },
activeStartedAtEpochMs: base + 11000,
activeFinishedAtEpochMs: base + 11800,
activeTurnMs: 800,
@ -2284,6 +2330,7 @@ function gatewaySessionTurnEvaluationCheck() {
assertEqual(record.measurements.agentEventLoopMaxMs, 9, "active-window event-loop max");
assertEqual(record.measurements.agentSessionPollCount, 5, "session polling total");
assertEqual(record.measurements.agentTurns[1].gatewaySession.createSession, false, "warm turn reuses session");
assertEqual(record.measurements.agentTurns[0].gatewaySession.gatewayTransportKind, "direct-gateway-rpc", "dashboard turn direct Gateway transport");
const rendered = renderMarkdownReport({
generatedAt: "2026-05-01T00:00:00.000Z",
@ -2295,8 +2342,57 @@ function gatewaySessionTurnEvaluationCheck() {
summary: { statuses: { PASS: 1 } }
});
assertEqual(rendered.includes("gateway session:"), true, "markdown includes gateway session detail");
assertEqual(rendered.includes("transport direct-gateway-rpc"), true, "markdown includes direct Gateway transport");
assertEqual(rendered.includes("active window:"), true, "markdown includes active turn diagnostics");
const fallbackPayload = {
...coldPayload,
gatewayTransport: { kind: "shell", fallbackReason: "gateway-token-unavailable" }
};
const fallbackRecord = {
scenario: "dashboard-session-send-turn",
surface: "dashboard-session-send-turn",
title: "Gateway session shell fallback",
status: "PASS",
phases: [{
id: "cold-dashboard-session-turn",
title: "Cold Gateway Session Turn",
intent: "Synthetic shell fallback",
commands: ["node support/run-dashboard-session-send-turn.mjs --create-session true"],
evidence: [],
results: [{
command: "node support/run-dashboard-session-send-turn.mjs --create-session true",
status: 0,
timedOut: false,
startedAt: new Date(base).toISOString(),
startedAtEpochMs: base,
finishedAt: new Date(base + 5000).toISOString(),
finishedAtEpochMs: base + 5000,
durationMs: 5000,
stdout: JSON.stringify(fallbackPayload),
stderr: ""
}],
metrics: { logs: zeroLogMetrics(), health: { ok: true } }
}],
providerEvidence: {
available: true,
requestCount: 1,
requests: [record.providerEvidence.requests[0]]
},
finalMetrics: { service: { gatewayState: "running" }, logs: zeroLogMetrics() }
};
evaluateRecord(fallbackRecord, {
id: "dashboard-session-send-turn",
agent: { expectedText: "KOVA_AGENT_OK" },
thresholds: {}
}, { surface: { thresholds: {} }, targetPlan: { kind: "runtime" } });
assertEqual(fallbackRecord.status, "FAIL", "dashboard session shell fallback rejected");
assertEqual(
fallbackRecord.violations.some((violation) => violation.metric === "gatewayTransport.kind"),
true,
"dashboard session shell fallback violation"
);
return {
id: "gateway-session-turn-evaluation",
status: "PASS",
@ -4144,6 +4240,47 @@ function diagnosticsTimelineEvaluationCheck() {
"missing diagnostic timeline violation"
);
const missingSpanRecord = {
scenario: "diagnostic-missing-span",
status: "PASS",
phases: [],
finalMetrics: {
service: { gatewayState: "running" },
logs: zeroLogMetrics(),
timeline: {
available: true,
eventCount: 1,
parseErrorCount: 0,
openSpanCount: 0,
openSpans: [],
keySpans: {},
spanTotals: {
"gateway.startup": { count: 1, totalDurationMs: 100, maxDurationMs: 100 }
},
runtimeDeps: {},
eventLoop: {},
providers: {},
childProcesses: {}
}
}
};
evaluateRecord(missingSpanRecord, { thresholds: {} }, {
targetPlan: { kind: "local-build" },
profile: { id: "diagnostic", diagnostics: { timelineRequired: true } },
surface: {
id: "bundled-runtime-deps",
diagnostics: { expectedSpans: ["runtimeDeps.stage"] },
thresholds: {}
}
});
assertEqual(missingSpanRecord.status, "FAIL", "missing required span status");
assertEqual(missingSpanRecord.measurements.openclawMissingRequiredSpanCount, 1, "missing required span measurement");
assertEqual(
missingSpanRecord.violations.some((violation) => violation.metric === "openclawMissingRequiredSpanCount"),
true,
"missing required span violation"
);
const openSpanRecord = {
scenario: "diagnostic-open-span",
status: "PASS",

View File

@ -24,7 +24,11 @@ try {
const sessionKey = args["session-key"] ?? `kova-dashboard-${randomUUID()}`;
const createSession = readBoolean(args["create-session"], true);
const minAssistantCount = readPositiveInteger(args["min-assistant-count"], 1);
const allowShellFallback = readBoolean(args["allow-shell-fallback"], false);
const gatewayTransport = await openDirectGatewayRpcClient(runtimeContext);
if (!gatewayTransport.client && !allowShellFallback) {
throw new Error(`direct Gateway RPC is required for dashboard-session-send-turn; fallback=${gatewayTransport.transport}; reason=${gatewayTransport.fallbackReason ?? "unknown"}`);
}
try {
let created = null;

View File

@ -10,6 +10,7 @@
"agent-process",
"mock-provider"
],
"resourcePrimaryRole": "gateway",
"thresholds": {
"agentTurnMs": 45000,
"coldAgentTurnMs": 45000,