fix: harden gateway message ingress evidence

2026-05-01 16:52:59 +01:00 · 2026-05-01 16:52:59 +01:00 · 689ef0e43f
commit 689ef0e43f
parent 2254df72f9
11 changed files with 212 additions and 52 deletions
--- a/scenarios/agent-gateway-rpc-turn.json
+++ b/scenarios/agent-gateway-rpc-turn.json
@ -23,9 +23,16 @@
    {
      "id": "provision",
      "title": "Provision Gateway Env",
-      "intent": "Start a disposable OpenClaw gateway before sending the Gateway RPC agent turn.",
-      "commands": ["ocm start {env} {startSelector} --json"],
-      "evidence": ["gateway port", "runtime binding", "startup readiness"]
+      "intent": "Create a disposable OpenClaw env without starting the gateway yet so Kova auth is applied before the gateway process boots.",
+      "commands": ["ocm start {env} {startSelector} --no-service --json"],
+      "evidence": ["gateway port", "runtime binding", "env created without service"]
+    },
+    {
+      "id": "gateway-start",
+      "title": "Start Gateway",
+      "intent": "Start the gateway after auth/provider config is already present in the OpenClaw home.",
+      "commands": ["ocm service install {env} --json", "ocm service start {env} --json"],
+      "evidence": ["gateway service installed", "gateway service started", "startup readiness"]
    },
    {
      "id": "gateway-agent-turn",
--- a/scenarios/dashboard-session-send-turn.json
+++ b/scenarios/dashboard-session-send-turn.json
@ -23,16 +23,23 @@
    {
      "id": "provision",
      "title": "Provision Dashboard Env",
-      "intent": "Start a disposable OpenClaw gateway before sending a dashboard session message.",
-      "commands": ["ocm start {env} {startSelector} --json"],
-      "evidence": ["gateway port", "runtime binding", "startup readiness"]
+      "intent": "Create a disposable OpenClaw env without starting the gateway yet so Kova auth is applied before the gateway process boots.",
+      "commands": ["ocm start {env} {startSelector} --no-service --json"],
+      "evidence": ["gateway port", "runtime binding", "env created without service"]
+    },
+    {
+      "id": "gateway-start",
+      "title": "Start Gateway",
+      "intent": "Start the gateway after auth/provider config is already present in the OpenClaw home.",
+      "commands": ["ocm service install {env} --json", "ocm service start {env} --json"],
+      "evidence": ["gateway service installed", "gateway service started", "startup readiness"]
    },
    {
      "id": "dashboard-session-turn",
      "title": "Dashboard Session Message",
      "intent": "Exercise Gateway `sessions.send` and verify the final assistant response is present in chat history.",
      "commands": [
-        "ocm @{env} -- node {kovaRoot}/support/run-dashboard-session-send-turn.mjs --session-key kova-dashboard-session-send --message 'Reply with exact ASCII text KOVA_AGENT_OK only.' --expected-text KOVA_AGENT_OK --timeout 120000"
+        "node {kovaRoot}/support/run-dashboard-session-send-turn.mjs --env {env} --session-key kova-dashboard-session-send --message 'Reply with exact ASCII text KOVA_AGENT_OK only.' --expected-text KOVA_AGENT_OK --timeout 120000"
      ],
      "evidence": ["sessions.send command duration", "chat history final assistant text", "mock provider request timing", "gateway health after turn", "role resource samples"]
    },
--- a/scenarios/openai-compatible-turn.json
+++ b/scenarios/openai-compatible-turn.json
@ -23,16 +23,23 @@
    {
      "id": "provision",
      "title": "Provision HTTP Env",
-      "intent": "Start a disposable OpenClaw gateway before sending an OpenAI-compatible HTTP request.",
-      "commands": ["ocm start {env} {startSelector} --json"],
-      "evidence": ["gateway port", "runtime binding", "startup readiness"]
+      "intent": "Create a disposable OpenClaw env without starting the gateway yet so Kova auth is applied before the gateway process boots.",
+      "commands": ["ocm start {env} {startSelector} --no-service --json"],
+      "evidence": ["gateway port", "runtime binding", "env created without service"]
+    },
+    {
+      "id": "gateway-start",
+      "title": "Start Gateway",
+      "intent": "Start the gateway after auth/provider config is already present in the OpenClaw home.",
+      "commands": ["ocm service install {env} --json", "ocm service start {env} --json"],
+      "evidence": ["gateway service installed", "gateway service started", "startup readiness"]
    },
    {
      "id": "openai-compatible-turn",
      "title": "OpenAI-Compatible Message",
      "intent": "Exercise the `/v1/chat/completions` user API and verify final assistant output.",
      "commands": [
-        "ocm @{env} -- node {kovaRoot}/support/run-openai-compatible-turn.mjs --model openai/gpt-5.5 --message 'Reply with exact ASCII text KOVA_AGENT_OK only.' --expected-text KOVA_AGENT_OK --timeout 120000"
+        "node {kovaRoot}/support/run-openai-compatible-turn.mjs --env {env} --model openai/gpt-5.5 --message 'Reply with exact ASCII text KOVA_AGENT_OK only.' --expected-text KOVA_AGENT_OK --timeout 120000"
      ],
      "evidence": ["HTTP status", "final assistant text", "mock provider request timing", "gateway health after turn", "role resource samples"]
    },
--- a/scenarios/tui-message-turn.json
+++ b/scenarios/tui-message-turn.json
@ -23,9 +23,16 @@
    {
      "id": "provision",
      "title": "Provision TUI Env",
-      "intent": "Start a disposable OpenClaw gateway before attaching the TUI.",
-      "commands": ["ocm start {env} {startSelector} --json"],
-      "evidence": ["gateway port", "runtime binding", "startup readiness"]
+      "intent": "Create a disposable OpenClaw env without starting the gateway yet so Kova auth is applied before the gateway process boots.",
+      "commands": ["ocm start {env} {startSelector} --no-service --json"],
+      "evidence": ["gateway port", "runtime binding", "env created without service"]
+    },
+    {
+      "id": "gateway-start",
+      "title": "Start Gateway",
+      "intent": "Start the gateway after auth/provider config is already present in the OpenClaw home.",
+      "commands": ["ocm service install {env} --json", "ocm service start {env} --json"],
+      "evidence": ["gateway service installed", "gateway service started", "startup readiness"]
    },
    {
      "id": "tui-message-turn",
--- a/src/collectors/provider.mjs
+++ b/src/collectors/provider.mjs
@ -193,9 +193,12 @@ export function computeProviderTurnAttribution(result, providerEvidence) {
  }
  const commandStartedAt = result.startedAtEpochMs;
  const commandFinishedAt = result.finishedAtEpochMs;
-  const requests = providerEvidence?.available === true
+  const requestsInCommand = providerEvidence?.available === true
    ? requestsWithinCommand(providerEvidence.requests ?? [], commandStartedAt, commandFinishedAt)
    : [];
+  const requests = requestsInCommand.length > 0
+    ? requestsInCommand
+    : requestsAfterCommandTimeout(providerEvidence?.requests ?? [], commandStartedAt, commandFinishedAt);
  const firstRequest = requests[0] ?? null;
  const lastResponse = requests
    .filter((request) => typeof request.respondedAtEpochMs === "number")
@ -233,9 +236,15 @@ export function computeProviderTurnAttribution(result, providerEvidence) {
      providerDominates: null,
      preProviderDominates: null,
      missingProviderRequest: true,
+      providerRequestTiming: "missing",
+      providerAfterCommandEnd: false,
+      providerLateByMs: null,
      providerEvidenceAvailable: providerEvidence?.available === true
    };
  }
+  const attributionWindowEnd = Math.max(commandFinishedAt, lastProviderResponseAt);
+  const attributionTotalMs = Math.max(0, attributionWindowEnd - commandStartedAt);
+  const providerAfterCommandEnd = firstProviderRequestAt > commandFinishedAt;
  const firstByte = requests
    .filter((request) => typeof request.firstByteLatencyMs === "number")
    .toSorted((left, right) => left.firstByteLatencyMs - right.firstByteLatencyMs)[0] ?? null;
@ -268,9 +277,12 @@ export function computeProviderTurnAttribution(result, providerEvidence) {
    errorClasses: summarizeBy(requests, "errorClass"),
    usage: summarizeUsage(requests),
    errors: requestErrors(requests),
-    providerDominates: dominanceRatio(Math.max(0, lastProviderResponseAt - firstProviderRequestAt), Math.max(0, commandFinishedAt - commandStartedAt)),
-    preProviderDominates: dominanceRatio(Math.max(0, firstProviderRequestAt - commandStartedAt), Math.max(0, commandFinishedAt - commandStartedAt)),
+    providerDominates: dominanceRatio(Math.max(0, lastProviderResponseAt - firstProviderRequestAt), attributionTotalMs),
+    preProviderDominates: dominanceRatio(Math.max(0, firstProviderRequestAt - commandStartedAt), attributionTotalMs),
    missingProviderRequest: false,
+    providerRequestTiming: requestsInCommand.length > 0 ? "within-command" : "after-command-timeout",
+    providerAfterCommandEnd,
+    providerLateByMs: providerAfterCommandEnd ? Math.max(0, firstProviderRequestAt - commandFinishedAt) : null,
    providerEvidenceAvailable: true
  };
 }
@ -288,6 +300,21 @@ function requestsWithinCommand(requests, commandStartedAt, commandFinishedAt) {
    .toSorted((left, right) => left.receivedAtEpochMs - right.receivedAtEpochMs);
 }

+/**
+ * Select provider requests that only arrived after the command had already finished.
+ *
+ * A request qualifies when its `receivedAtEpochMs` is a number that is strictly later than
+ * `commandFinishedAt`, not earlier than `commandStartedAt`, and no later than
+ * `commandFinishedAt + graceMs`.
+ *
+ * @param {Array<object>} requests - Provider request records; entries without a numeric
+ *   `receivedAtEpochMs` are ignored.
+ * @param {number} commandStartedAt - Command start in epoch milliseconds.
+ * @param {number} commandFinishedAt - Command end in epoch milliseconds.
+ * @param {number} [graceMs=60000] - How long after the command end a request may still be
+ *   attributed to the command.
+ * @returns {Array<object>} Matching requests sorted by ascending `receivedAtEpochMs`;
+ *   empty when either command bound is not a number.
+ */
+function requestsAfterCommandTimeout(requests, commandStartedAt, commandFinishedAt, graceMs = 60000) {
+  if (typeof commandStartedAt !== "number" || typeof commandFinishedAt !== "number") {
+    return [];
+  }
+  // Upper bound of the attribution window: anything later is treated as unrelated traffic.
+  const latestAcceptedAt = commandFinishedAt + graceMs;
+  return requests
+    .filter((request) =>
+      typeof request.receivedAtEpochMs === "number" &&
+      request.receivedAtEpochMs > commandFinishedAt &&
+      request.receivedAtEpochMs >= commandStartedAt &&
+      request.receivedAtEpochMs <= latestAcceptedAt
+    )
+    .toSorted((left, right) => left.receivedAtEpochMs - right.receivedAtEpochMs);
+}
+
 function normalizeTimelineProviderRequest(event, line) {
  const receivedAtEpochMs = numberOrParsedTime(event.receivedAtEpochMs, event.receivedAt ?? event.timestamp ?? event.time);
  const durationMs = numberOrNull(event.durationMs ?? event.elapsedMs ?? event.ms);
--- a/src/evaluator.mjs
+++ b/src/evaluator.mjs
@ -920,6 +920,9 @@ function collectAgentTurns(record, providerEvidence, scenario, timelineSummary)
        providerOutcomes: attribution?.outcomes ?? [],
        providerErrorClasses: attribution?.errorClasses ?? [],
        providerErrors: attribution?.errors ?? [],
+        providerRequestTiming: attribution?.providerRequestTiming ?? null,
+        providerAfterCommandEnd: attribution?.providerAfterCommandEnd ?? false,
+        providerLateByMs: attribution?.providerLateByMs ?? null,
        phaseBreakdown,
        cleanupMs: phaseBreakdown?.buckets?.cleanupMs ?? null,
        processLeaks: result.processSnapshots?.leaks ?? null,
--- a/src/reporting/report.mjs
+++ b/src/reporting/report.mjs
@ -205,8 +205,9 @@ export function renderMarkdownReport(report) {
          const route = turn.providerRoutes?.[0]?.value ?? "unknown";
          const status = turn.providerStatuses?.[0]?.value ?? "unknown";
          const issue = turn.providerErrorClasses?.[0]?.value ?? turn.providerOutcomes?.[0]?.value ?? "none";
+          const providerTiming = turn.providerAfterCommandEnd ? `; provider late ${turn.providerLateByMs} ms` : "";
          const expectedFailure = turn.expectedFailure ? "; expected failure observed " + turn.expectedFailureObserved : "";
-          lines.push(`  - ${turn.label}: total ${turn.totalTurnMs ?? "unknown"} ms; pre-provider ${turn.preProviderMs ?? "unknown"} ms; provider ${turn.providerFinalMs ?? "unknown"} ms; post-provider ${turn.postProviderMs ?? "unknown"} ms; route ${route}; status ${status}; issue ${issue}; response ${turn.responseOk}; leaks ${turn.processLeakCount ?? "unknown"}${expectedFailure}`);
+          lines.push(`  - ${turn.label}: total ${turn.totalTurnMs ?? "unknown"} ms; pre-provider ${turn.preProviderMs ?? "unknown"} ms; provider ${turn.providerFinalMs ?? "unknown"} ms; post-provider ${turn.postProviderMs ?? "unknown"} ms; route ${route}; status ${status}; issue ${issue}; response ${turn.responseOk}; leaks ${turn.processLeakCount ?? "unknown"}${providerTiming}${expectedFailure}`);
          const breakdown = summarizeAgentTurnBreakdownForMarkdown(turn.phaseBreakdown);
          if (breakdown) {
            lines.push(`    - breakdown: ${breakdown}`);
--- a/src/runner.mjs
+++ b/src/runner.mjs
@ -528,9 +528,15 @@ function readinessThresholdForPhase(scenario, phase) {
  if (!phase) {
    return 0;
  }
+  if ((phase.commands ?? []).some((command) => /(?:^|\s)--no-service(?:\s|$)/.test(command))) {
+    return 0;
+  }
  if (phase.id === "cold-start" || phase.id === "provision" || phase.id === "baseline" || phase.id === "gateway" || phase.id === "start") {
    return thresholds.coldReadyMs ?? thresholds.gatewayReadyMs ?? defaultMs;
  }
+  if (phase.id === "gateway-start") {
+    return thresholds.gatewayReadyMs ?? defaultMs;
+  }
  if (phase.id === "warm-restart" || phase.id === "restart") {
    return thresholds.warmReadyMs ?? thresholds.restartReadyMs ?? thresholds.gatewayReadyMs ?? defaultMs;
  }
--- a/support/openclaw-runtime.mjs
+++ b/support/openclaw-runtime.mjs
@ -1,5 +1,7 @@
 import { pathToFileURL } from "node:url";
 import path from "node:path";
+import { dirname } from "node:path";
+import { execFileSync } from "node:child_process";

 export async function importOpenClawDistModule(relativePath) {
  const packageRoot = process.cwd();
@ -15,6 +17,38 @@ export async function importOpenClawDistModule(relativePath) {
  }
 }

+/**
+ * Point the current process at the OpenClaw runtime that belongs to an OCM env.
+ *
+ * Queries `ocm env status <env> --json` and `ocm env resolve <env> --json -- status`,
+ * then sets `OPENCLAW_HOME` and `OPENCLAW_GATEWAY_PORT` in `process.env` and changes the
+ * working directory to the directory containing the resolved binary.
+ *
+ * @param {string} envName - Name of the OCM env (the `--env` argument of the support script).
+ * @returns {{envName: string, root: string, gatewayPort: number, binaryPath: string,
+ *   packageRoot: string, runtime: object}} The resolved context; every `runtime` field is
+ *   `null` when the resolve output does not provide it.
+ * @throws {Error} When `envName` is empty, an `ocm` call fails, the status root or resolved
+ *   binary path is missing, or the gateway port is not a positive integer.
+ */
+export function prepareOpenClawRuntimeFromOcmEnv(envName) {
+  if (!envName) {
+    throw new Error("--env is required");
+  }
+  const envStatus = runOcmJson(["env", "status", envName, "--json"]);
+  const binding = runOcmJson(["env", "resolve", envName, "--json", "--", "status"]);
+
+  // Validation order matters for which error surfaces first: root, binary path, then port.
+  const root = readRequiredString(envStatus.root, "ocm env status root");
+  const binaryPath = readRequiredString(binding.binaryPath, "ocm env resolve binaryPath");
+  const gatewayPort = Number(envStatus.gatewayPort);
+  const portIsUsable = Number.isInteger(gatewayPort) && gatewayPort > 0;
+  if (!portIsUsable) {
+    throw new Error(`invalid gateway port from OCM status: ${JSON.stringify(envStatus.gatewayPort)}`);
+  }
+
+  const packageRoot = dirname(binaryPath);
+  process.env.OPENCLAW_HOME = root;
+  process.env.OPENCLAW_GATEWAY_PORT = String(gatewayPort);
+  process.chdir(packageRoot);
+
+  const runtime = {
+    bindingKind: binding.bindingKind ?? null,
+    bindingName: binding.bindingName ?? null,
+    releaseVersion: binding.runtimeReleaseVersion ?? null,
+    releaseChannel: binding.runtimeReleaseChannel ?? null,
+    sourceKind: binding.runtimeSourceKind ?? null
+  };
+  return { envName, root, gatewayPort, binaryPath, packageRoot, runtime };
+}
+
 export function parseSupportArgs(argv) {
  const parsed = {};
  for (let index = 0; index < argv.length; index += 1) {
@ -44,6 +78,43 @@ export function readTimeoutMs(value, fallbackMs) {
  return parsed;
 }

+/**
+ * Run the `ocm` CLI synchronously and parse its standard output as JSON.
+ *
+ * stdin is ignored; stdout and stderr are captured rather than inherited.
+ *
+ * @param {string[]} args - Arguments passed to `ocm` verbatim.
+ * @returns {*} The value produced by `JSON.parse` on the captured stdout.
+ * @throws {Error} `ocm <args> failed: …` when running the command throws (the message
+ *   prefers the trimmed stderr), or `ocm <args> did not return JSON: …` (with the first
+ *   1000 characters of stdout) when the output cannot be parsed. The underlying error is
+ *   attached as `cause` in both cases.
+ */
+export function runOcmJson(args) {
+  let stdout = "";
+  try {
+    stdout = execFileSync("ocm", args, {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "pipe"]
+    });
+  } catch (error) {
+    const stderr = error?.stderr ? String(error.stderr) : "";
+    // Keep the original error reachable so exit status, signal and stack are not lost.
+    throw new Error(`ocm ${args.join(" ")} failed: ${stderr.trim() || error.message}`, { cause: error });
+  }
+  try {
+    return JSON.parse(stdout);
+  } catch (error) {
+    throw new Error(`ocm ${args.join(" ")} did not return JSON: ${stdout.slice(0, 1000)}`, { cause: error });
+  }
+}
+
+/**
+ * Run the `ocm` CLI synchronously and return its standard output as text.
+ *
+ * stdin is ignored; stdout and stderr are captured rather than inherited.
+ *
+ * @param {string[]} args - Arguments passed to `ocm` verbatim.
+ * @returns {string} The captured stdout decoded as UTF-8.
+ * @throws {Error} `ocm <args> failed: …` when running the command throws; the message
+ *   prefers the trimmed stderr and the underlying error is attached as `cause`.
+ */
+export function runOcmText(args) {
+  try {
+    return execFileSync("ocm", args, {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "pipe"]
+    });
+  } catch (error) {
+    const stderr = error?.stderr ? String(error.stderr) : "";
+    // Keep the original error reachable so exit status, signal and stack are not lost.
+    throw new Error(`ocm ${args.join(" ")} failed: ${stderr.trim() || error.message}`, { cause: error });
+  }
+}
+
+/**
+ * Return `value` unchanged when it is a string with non-whitespace content.
+ *
+ * @param {*} value - Candidate value.
+ * @param {string} label - Description used in the error message.
+ * @returns {string} The original (untrimmed) string.
+ * @throws {Error} `<label> missing` when `value` is not a string or is blank.
+ */
+function readRequiredString(value, label) {
+  const isNonBlankString = typeof value === "string" && value.trim().length > 0;
+  if (isNonBlankString) {
+    return value;
+  }
+  throw new Error(`${label} missing`);
+}
+
 export function extractText(value) {
  if (typeof value === "string") {
    return value;
--- a/support/run-dashboard-session-send-turn.mjs
+++ b/support/run-dashboard-session-send-turn.mjs
@ -5,9 +5,10 @@ import {
  extractText,
  failJson,
  finishJson,
-  importOpenClawDistModule,
  parseSupportArgs,
+  prepareOpenClawRuntimeFromOcmEnv,
  readTimeoutMs,
+  runOcmJson,
  sleep
 } from "./openclaw-runtime.mjs";

@ -15,38 +16,30 @@ const startedAtEpochMs = Date.now();

 try {
  const args = parseSupportArgs(process.argv.slice(2));
+  const runtimeContext = prepareOpenClawRuntimeFromOcmEnv(args.env);
  const message = args.message ?? "Reply with exact ASCII text KOVA_AGENT_OK only.";
  const expectedText = args["expected-text"] ?? "KOVA_AGENT_OK";
  const timeoutMs = readTimeoutMs(args.timeout, 120000);
  const sessionKey = args["session-key"] ?? `kova-dashboard-${randomUUID()}`;
-  const { callGateway } = await importOpenClawDistModule("gateway/call.js");

-  const created = await callGateway({
-    method: "sessions.create",
-    params: {
+  const created = gatewayCall(runtimeContext.envName, "sessions.create", {
      agentId: "main",
      key: sessionKey,
      label: "Kova Dashboard Session Send"
-    },
-    timeoutMs: Math.min(timeoutMs, 30000)
-  });
+    }, Math.min(timeoutMs, 30000));
  const canonicalKey = created?.key ?? sessionKey;
  const sendStartedAtEpochMs = Date.now();
-  const sent = await callGateway({
-    method: "sessions.send",
-    params: {
+  const sent = gatewayCall(runtimeContext.envName, "sessions.send", {
      key: canonicalKey,
      message,
      thinking: "off",
      timeoutMs,
      idempotencyKey: `kova-dashboard-${randomUUID()}`
-    },
-    timeoutMs: Math.min(timeoutMs, 30000)
-  });
+    }, Math.min(timeoutMs, 30000));
  const runId = typeof sent?.runId === "string" ? sent.runId : null;

  const history = await waitForAssistantText({
-    callGateway,
+    envName: runtimeContext.envName,
    sessionKey: canonicalKey,
    expectedText,
    timeoutMs,
@ -57,6 +50,8 @@ try {
    ok: true,
    surface: "dashboard-session-send-turn",
    method: "sessions.send",
+    envName: runtimeContext.envName,
+    runtime: runtimeContext.runtime,
    sessionKey: canonicalKey,
    runId,
    startedAtEpochMs,
@ -71,31 +66,48 @@ try {
  failJson(error, { surface: "dashboard-session-send-turn", finishedAtEpochMs: Date.now() });
 }

-async function waitForAssistantText({ callGateway, sessionKey, expectedText, timeoutMs, minAssistantCount }) {
+async function waitForAssistantText({ envName, sessionKey, expectedText, timeoutMs, minAssistantCount }) {
  const deadline = Date.now() + timeoutMs;
  let lastAssistantText = "";
+  let lastHistoryError = null;
  let assistantTexts = [];
  while (Date.now() < deadline) {
-    const history = await callGateway({
-      method: "chat.history",
-      params: { sessionKey, limit: 16 },
-      timeoutMs: 15000
-    });
-    assistantTexts = extractAssistantTexts(history?.messages ?? []);
-    lastAssistantText = assistantTexts.at(-1) ?? "";
-    const matchedAssistantText = assistantTexts
-      .slice(Math.max(0, minAssistantCount - 1))
-      .find((text) => text.includes(expectedText));
-    if (matchedAssistantText) {
-      return { assistantTexts, lastAssistantText, matchedAssistantText };
+    try {
+      const history = gatewayCall(envName, "chat.history", { sessionKey, limit: 16 }, Math.min(15000, Math.max(1000, deadline - Date.now())));
+      lastHistoryError = null;
+      assistantTexts = extractAssistantTexts(history?.messages ?? []);
+      lastAssistantText = assistantTexts.at(-1) ?? "";
+      const matchedAssistantText = assistantTexts
+        .slice(Math.max(0, minAssistantCount - 1))
+        .find((text) => text.includes(expectedText));
+      if (matchedAssistantText) {
+        return { assistantTexts, lastAssistantText, matchedAssistantText };
+      }
+    } catch (error) {
+      lastHistoryError = error;
    }
    await sleep(500);
  }
  throw new Error(
-    `timed out waiting for dashboard assistant text ${JSON.stringify(expectedText)}; last=${JSON.stringify(lastAssistantText)}`
+    `timed out waiting for dashboard assistant text ${JSON.stringify(expectedText)}; last=${JSON.stringify(lastAssistantText)}; lastHistoryError=${JSON.stringify(lastHistoryError?.message ?? null)}`
  );
 }

+/**
+ * Invoke a Gateway method through `ocm @<env> -- gateway call … --json`.
+ *
+ * @param {string} envName - OCM env whose gateway is called.
+ * @param {string} method - Gateway method name, e.g. `sessions.send`.
+ * @param {object} params - Method parameters; serialized with `JSON.stringify`.
+ * @param {number} timeoutMs - Value passed to the CLI's `--timeout` option.
+ * @returns {*} The parsed JSON output returned by `runOcmJson`.
+ */
+function gatewayCall(envName, method, params, timeoutMs) {
+  const envSelector = `@${envName}`;
+  const callArgs = [
+    "gateway",
+    "call",
+    method,
+    "--params",
+    JSON.stringify(params),
+    "--timeout",
+    String(timeoutMs),
+    "--json"
+  ];
+  // Everything after "--" is forwarded to the command run inside the env.
+  return runOcmJson([envSelector, "--", ...callArgs]);
+}
+
 function extractAssistantTexts(messages) {
  if (!Array.isArray(messages)) {
    return [];
--- a/support/run-openai-compatible-turn.mjs
+++ b/support/run-openai-compatible-turn.mjs
@ -1,11 +1,13 @@
 #!/usr/bin/env node

+import fs from "node:fs";
+import path from "node:path";
 import {
  extractText,
  failJson,
  finishJson,
-  importOpenClawDistModule,
  parseSupportArgs,
+  prepareOpenClawRuntimeFromOcmEnv,
  readTimeoutMs
 } from "./openclaw-runtime.mjs";

@ -13,14 +15,13 @@ const startedAtEpochMs = Date.now();

 try {
  const args = parseSupportArgs(process.argv.slice(2));
+  const runtimeContext = prepareOpenClawRuntimeFromOcmEnv(args.env);
  const message = args.message ?? "Reply with exact ASCII text KOVA_AGENT_OK only.";
  const expectedText = args["expected-text"] ?? "KOVA_AGENT_OK";
  const timeoutMs = readTimeoutMs(args.timeout, 120000);
  const model = args.model ?? "openai/gpt-5.5";
-  const { getRuntimeConfig } = await importOpenClawDistModule("config/io.js");
-  const { resolveGatewayPort } = await importOpenClawDistModule("config/paths.js");
-  const cfg = getRuntimeConfig();
-  const port = resolveGatewayPort(cfg, process.env);
+  const cfg = readConfig(runtimeContext.root);
+  const port = runtimeContext.gatewayPort;
  const token = readGatewayToken(cfg);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(new Error(`OpenAI-compatible request timed out after ${timeoutMs}ms`)), timeoutMs);
@ -54,6 +55,8 @@ try {
      ok: true,
      surface: "openai-compatible-turn",
      method: "POST /v1/chat/completions",
+      envName: runtimeContext.envName,
+      runtime: runtimeContext.runtime,
      model,
      startedAtEpochMs,
      requestStartedAtEpochMs,
@ -78,3 +81,12 @@ function readGatewayToken(cfg) {
  ];
  return candidates.find((value) => typeof value === "string" && value.trim().length > 0)?.trim() ?? "";
 }
+
+/**
+ * Load `<root>/.openclaw/openclaw.json`.
+ *
+ * @param {string} root - Directory that contains the `.openclaw` folder.
+ * @returns {*} The parsed JSON, or an empty object when the file cannot be read or parsed.
+ */
+function readConfig(root) {
+  const configPath = path.join(root, ".openclaw", "openclaw.json");
+  let parsed = {};
+  try {
+    parsed = JSON.parse(fs.readFileSync(configPath, "utf8"));
+  } catch {
+    // Best effort: a missing or malformed config falls back to the empty object.
+  }
+  return parsed;
+}