fix(runner): harden summary+sleep handling

2025-11-14 22:17:39 +01:00 · 2025-11-14 22:17:39 +01:00 · 5e97a0d4cc
commit 5e97a0d4cc
parent 70057fba11
4 changed files with 352 additions and 97 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -3,7 +3,7 @@
 ## [Unreleased]

 ### CLI & runtime
- _Nothing yet._
+- Runner guardrails now handle `sleep` invocations with missing/empty arguments without crashing, and summary-style selection no longer emits redundant cases, restoring `pnpm lint`/`pnpm typecheck` in local workflows.

 ### CLI
 - Ad-hoc STDIO invocations that start with `npx -y <package>` now infer the npm package name (stripping versions and ignoring arguments after `--`) instead of producing slugs like `npx-y`, so repeated `mcporter list|call` runs automatically reuse a readable server key without passing `--name`.
--- a/scripts/runner.ts
+++ b/scripts/runner.ts
@ -24,6 +24,7 @@ const LONG_TIMEOUT_MS = 25 * 60 * 1000; // Build + full-suite commands (Next.js
 const LINT_TIMEOUT_MS = 30 * 60 * 1000;
 const LONG_RUN_REPORT_THRESHOLD_MS = 60 * 1000;
 const ENABLE_DEBUG_LOGS = process.env.RUNNER_DEBUG === '1';
+const MAX_SLEEP_SECONDS = 30;

 const WRAPPER_COMMANDS = new Set([
  'sudo',
@ -36,11 +37,22 @@ const WRAPPER_COMMANDS = new Set([
  '/usr/bin/nohup',
 ]);

+type SummaryStyle = 'compact' | 'minimal' | 'verbose';
+const SUMMARY_STYLE = resolveSummaryStyle(process.env.RUNNER_SUMMARY_STYLE);
+
 // biome-ignore format: keep each keyword on its own line for grep-friendly diffs.
-const LONG_SCRIPT_KEYWORDS = ['build', 'test:all', 'test:browser', 'vitest.browser', 'vitest.browser.config.ts'];
+const LONG_SCRIPT_KEYWORDS = [
+  'build',
+  'test:all',
+  'test:browser',
+  'test:e2e',
+  'test:e2e:headed',
+  'vitest.browser',
+  'vitest.browser.config.ts',
+];
 const EXTENDED_SCRIPT_KEYWORDS = ['lint', 'test', 'playwright', 'check', 'docker'];
 const SINGLE_TEST_SCRIPTS = new Set(['test:file']);
-const SINGLE_TEST_FLAGS = new Set(['--run']);
+const SINGLE_TEST_FLAGS = new Set(['--run', '--filter']);
 const TEST_BINARIES = new Set(['vitest', 'playwright', 'jest']);
 const LINT_BINARIES = new Set(['eslint', 'biome', 'oxlint', 'knip']);

@ -156,61 +168,69 @@ function shouldExtendTimeout(commandArgs: string[]): boolean {
    return false;
  }

-  const first = tokens[0];
+  const [first, ...rest] = tokens;
  if (!first) {
    return false;
  }
-  const rest = tokens.slice(1);

  if (first === 'pnpm') {
-    if (rest.length === 0) {
-      return false;
-    }
-    const subcommand = rest[0];
-    if (!subcommand) {
-      return false;
-    }
-    if (!subcommand) {
-      return false;
-    }
-    if (subcommand === 'run') {
-      const script = rest[1];
-      if (!script) {
-        return false;
-      }
-      return shouldExtendForScript(script);
-    }
-    if (subcommand === 'exec') {
-      const execTarget = rest[1];
-      if (!execTarget) {
-        return false;
-      }
-      if (shouldExtendForScript(execTarget) || TEST_BINARIES.has(execTarget.toLowerCase())) {
-        return true;
-      }
-      for (const token of rest.slice(1)) {
-        if (shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase())) {
-          return true;
-        }
-      }
-      return false;
-    }
-    if (shouldExtendForScript(subcommand)) {
-      return true;
-    }
+    return shouldExtendViaPnpm(rest);
+  }
+  if (first === 'bun') {
+    return shouldExtendViaBun(rest);
  }

  if (shouldExtendForScript(first) || TEST_BINARIES.has(first.toLowerCase())) {
    return true;
  }

-  for (const token of rest) {
-    if (shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase())) {
+  return rest.some((token) => shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase()));
+}
+
+function shouldExtendViaPnpm(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && shouldExtendForScript(script);
+  }
+  if (subcommand === 'exec') {
+    const execTarget = rest[1];
+    if (execTarget && (shouldExtendForScript(execTarget) || TEST_BINARIES.has(execTarget.toLowerCase()))) {
+      return true;
+    }
+    return rest.slice(1).some((token) => shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase()));
+  }
+  return shouldExtendForScript(subcommand);
+}
+
+function shouldExtendViaBun(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && shouldExtendForScript(script);
+  }
+  if (subcommand === 'test') {
+    return true;
+  }
+  if (subcommand === 'x' || subcommand === 'bunx') {
+    const execTarget = rest[1];
+    if (execTarget && TEST_BINARIES.has(execTarget.toLowerCase())) {
      return true;
    }
  }
-
-  return false;
+  return shouldExtendForScript(subcommand);
 }

 // Checks script names for long-running markers (lint/test/build/etc.).
@ -228,37 +248,59 @@ function shouldUseLintTimeout(commandArgs: string[]): boolean {
    return false;
  }

-  const first = tokens[0];
+  const [first, ...rest] = tokens;
  if (!first) {
    return false;
  }
-  const rest = tokens.slice(1);

  if (first === 'pnpm') {
-    if (rest.length === 0) {
-      return false;
-    }
-    const subcommand = rest[0];
-    if (!subcommand) {
-      return false;
-    }
-    if (subcommand === 'run') {
-      const script = rest[1];
-      return typeof script === 'string' && script.startsWith('lint');
-    }
-    if (subcommand === 'exec') {
-      const execTarget = rest[1];
-      if (execTarget && LINT_BINARIES.has(execTarget.toLowerCase())) {
-        return true;
-      }
-    }
+    return shouldUseLintTimeoutViaPnpm(rest);
+  }
+  if (first === 'bun') {
+    return shouldUseLintTimeoutViaBun(rest);
  }

-  if (LINT_BINARIES.has(first.toLowerCase())) {
-    return true;
-  }
+  return LINT_BINARIES.has(first.toLowerCase());
+}

-  return false;
+function shouldUseLintTimeoutViaPnpm(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && script.startsWith('lint');
+  }
+  if (subcommand === 'exec') {
+    const execTarget = rest[1];
+    if (execTarget && LINT_BINARIES.has(execTarget.toLowerCase())) {
+      return true;
+    }
+    return rest.slice(1).some((token) => LINT_BINARIES.has(token.toLowerCase()));
+  }
+  return LINT_BINARIES.has(subcommand.toLowerCase());
+}
+
+function shouldUseLintTimeoutViaBun(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && script.startsWith('lint');
+  }
+  if (subcommand === 'x' || subcommand === 'bunx') {
+    return rest.slice(1).some((token) => LINT_BINARIES.has(token.toLowerCase()));
+  }
+  return LINT_BINARIES.has(subcommand.toLowerCase());
 }

 // Detects when a user is running a single spec so we can keep the shorter timeout.
@ -268,27 +310,64 @@ function isSingleTestInvocation(commandArgs: string[]): boolean {
    return false;
  }

-  for (const token of tokens) {
-    if (SINGLE_TEST_FLAGS.has(token)) {
-      return true;
-    }
+  if (tokens.some((token) => SINGLE_TEST_FLAGS.has(token))) {
+    return true;
  }

-  const first = tokens[0];
+  const [first, ...rest] = tokens;
  if (!first) {
    return false;
  }
-  const rest = tokens.slice(1);
+
  if (first === 'pnpm') {
-    if (rest[0] === 'test:file') {
-      return true;
-    }
-  } else if (first === 'vitest') {
-    if (rest.some((token) => SINGLE_TEST_FLAGS.has(token))) {
-      return true;
-    }
+    return isSingleTestViaPnpm(rest);
+  }
+  if (first === 'bun') {
+    return isSingleTestViaBun(rest);
+  }
+  if (first === 'vitest') {
+    return rest.some((token) => SINGLE_TEST_FLAGS.has(token));
  }

+  return SINGLE_TEST_SCRIPTS.has(first);
+}
+
+function isSingleTestViaPnpm(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && SINGLE_TEST_SCRIPTS.has(script);
+  }
+  if (subcommand === 'exec') {
+    return rest.slice(1).some((token) => SINGLE_TEST_FLAGS.has(token));
+  }
+  return SINGLE_TEST_SCRIPTS.has(subcommand);
+}
+
+function isSingleTestViaBun(rest: string[]): boolean {
+  if (rest.length === 0) {
+    return false;
+  }
+  const subcommand = rest[0];
+  if (!subcommand) {
+    return false;
+  }
+  if (subcommand === 'run') {
+    const script = rest[1];
+    return typeof script === 'string' && SINGLE_TEST_SCRIPTS.has(script);
+  }
+  if (subcommand === 'test') {
+    return true;
+  }
+  if (subcommand === 'x' || subcommand === 'bunx') {
+    return rest.slice(1).some((token) => SINGLE_TEST_FLAGS.has(token));
+  }
  return false;
 }

@ -529,11 +608,11 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
    const elapsedMs = Date.now() - startTime;
    if (timedOut) {
      console.error(
-        `[runner] Command terminated after ${formatDuration(context.timeoutMs)}. Re-run inside tmux for long-lived work.`
-      );
-      console.error(
-        `[runner] Finished ${commandLabel} (exit ${exitCode}, elapsed ${formatDuration(elapsedMs)}; timed out).`
+        `[runner] Command terminated after ${formatDuration(
+          context.timeoutMs
+        )}. Re-run inside tmux for long-lived work.`
      );
+      console.error(formatCompletionSummary({ exitCode, elapsedMs, timedOut: true, commandLabel }));
      process.exit(124);
    }

@ -543,7 +622,7 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
      );
    }

-    console.error(`[runner] Finished ${commandLabel} (exit ${exitCode}, elapsed ${formatDuration(elapsedMs)}).`);
+    console.error(formatCompletionSummary({ exitCode, elapsedMs, commandLabel }));
    process.exit(exitCode);
  } catch (error) {
    console.error('[runner] Failed to launch command:', error instanceof Error ? error.message : String(error));
@ -552,6 +631,39 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
  }
 }

+async function runCommandWithoutTimeout(context: RunnerExecutionContext): Promise<void> {
+  const { command, args, env } = buildExecutionParams(context.commandArgs);
+  const commandLabel = formatDisplayCommand(context.commandArgs);
+  const startTime = Date.now();
+
+  const child = spawn(command, args, {
+    cwd: context.workspaceDir,
+    env,
+    stdio: 'inherit',
+  });
+
+  const removeSignalHandlers = registerSignalForwarding(child);
+
+  try {
+    const exitCode = await new Promise<number>((resolve, reject) => {
+      child.once('error', (error) => {
+        removeSignalHandlers();
+        reject(error);
+      });
+      child.once('exit', (code, signal) => {
+        removeSignalHandlers();
+        resolve(code ?? exitCodeFromSignal(signal));
+      });
+    });
+    const elapsedMs = Date.now() - startTime;
+    console.error(formatCompletionSummary({ exitCode, elapsedMs, commandLabel }));
+    process.exit(exitCode);
+  } catch (error) {
+    console.error('[runner] Failed to launch command:', error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  }
+}
+
 // Prepares the executable, args, and sanitized env for the child process.
 function buildExecutionParams(commandArgs: string[]): { command: string; args: string[]; env: NodeJS.ProcessEnv } {
  const env = { ...process.env };
@ -616,8 +728,10 @@ function exitCodeFromSignal(signal: NodeJS.Signals | null): number {
 // Gives policy interceptors a chance to fully handle a command before exec.
 async function resolveCommandInterception(context: RunnerExecutionContext): Promise<CommandInterceptionResult> {
  const interceptors: Array<(ctx: RunnerExecutionContext) => Promise<boolean>> = [
+    maybeHandleTmuxInvocation,
    maybeHandleFindInvocation,
    maybeHandleRmInvocation,
+    maybeHandleSleepInvocation,
  ];

  for (const interceptor of interceptors) {
@ -769,6 +883,93 @@ async function maybeHandleGitRm(gitContext: GitExecutionContext): Promise<boolea
  return true;
 }

+// Blocks `sleep` calls longer than the AGENTS.md ceiling so scripts cannot stall the runner.
+async function maybeHandleSleepInvocation(context: RunnerExecutionContext): Promise<boolean> {
+  const tokens = stripWrappersAndAssignments(context.commandArgs);
+  if (tokens.length === 0) {
+    return false;
+  }
+  const [first, ...rest] = tokens;
+  if (!first || !isSleepBinary(first) || rest.length === 0) {
+    return false;
+  }
+
+  const commandIndex = context.commandArgs.length - tokens.length;
+  if (commandIndex < 0) {
+    return false;
+  }
+
+  const adjustedArgs = [...context.commandArgs];
+  const adjustments: string[] = [];
+
+  for (let offset = 0; offset < rest.length; offset += 1) {
+    const token = rest[offset];
+    if (!token) {
+      continue;
+    }
+    const durationSeconds = parseSleepDurationSeconds(token);
+    if (durationSeconds == null || durationSeconds <= MAX_SLEEP_SECONDS) {
+      continue;
+    }
+    adjustments.push(`${token}→${formatSleepDuration(MAX_SLEEP_SECONDS)}`);
+    adjustedArgs[commandIndex + 1 + offset] = formatSleepArgument(MAX_SLEEP_SECONDS);
+  }
+
+  if (adjustments.length === 0) {
+    return false;
+  }
+
+  console.error(`[runner] sleep arguments exceed ${MAX_SLEEP_SECONDS}s; clamping (${adjustments.join(', ')}).`);
+  context.commandArgs = adjustedArgs;
+  return false;
+}
+
+async function maybeHandleTmuxInvocation(context: RunnerExecutionContext): Promise<boolean> {
+  const tokens = stripWrappersAndAssignments(context.commandArgs);
+  if (tokens.length === 0) {
+    return false;
+  }
+  const candidate = tokens[0];
+  if (!candidate) {
+    return false;
+  }
+  if (basename(candidate) !== 'tmux') {
+    return false;
+  }
+  console.error('[runner] Detected tmux invocation; executing command without runner timeout guardrails.');
+  await runCommandWithoutTimeout(context);
+  return true;
+}
+
+function parseSleepDurationSeconds(token: string): number | null {
+  const match = /^(\d+(?:\.\d+)?)([smhdSMHD]?)$/.exec(token);
+  if (!match) {
+    return null;
+  }
+  const value = Number(match[1]);
+  if (!Number.isFinite(value)) {
+    return null;
+  }
+  const unit = match[2]?.toLowerCase() ?? '';
+  const multiplier = unit === 'm' ? 60 : unit === 'h' ? 60 * 60 : unit === 'd' ? 60 * 60 * 24 : 1;
+  return value * multiplier;
+}
+
+function formatSleepArgument(seconds: number): string {
+  return Number.isInteger(seconds) ? `${seconds}` : seconds.toString();
+}
+
+function formatSleepDuration(seconds: number): string {
+  if (Number.isInteger(seconds)) {
+    return `${seconds}s`;
+  }
+  return `${seconds.toFixed(2)}s`;
+}
+
+function isSleepBinary(token: string): boolean {
+  return token === 'sleep' || token.endsWith('/sleep');
+}
+
 // Detects `git find` invocations that need policy enforcement.
 function extractFindInvocation(commandArgs: string[]): GitInvocation | null {
  for (const [index, token] of commandArgs.entries()) {
@ -1291,6 +1492,55 @@ function formatDuration(durationMs: number): string {
  return `${hours}h ${remainingMinutes}m`;
 }

+function resolveSummaryStyle(rawValue: string | undefined | null): SummaryStyle {
+  if (!rawValue) {
+    return 'compact';
+  }
+  const normalized = rawValue.trim().toLowerCase();
+  if (normalized === 'minimal') {
+    return 'minimal';
+  }
+  if (normalized === 'verbose') {
+    return 'verbose';
+  }
+  if (normalized === 'short' || normalized === 'compact') {
+    return 'compact';
+  }
+  return 'compact';
+}
+
+function formatCompletionSummary(options: {
+  exitCode: number;
+  elapsedMs?: number;
+  timedOut?: boolean;
+  commandLabel: string;
+}): string {
+  const { exitCode, elapsedMs, timedOut, commandLabel } = options;
+  const durationText = typeof elapsedMs === 'number' ? formatDuration(elapsedMs) : null;
+  switch (SUMMARY_STYLE) {
+    case 'minimal': {
+      const parts = [`${exitCode}`];
+      if (durationText) {
+        parts.push(durationText);
+      }
+      if (timedOut) {
+        parts.push('timeout');
+      }
+      return `[runner] ${parts.join(' · ')}`;
+    }
+    case 'verbose': {
+      const elapsedPart = durationText ? `, elapsed ${durationText}` : '';
+      const timeoutPart = timedOut ? '; timed out' : '';
+      return `[runner] Finished ${commandLabel} (exit ${exitCode}${elapsedPart}${timeoutPart}).`;
+    }
+    default: {
+      const elapsedPart = durationText ? ` in ${durationText}` : '';
+      const timeoutPart = timedOut ? ' (timeout)' : '';
+      return `[runner] exit ${exitCode}${elapsedPart}${timeoutPart}`;
+    }
+  }
+}
+
 // Joins the command args in a shell-friendly way for log display.
 function formatDisplayCommand(commandArgs: string[]): string {
  return commandArgs.map((token) => (token.includes(' ') ? `"${token}"` : token)).join(' ');
--- a/tests/ephemeral-target.test.ts
+++ b/tests/ephemeral-target.test.ts
@ -1,6 +1,6 @@
 import { describe, expect, it, vi } from 'vitest';
-import type { Runtime } from '../src/runtime.js';
 import { prepareEphemeralServerTarget } from '../src/cli/ephemeral-target.js';
+import type { Runtime } from '../src/runtime.js';

 function createRuntimeStub(): Runtime {
  return {
@ -18,6 +18,9 @@ describe('prepareEphemeralServerTarget', () => {
    });
    expect(target).toBe('xcodebuildmcp');
    expect(resolution?.definition.command.kind).toBe('stdio');
-    expect(resolution?.definition.command.command).toBe('npx');
+    if (resolution?.definition.command.kind !== 'stdio') {
+      throw new Error('Expected stdio command');
+    }
+    expect(resolution.definition.command.command).toBe('npx');
  });
 });
--- a/tests/list-inline-stdio.test.ts
+++ b/tests/list-inline-stdio.test.ts
@ -1,18 +1,20 @@
 import { describe, expect, it, vi } from 'vitest';
-import type { Runtime, ServerToolInfo } from '../src/runtime.js';
-import type { ServerDefinition } from '../src/config.js';
 import { handleList } from '../src/cli/list-command.js';
+import type { ServerDefinition } from '../src/config.js';
+import type { Runtime, ServerToolInfo } from '../src/runtime.js';

 function createRuntimeStub() {
  const definitions: ServerDefinition[] = [];
-  const listTools = vi.fn(async (_server: string, _options?: unknown): Promise<ServerToolInfo[]> => [
-    {
-      name: 'doctor',
-      description: 'Runs diagnostics',
-      inputSchema: { type: 'object', properties: {}, required: [] },
-      outputSchema: undefined,
-    },
-  ]);
+  const listTools = vi.fn(
+    async (_server: string, _options?: unknown): Promise<ServerToolInfo[]> => [
+      {
+        name: 'doctor',
+        description: 'Runs diagnostics',
+        inputSchema: { type: 'object', properties: {}, required: [] },
+        outputSchema: undefined,
+      },
+    ]
+  );

  const getDefinition = (name: string): ServerDefinition => {
    const found = definitions.find((entry) => entry.name === name);