fix(runner): harden summary+sleep handling

This commit is contained in:
Peter Steinberger 2025-11-14 22:17:39 +01:00
parent 70057fba11
commit 5e97a0d4cc
4 changed files with 352 additions and 97 deletions

View File

@ -3,7 +3,7 @@
## [Unreleased]
### CLI & runtime
- _Nothing yet._
- Runner guardrails now handle `sleep` invocations with missing/empty arguments without crashing, and summary-style selection no longer emits redundant cases, restoring `pnpm lint`/`pnpm typecheck` in local workflows.
### CLI
- Ad-hoc STDIO invocations that start with `npx -y <package>` now infer the npm package name (stripping versions and ignoring arguments after `--`) instead of producing slugs like `npx-y`, so repeated `mcporter list|call` runs automatically reuse a readable server key without passing `--name`.

View File

@ -24,6 +24,7 @@ const LONG_TIMEOUT_MS = 25 * 60 * 1000; // Build + full-suite commands (Next.js
const LINT_TIMEOUT_MS = 30 * 60 * 1000;
const LONG_RUN_REPORT_THRESHOLD_MS = 60 * 1000;
const ENABLE_DEBUG_LOGS = process.env.RUNNER_DEBUG === '1';
const MAX_SLEEP_SECONDS = 30;
const WRAPPER_COMMANDS = new Set([
'sudo',
@ -36,11 +37,22 @@ const WRAPPER_COMMANDS = new Set([
'/usr/bin/nohup',
]);
type SummaryStyle = 'compact' | 'minimal' | 'verbose';
const SUMMARY_STYLE = resolveSummaryStyle(process.env.RUNNER_SUMMARY_STYLE);
// biome-ignore format: keep each keyword on its own line for grep-friendly diffs.
const LONG_SCRIPT_KEYWORDS = ['build', 'test:all', 'test:browser', 'vitest.browser', 'vitest.browser.config.ts'];
const LONG_SCRIPT_KEYWORDS = [
'build',
'test:all',
'test:browser',
'test:e2e',
'test:e2e:headed',
'vitest.browser',
'vitest.browser.config.ts',
];
const EXTENDED_SCRIPT_KEYWORDS = ['lint', 'test', 'playwright', 'check', 'docker'];
const SINGLE_TEST_SCRIPTS = new Set(['test:file']);
const SINGLE_TEST_FLAGS = new Set(['--run']);
const SINGLE_TEST_FLAGS = new Set(['--run', '--filter']);
const TEST_BINARIES = new Set(['vitest', 'playwright', 'jest']);
const LINT_BINARIES = new Set(['eslint', 'biome', 'oxlint', 'knip']);
@ -156,61 +168,69 @@ function shouldExtendTimeout(commandArgs: string[]): boolean {
return false;
}
const first = tokens[0];
const [first, ...rest] = tokens;
if (!first) {
return false;
}
const rest = tokens.slice(1);
if (first === 'pnpm') {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
if (!script) {
return false;
}
return shouldExtendForScript(script);
}
if (subcommand === 'exec') {
const execTarget = rest[1];
if (!execTarget) {
return false;
}
if (shouldExtendForScript(execTarget) || TEST_BINARIES.has(execTarget.toLowerCase())) {
return true;
}
for (const token of rest.slice(1)) {
if (shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase())) {
return true;
}
}
return false;
}
if (shouldExtendForScript(subcommand)) {
return true;
}
return shouldExtendViaPnpm(rest);
}
if (first === 'bun') {
return shouldExtendViaBun(rest);
}
if (shouldExtendForScript(first) || TEST_BINARIES.has(first.toLowerCase())) {
return true;
}
for (const token of rest) {
if (shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase())) {
return rest.some((token) => shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase()));
}
function shouldExtendViaPnpm(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && shouldExtendForScript(script);
}
if (subcommand === 'exec') {
const execTarget = rest[1];
if (execTarget && (shouldExtendForScript(execTarget) || TEST_BINARIES.has(execTarget.toLowerCase()))) {
return true;
}
return rest.slice(1).some((token) => shouldExtendForScript(token) || TEST_BINARIES.has(token.toLowerCase()));
}
return shouldExtendForScript(subcommand);
}
function shouldExtendViaBun(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && shouldExtendForScript(script);
}
if (subcommand === 'test') {
return true;
}
if (subcommand === 'x' || subcommand === 'bunx') {
const execTarget = rest[1];
if (execTarget && TEST_BINARIES.has(execTarget.toLowerCase())) {
return true;
}
}
return false;
return shouldExtendForScript(subcommand);
}
// Checks script names for long-running markers (lint/test/build/etc.).
@ -228,37 +248,59 @@ function shouldUseLintTimeout(commandArgs: string[]): boolean {
return false;
}
const first = tokens[0];
const [first, ...rest] = tokens;
if (!first) {
return false;
}
const rest = tokens.slice(1);
if (first === 'pnpm') {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && script.startsWith('lint');
}
if (subcommand === 'exec') {
const execTarget = rest[1];
if (execTarget && LINT_BINARIES.has(execTarget.toLowerCase())) {
return true;
}
}
return shouldUseLintTimeoutViaPnpm(rest);
}
if (first === 'bun') {
return shouldUseLintTimeoutViaBun(rest);
}
if (LINT_BINARIES.has(first.toLowerCase())) {
return true;
}
return LINT_BINARIES.has(first.toLowerCase());
}
return false;
function shouldUseLintTimeoutViaPnpm(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && script.startsWith('lint');
}
if (subcommand === 'exec') {
const execTarget = rest[1];
if (execTarget && LINT_BINARIES.has(execTarget.toLowerCase())) {
return true;
}
return rest.slice(1).some((token) => LINT_BINARIES.has(token.toLowerCase()));
}
return LINT_BINARIES.has(subcommand.toLowerCase());
}
function shouldUseLintTimeoutViaBun(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && script.startsWith('lint');
}
if (subcommand === 'x' || subcommand === 'bunx') {
return rest.slice(1).some((token) => LINT_BINARIES.has(token.toLowerCase()));
}
return LINT_BINARIES.has(subcommand.toLowerCase());
}
// Detects when a user is running a single spec so we can keep the shorter timeout.
@ -268,27 +310,64 @@ function isSingleTestInvocation(commandArgs: string[]): boolean {
return false;
}
for (const token of tokens) {
if (SINGLE_TEST_FLAGS.has(token)) {
return true;
}
if (tokens.some((token) => SINGLE_TEST_FLAGS.has(token))) {
return true;
}
const first = tokens[0];
const [first, ...rest] = tokens;
if (!first) {
return false;
}
const rest = tokens.slice(1);
if (first === 'pnpm') {
if (rest[0] === 'test:file') {
return true;
}
} else if (first === 'vitest') {
if (rest.some((token) => SINGLE_TEST_FLAGS.has(token))) {
return true;
}
return isSingleTestViaPnpm(rest);
}
if (first === 'bun') {
return isSingleTestViaBun(rest);
}
if (first === 'vitest') {
return rest.some((token) => SINGLE_TEST_FLAGS.has(token));
}
return SINGLE_TEST_SCRIPTS.has(first);
}
function isSingleTestViaPnpm(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && SINGLE_TEST_SCRIPTS.has(script);
}
if (subcommand === 'exec') {
return rest.slice(1).some((token) => SINGLE_TEST_FLAGS.has(token));
}
return SINGLE_TEST_SCRIPTS.has(subcommand);
}
function isSingleTestViaBun(rest: string[]): boolean {
if (rest.length === 0) {
return false;
}
const subcommand = rest[0];
if (!subcommand) {
return false;
}
if (subcommand === 'run') {
const script = rest[1];
return typeof script === 'string' && SINGLE_TEST_SCRIPTS.has(script);
}
if (subcommand === 'test') {
return true;
}
if (subcommand === 'x' || subcommand === 'bunx') {
return rest.slice(1).some((token) => SINGLE_TEST_FLAGS.has(token));
}
return false;
}
@ -529,11 +608,11 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
const elapsedMs = Date.now() - startTime;
if (timedOut) {
console.error(
`[runner] Command terminated after ${formatDuration(context.timeoutMs)}. Re-run inside tmux for long-lived work.`
);
console.error(
`[runner] Finished ${commandLabel} (exit ${exitCode}, elapsed ${formatDuration(elapsedMs)}; timed out).`
`[runner] Command terminated after ${formatDuration(
context.timeoutMs
)}. Re-run inside tmux for long-lived work.`
);
console.error(formatCompletionSummary({ exitCode, elapsedMs, timedOut: true, commandLabel }));
process.exit(124);
}
@ -543,7 +622,7 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
);
}
console.error(`[runner] Finished ${commandLabel} (exit ${exitCode}, elapsed ${formatDuration(elapsedMs)}).`);
console.error(formatCompletionSummary({ exitCode, elapsedMs, commandLabel }));
process.exit(exitCode);
} catch (error) {
console.error('[runner] Failed to launch command:', error instanceof Error ? error.message : String(error));
@ -552,6 +631,39 @@ async function runCommand(context: RunnerExecutionContext): Promise<void> {
}
}
async function runCommandWithoutTimeout(context: RunnerExecutionContext): Promise<void> {
const { command, args, env } = buildExecutionParams(context.commandArgs);
const commandLabel = formatDisplayCommand(context.commandArgs);
const startTime = Date.now();
const child = spawn(command, args, {
cwd: context.workspaceDir,
env,
stdio: 'inherit',
});
const removeSignalHandlers = registerSignalForwarding(child);
try {
const exitCode = await new Promise<number>((resolve, reject) => {
child.once('error', (error) => {
removeSignalHandlers();
reject(error);
});
child.once('exit', (code, signal) => {
removeSignalHandlers();
resolve(code ?? exitCodeFromSignal(signal));
});
});
const elapsedMs = Date.now() - startTime;
console.error(formatCompletionSummary({ exitCode, elapsedMs, commandLabel }));
process.exit(exitCode);
} catch (error) {
console.error('[runner] Failed to launch command:', error instanceof Error ? error.message : String(error));
process.exit(1);
}
}
// Prepares the executable, args, and sanitized env for the child process.
function buildExecutionParams(commandArgs: string[]): { command: string; args: string[]; env: NodeJS.ProcessEnv } {
const env = { ...process.env };
@ -616,8 +728,10 @@ function exitCodeFromSignal(signal: NodeJS.Signals | null): number {
// Gives policy interceptors a chance to fully handle a command before exec.
async function resolveCommandInterception(context: RunnerExecutionContext): Promise<CommandInterceptionResult> {
const interceptors: Array<(ctx: RunnerExecutionContext) => Promise<boolean>> = [
maybeHandleTmuxInvocation,
maybeHandleFindInvocation,
maybeHandleRmInvocation,
maybeHandleSleepInvocation,
];
for (const interceptor of interceptors) {
@ -769,6 +883,93 @@ async function maybeHandleGitRm(gitContext: GitExecutionContext): Promise<boolea
return true;
}
// Blocks `sleep` calls longer than the AGENTS.md ceiling so scripts cannot stall the runner.
async function maybeHandleSleepInvocation(context: RunnerExecutionContext): Promise<boolean> {
const tokens = stripWrappersAndAssignments(context.commandArgs);
if (tokens.length === 0) {
return false;
}
const [first, ...rest] = tokens;
if (!first || !isSleepBinary(first) || rest.length === 0) {
return false;
}
const commandIndex = context.commandArgs.length - tokens.length;
if (commandIndex < 0) {
return false;
}
const adjustedArgs = [...context.commandArgs];
const adjustments: string[] = [];
for (let offset = 0; offset < rest.length; offset += 1) {
const token = rest[offset];
if (!token) {
continue;
}
const durationSeconds = parseSleepDurationSeconds(token);
if (durationSeconds == null || durationSeconds <= MAX_SLEEP_SECONDS) {
continue;
}
adjustments.push(`${token}${formatSleepDuration(MAX_SLEEP_SECONDS)}`);
adjustedArgs[commandIndex + 1 + offset] = formatSleepArgument(MAX_SLEEP_SECONDS);
}
if (adjustments.length === 0) {
return false;
}
console.error(`[runner] sleep arguments exceed ${MAX_SLEEP_SECONDS}s; clamping (${adjustments.join(', ')}).`);
context.commandArgs = adjustedArgs;
return false;
}
async function maybeHandleTmuxInvocation(context: RunnerExecutionContext): Promise<boolean> {
const tokens = stripWrappersAndAssignments(context.commandArgs);
if (tokens.length === 0) {
return false;
}
const candidate = tokens[0];
if (!candidate) {
return false;
}
if (basename(candidate) !== 'tmux') {
return false;
}
console.error('[runner] Detected tmux invocation; executing command without runner timeout guardrails.');
await runCommandWithoutTimeout(context);
return true;
}
function parseSleepDurationSeconds(token: string): number | null {
const match = /^(\d+(?:\.\d+)?)([smhdSMHD]?)$/.exec(token);
if (!match) {
return null;
}
const value = Number(match[1]);
if (!Number.isFinite(value)) {
return null;
}
const unit = match[2]?.toLowerCase() ?? '';
const multiplier = unit === 'm' ? 60 : unit === 'h' ? 60 * 60 : unit === 'd' ? 60 * 60 * 24 : 1;
return value * multiplier;
}
function formatSleepArgument(seconds: number): string {
return Number.isInteger(seconds) ? `${seconds}` : seconds.toString();
}
function formatSleepDuration(seconds: number): string {
if (Number.isInteger(seconds)) {
return `${seconds}s`;
}
return `${seconds.toFixed(2)}s`;
}
function isSleepBinary(token: string): boolean {
return token === 'sleep' || token.endsWith('/sleep');
}
// Detects `git find` invocations that need policy enforcement.
function extractFindInvocation(commandArgs: string[]): GitInvocation | null {
for (const [index, token] of commandArgs.entries()) {
@ -1291,6 +1492,55 @@ function formatDuration(durationMs: number): string {
return `${hours}h ${remainingMinutes}m`;
}
function resolveSummaryStyle(rawValue: string | undefined | null): SummaryStyle {
if (!rawValue) {
return 'compact';
}
const normalized = rawValue.trim().toLowerCase();
if (normalized === 'minimal') {
return 'minimal';
}
if (normalized === 'verbose') {
return 'verbose';
}
if (normalized === 'short' || normalized === 'compact') {
return 'compact';
}
return 'compact';
}
function formatCompletionSummary(options: {
exitCode: number;
elapsedMs?: number;
timedOut?: boolean;
commandLabel: string;
}): string {
const { exitCode, elapsedMs, timedOut, commandLabel } = options;
const durationText = typeof elapsedMs === 'number' ? formatDuration(elapsedMs) : null;
switch (SUMMARY_STYLE) {
case 'minimal': {
const parts = [`${exitCode}`];
if (durationText) {
parts.push(durationText);
}
if (timedOut) {
parts.push('timeout');
}
return `[runner] ${parts.join(' · ')}`;
}
case 'verbose': {
const elapsedPart = durationText ? `, elapsed ${durationText}` : '';
const timeoutPart = timedOut ? '; timed out' : '';
return `[runner] Finished ${commandLabel} (exit ${exitCode}${elapsedPart}${timeoutPart}).`;
}
default: {
const elapsedPart = durationText ? ` in ${durationText}` : '';
const timeoutPart = timedOut ? ' (timeout)' : '';
return `[runner] exit ${exitCode}${elapsedPart}${timeoutPart}`;
}
}
}
// Joins the command args in a shell-friendly way for log display.
function formatDisplayCommand(commandArgs: string[]): string {
return commandArgs.map((token) => (token.includes(' ') ? `"${token}"` : token)).join(' ');

View File

@ -1,6 +1,6 @@
import { describe, expect, it, vi } from 'vitest';
import type { Runtime } from '../src/runtime.js';
import { prepareEphemeralServerTarget } from '../src/cli/ephemeral-target.js';
import type { Runtime } from '../src/runtime.js';
function createRuntimeStub(): Runtime {
return {
@ -18,6 +18,9 @@ describe('prepareEphemeralServerTarget', () => {
});
expect(target).toBe('xcodebuildmcp');
expect(resolution?.definition.command.kind).toBe('stdio');
expect(resolution?.definition.command.command).toBe('npx');
if (resolution?.definition.command.kind !== 'stdio') {
throw new Error('Expected stdio command');
}
expect(resolution.definition.command.command).toBe('npx');
});
});

View File

@ -1,18 +1,20 @@
import { describe, expect, it, vi } from 'vitest';
import type { Runtime, ServerToolInfo } from '../src/runtime.js';
import type { ServerDefinition } from '../src/config.js';
import { handleList } from '../src/cli/list-command.js';
import type { ServerDefinition } from '../src/config.js';
import type { Runtime, ServerToolInfo } from '../src/runtime.js';
function createRuntimeStub() {
const definitions: ServerDefinition[] = [];
const listTools = vi.fn(async (_server: string, _options?: unknown): Promise<ServerToolInfo[]> => [
{
name: 'doctor',
description: 'Runs diagnostics',
inputSchema: { type: 'object', properties: {}, required: [] },
outputSchema: undefined,
},
]);
const listTools = vi.fn(
async (_server: string, _options?: unknown): Promise<ServerToolInfo[]> => [
{
name: 'doctor',
description: 'Runs diagnostics',
inputSchema: { type: 'object', properties: {}, required: [] },
outputSchema: undefined,
},
]
);
const getDefinition = (name: string): ServerDefinition => {
const found = definitions.find((entry) => entry.name === name);