feat(cli): expose MCP wrappers and capture action

This commit is contained in:
Peter Steinberger 2026-06-07 07:35:00 +01:00
parent 3608d9c782
commit 7e61018019
No known key found for this signature in database
30 changed files with 2190 additions and 22 deletions

View File

@ -80,8 +80,9 @@ enum CommanderRuntimeRouter {
return true
}
if let index = arguments.firstIndex(where: { self.isHelpToken($0) }) {
let tokens = Array(arguments.prefix(index))
let helpSearchArguments = Array(arguments.prefix { $0 != "--" })
if let index = helpSearchArguments.firstIndex(where: { self.isHelpToken($0) }) {
let tokens = Array(helpSearchArguments.prefix(index))
if self.handleAgentPermissionHelp(tokens: tokens) {
return true
}

View File

@ -69,6 +69,8 @@ enum CommandRegistry {
.init(type: CompletionsCommand.self, category: .core),
.init(type: CommanderCommand.self, category: .core),
.init(type: AgentCommand.self, category: .ai),
.init(type: BrowserCommand.self, category: .mcp),
.init(type: InspectUICommand.self, category: .mcp),
.init(type: MCPCommand.self, category: .mcp),
]

View File

@ -26,7 +26,8 @@ extension ErrorHandlingCommand {
message: errorMessage(for: error),
code: errorCode,
details: errorDetails(for: error),
logger: logger)
logger: logger
)
} else {
let errorMessage: String = if let peekabooError = error as? PeekabooError {
peekabooError.errorDescription ?? String(describing: error)

View File

@ -23,6 +23,8 @@ struct CommandRuntimeOptions {
var autoStartDaemon = true
var bridgeSocketPath: String?
var requiresElementActions = false
var requiresInspectAccessibilityTree = false
var requiresBrowserMCP = false
func makeConfiguration() -> CommandRuntime.Configuration {
CommandRuntime.Configuration(
@ -251,6 +253,10 @@ extension CommandRuntime {
BridgeCapabilityPolicy.supportsInspectAccessibilityTree(for: handshake)
}
/// Reports whether the bridge described by `handshake` supports browser MCP.
///
/// Forwards the decision to `BridgeCapabilityPolicy.supportsBrowserMCP(for:)`.
/// - Parameter handshake: Handshake response received from the bridge.
/// - Returns: Whatever `BridgeCapabilityPolicy` reports for this handshake.
static func supportsBrowserMCP(for handshake: PeekabooBridgeHandshakeResponse) -> Bool {
BridgeCapabilityPolicy.supportsBrowserMCP(for: handshake)
}
/// Reports whether the bridge described by `handshake` can request post-event permission.
///
/// Forwards the decision to `BridgeCapabilityPolicy.supportsPostEventPermissionRequest(for:)`.
/// - Parameter handshake: Handshake response received from the bridge.
/// - Returns: Whatever `BridgeCapabilityPolicy` reports for this handshake.
static func supportsPostEventPermissionRequest(for handshake: PeekabooBridgeHandshakeResponse) -> Bool {
BridgeCapabilityPolicy.supportsPostEventPermissionRequest(for: handshake)
}

View File

@ -90,6 +90,12 @@ enum CommanderCLIBinder {
if commandType == SetValueCommand.self || commandType == PerformActionCommand.self {
options.requiresElementActions = true
}
if commandType == InspectUICommand.self {
options.requiresInspectAccessibilityTree = true
}
if commandType == BrowserCommand.self {
options.requiresBrowserMCP = true
}
return options
}

View File

@ -16,6 +16,14 @@ enum BridgeCapabilityPolicy {
return false
}
if options.requiresInspectAccessibilityTree && !self.supportsInspectAccessibilityTree(for: handshake) {
return false
}
if options.requiresBrowserMCP && !self.supportsBrowserMCP(for: handshake) {
return false
}
return true
}
@ -47,6 +55,14 @@ enum BridgeCapabilityPolicy {
handshake.supportedOperations.contains(.inspectAccessibilityTree)
}
/// Reports whether a bridge handshake advertises everything the browser MCP wrapper needs.
///
/// The negotiated protocol version must be at least 1.4 and the handshake must list
/// the `browserStatus`, `browserConnect`, `browserDisconnect` and `browserExecute`
/// operations.
/// - Parameter handshake: Handshake response received from the bridge.
/// - Returns: `true` only when the version and all four operations are present.
static func supportsBrowserMCP(for handshake: PeekabooBridgeHandshakeResponse) -> Bool {
    let minimumVersion = PeekabooBridgeProtocolVersion(major: 1, minor: 4)
    guard handshake.negotiatedVersion >= minimumVersion else {
        return false
    }
    let operations = handshake.supportedOperations
    guard operations.contains(.browserStatus), operations.contains(.browserConnect) else {
        return false
    }
    return operations.contains(.browserDisconnect) && operations.contains(.browserExecute)
}
static func supportsPostEventPermissionRequest(for handshake: PeekabooBridgeHandshakeResponse) -> Bool {
handshake.negotiatedVersion >= PeekabooBridgeProtocolVersion(major: 1, minor: 2) &&
handshake.supportedOperations.contains(.requestPostEventPermission)

View File

@ -0,0 +1,365 @@
import Darwin
import Dispatch
import Foundation
/// Error thrown when the capture-action child process cannot be launched.
private struct CaptureActionProcessLaunchError: LocalizedError {
    /// Human-readable explanation of what went wrong.
    let message: String

    /// Surfaces `message` as the localized error description.
    var errorDescription: String? {
        let description: String? = self.message
        return description
    }
}
/// Lock-protected accumulator that keeps at most 64 KiB of a child's pipe output.
///
/// Bytes beyond the cap are discarded and remembered through the `truncated` flag
/// returned by `finish()`.
private final class BoundedPipeOutput: @unchecked Sendable {
    private let lock = NSLock()
    private nonisolated(unsafe) var data = Data()
    private nonisolated(unsafe) var truncated = false

    /// Appends `chunk`, keeping only as many bytes as still fit under the cap.
    ///
    /// An empty chunk is ignored so it can never flag truncation by itself.
    /// - Parameter chunk: Bytes read from the pipe.
    nonisolated func append(_ chunk: Data) {
        let maxOutputBytes = 64 * 1024
        guard !chunk.isEmpty else { return }
        self.lock.lock()
        defer { self.lock.unlock() }
        guard self.data.count < maxOutputBytes else {
            self.truncated = true
            return
        }
        let remaining = maxOutputBytes - self.data.count
        if chunk.count <= remaining {
            self.data.append(chunk)
        } else {
            self.data.append(contentsOf: chunk.prefix(remaining))
            self.truncated = true
        }
    }

    /// Returns the collected output as text plus whether any bytes were dropped.
    ///
    /// Decoding is lossy on purpose: the byte cap can cut a multi-byte UTF-8
    /// character in half, and a strict decode would then discard the entire
    /// capture. Invalid sequences become U+FFFD instead.
    /// - Returns: The decoded text and the truncation flag.
    nonisolated func finish() -> (String, Bool) {
        self.lock.lock()
        defer { self.lock.unlock() }
        return (String(decoding: self.data, as: UTF8.self), self.truncated)
    }
}
/// Routes SIGINT and SIGTERM received by this process to a callback while alive.
///
/// On creation the default disposition of both signals is replaced with `SIG_IGN`
/// and a dispatch signal source is installed for each. `cancel()` (also run from
/// `deinit`) cancels the sources and reinstalls the handlers that were in place before.
private final class CaptureActionSignalForwarder: @unchecked Sendable {
    private let lock = NSLock()
    private let queue = DispatchQueue(label: "boo.peekaboo.capture-action.signals")
    private nonisolated(unsafe) var sources: [any DispatchSourceSignal] = []
    private nonisolated(unsafe) var previousHandlers: [(Int32, sig_t?)] = []
    private nonisolated(unsafe) var cancelled = false

    /// Starts forwarding SIGINT and SIGTERM.
    /// - Parameter onSignal: Invoked on a private serial queue with the signal number.
    nonisolated init(onSignal: @escaping @Sendable (Int32) -> Void) {
        for signalNumber in [SIGINT, SIGTERM] {
            // Remember the old disposition so cancel() can put it back.
            let previous: sig_t? = signal(signalNumber, SIG_IGN)
            self.previousHandlers.append((signalNumber, previous))

            let source = DispatchSource.makeSignalSource(signal: signalNumber, queue: self.queue)
            source.setEventHandler { onSignal(signalNumber) }
            source.resume()
            self.sources.append(source)
        }
    }

    /// Stops forwarding and restores the previous signal handlers; later calls are no-ops.
    nonisolated func cancel() {
        self.lock.lock()
        if self.cancelled {
            self.lock.unlock()
            return
        }
        self.cancelled = true
        let activeSources = self.sources
        let handlersToRestore = self.previousHandlers
        self.sources.removeAll()
        self.previousHandlers.removeAll()
        self.lock.unlock()

        // The teardown itself runs outside the lock.
        activeSources.forEach { $0.cancel() }
        for entry in handlersToRestore {
            signal(entry.0, entry.1)
        }
    }

    deinit {
        self.cancel()
    }
}
/// Owns one child process launched for a capture action: its pipes, its bounded
/// output buffers, and the state needed to time it out or signal it.
///
/// The child is spawned into its own process group (see `spawn`), and every kill
/// issued here targets that whole group via `kill(-pid, …)`.
private final class CaptureActionProcessBox: @unchecked Sendable {
    private let stdoutPipe = Pipe()
    private let stderrPipe = Pipe()
    private let stdoutOutput = BoundedPipeOutput()
    private let stderrOutput = BoundedPipeOutput()
    private let lock = NSLock()
    private nonisolated(unsafe) var processIdentifier: pid_t?
    private nonisolated(unsafe) var timedOut = false
    private nonisolated(unsafe) var didExit = false

    /// Installs the pipe readers and spawns the command.
    /// - Parameter command: Executable followed by its arguments; the whole array is passed as argv.
    /// - Throws: `CaptureActionProcessLaunchError` when `command` is empty or spawning fails.
    nonisolated func start(command: [String]) throws {
        guard let executable = command.first else {
            throw CaptureActionProcessLaunchError(message: "Action command cannot be empty")
        }
        self.installOutputHandlers()
        try self.spawn(executable: executable, arguments: command)
    }

    /// Blocks until the child has been reaped.
    /// - Returns: The decoded exit code, or `-1` when no child was started or `waitpid` failed.
    nonisolated func waitUntilExit() -> Int32 {
        guard let pid = self.currentProcessIdentifier() else { return -1 }
        var status: Int32 = 0
        while true {
            let result = Darwin.waitpid(pid, &status, 0)
            if result == pid {
                self.markExited()
                return Self.exitCode(fromWaitStatus: status)
            }
            if result == -1, errno == EINTR {
                // Interrupted by a signal: wait again.
                continue
            }
            self.markExited()
            return -1
        }
    }

    /// Sleeps for `seconds`, then sends SIGTERM to the process group and, 500 ms
    /// later, SIGKILL. Returns early if the task is cancelled or the child already exited.
    /// - Parameter seconds: Timeout; NaN and non-positive values time out immediately.
    nonisolated func terminateAfterTimeout(seconds: TimeInterval) async {
        do {
            try await Task.sleep(nanoseconds: Self.sleepNanoseconds(forSeconds: seconds))
        } catch {
            return
        }
        guard self.requestTimeoutTermination() else { return }
        do {
            try await Task.sleep(nanoseconds: 500_000_000)
        } catch {
            return
        }
        self.killTimedOutProcessGroup()
    }

    /// Whether the timeout path has requested termination of the child.
    nonisolated func wasTimedOut() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        return self.timedOut
    }

    /// Detaches the readers, drains whatever is still buffered in both pipes
    /// without blocking, closes the read ends, and returns the collected output.
    /// - Returns: For each stream, the text and whether it was truncated.
    nonisolated func finishOutput() -> (stdout: (String, Bool), stderr: (String, Bool)) {
        let stdoutHandle = self.stdoutPipe.fileHandleForReading
        let stderrHandle = self.stderrPipe.fileHandleForReading
        stdoutHandle.readabilityHandler = nil
        stderrHandle.readabilityHandler = nil
        self.drainAvailableNonBlocking(from: stdoutHandle, into: self.stdoutOutput)
        self.drainAvailableNonBlocking(from: stderrHandle, into: self.stderrOutput)
        stdoutHandle.closeFile()
        stderrHandle.closeFile()
        return (self.stdoutOutput.finish(), self.stderrOutput.finish())
    }

    /// Sends SIGKILL to the process group, but only after a timeout was recorded.
    nonisolated func killTimedOutProcessGroup() {
        guard self.wasTimedOut(), let pid = self.currentProcessIdentifier() else { return }
        self.killProcessGroup(pid: pid, signal: SIGKILL)
    }

    /// Sends SIGTERM to the process group now and SIGKILL 500 ms later from a detached task.
    nonisolated func terminateProcessGroupForCancellation() {
        guard let pid = self.currentProcessIdentifier() else { return }
        self.killProcessGroup(pid: pid, signal: SIGTERM)
        Task.detached {
            do {
                try await Task.sleep(nanoseconds: 500_000_000)
            } catch {
                return
            }
            self.killProcessGroup(pid: pid, signal: SIGKILL)
        }
    }

    /// Relays `signalNumber` to the child's process group, if a child was started.
    nonisolated func forwardSignalToProcessGroup(_ signalNumber: Int32) {
        guard let pid = self.currentProcessIdentifier() else { return }
        self.killProcessGroup(pid: pid, signal: signalNumber)
    }

    /// Spawns `executable` with `posix_spawnp`, wiring stdout/stderr to the pipes
    /// and placing the child in a new process group.
    /// - Throws: `CaptureActionProcessLaunchError` naming the failing step.
    private nonisolated func spawn(executable: String, arguments: [String]) throws {
        let stdoutRead = self.stdoutPipe.fileHandleForReading.fileDescriptor
        let stdoutWrite = self.stdoutPipe.fileHandleForWriting.fileDescriptor
        let stderrRead = self.stderrPipe.fileHandleForReading.fileDescriptor
        let stderrWrite = self.stderrPipe.fileHandleForWriting.fileDescriptor

        var fileActions: posix_spawn_file_actions_t?
        try Self.check(posix_spawn_file_actions_init(&fileActions), "posix_spawn_file_actions_init")
        defer { posix_spawn_file_actions_destroy(&fileActions) }
        try Self.check(posix_spawn_file_actions_adddup2(&fileActions, stdoutWrite, STDOUT_FILENO), "dup stdout")
        try Self.check(posix_spawn_file_actions_adddup2(&fileActions, stderrWrite, STDERR_FILENO), "dup stderr")
        try Self.check(posix_spawn_file_actions_addclose(&fileActions, stdoutRead), "close child stdout read")
        try Self.check(posix_spawn_file_actions_addclose(&fileActions, stderrRead), "close child stderr read")
        // Only close the original write ends when they are not already fd 1 / fd 2.
        if stdoutWrite != STDOUT_FILENO {
            try Self.check(posix_spawn_file_actions_addclose(&fileActions, stdoutWrite), "close child stdout write")
        }
        if stderrWrite != STDERR_FILENO {
            try Self.check(posix_spawn_file_actions_addclose(&fileActions, stderrWrite), "close child stderr write")
        }

        var attributes: posix_spawnattr_t?
        try Self.check(posix_spawnattr_init(&attributes), "posix_spawnattr_init")
        defer { posix_spawnattr_destroy(&attributes) }
        let flags = Int16(POSIX_SPAWN_SETPGROUP)
        try Self.check(posix_spawnattr_setflags(&attributes, flags), "set spawn flags")
        // pgroup 0 makes the child the leader of a new group whose id equals its pid.
        try Self.check(posix_spawnattr_setpgroup(&attributes, 0), "set process group")

        var argv = Self.makeCStringArray(arguments)
        defer { Self.freeCStringArray(argv) }
        let environment = ProcessInfo.processInfo.environment.map { key, value in "\(key)=\(value)" }
        var envp = Self.makeCStringArray(environment)
        defer { Self.freeCStringArray(envp) }

        var pid: pid_t = 0
        let spawnResult = executable.withCString { executablePath in
            posix_spawnp(&pid, executablePath, &fileActions, &attributes, &argv, &envp)
        }
        // The parent never writes; closing its copies lets the readers observe EOF.
        self.stdoutPipe.fileHandleForWriting.closeFile()
        self.stderrPipe.fileHandleForWriting.closeFile()
        try Self.check(spawnResult, "posix_spawnp")

        self.lock.lock()
        self.processIdentifier = pid
        self.lock.unlock()
    }

    /// Streams both pipes into their bounded buffers; each handler removes itself at EOF.
    private nonisolated func installOutputHandlers() {
        self.stdoutPipe.fileHandleForReading.readabilityHandler = { [stdoutOutput] handle in
            let chunk = handle.availableData
            if chunk.isEmpty {
                handle.readabilityHandler = nil
            } else {
                stdoutOutput.append(chunk)
            }
        }
        self.stderrPipe.fileHandleForReading.readabilityHandler = { [stderrOutput] handle in
            let chunk = handle.availableData
            if chunk.isEmpty {
                handle.readabilityHandler = nil
            } else {
                stderrOutput.append(chunk)
            }
        }
    }

    /// Marks the run as timed out and sends SIGTERM, unless the child already exited.
    /// - Returns: `true` when termination was requested.
    private nonisolated func requestTimeoutTermination() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        guard let pid = self.processIdentifier, !self.didExit else { return false }
        self.timedOut = true
        self.killProcessGroup(pid: pid, signal: SIGTERM)
        return true
    }

    private nonisolated func currentProcessIdentifier() -> pid_t? {
        self.lock.lock()
        defer { self.lock.unlock() }
        return self.processIdentifier
    }

    private nonisolated func markExited() {
        self.lock.lock()
        self.didExit = true
        self.lock.unlock()
    }

    /// Signals the whole process group led by `pid`; failures are ignored.
    private nonisolated func killProcessGroup(pid: pid_t, signal: Int32) {
        _ = Darwin.kill(-pid, signal)
    }

    /// Reads everything currently buffered on `handle` after switching it to
    /// non-blocking mode, so a descendant still holding the write end cannot stall us.
    private nonisolated func drainAvailableNonBlocking(from handle: FileHandle, into output: BoundedPipeOutput) {
        let outputReadChunkBytes = 4096
        let fileDescriptor = handle.fileDescriptor
        let flags = fcntl(fileDescriptor, F_GETFL)
        if flags >= 0 {
            _ = fcntl(fileDescriptor, F_SETFL, flags | O_NONBLOCK)
        }
        var buffer = [UInt8](repeating: 0, count: outputReadChunkBytes)
        while true {
            let count = Darwin.read(fileDescriptor, &buffer, outputReadChunkBytes)
            if count > 0 {
                output.append(Data(buffer.prefix(count)))
                continue
            }
            if count < 0, errno == EINTR {
                // A signal interrupted the read; retry instead of dropping the tail.
                continue
            }
            // EOF, nothing available right now (EAGAIN/EWOULDBLOCK), or a hard error.
            break
        }
    }

    /// Converts a timeout to a nanosecond count that is always safe to pass to `Task.sleep`.
    ///
    /// NaN and non-positive inputs map to 0; very large or infinite inputs are capped
    /// instead of trapping in the `UInt64` conversion.
    private nonisolated static func sleepNanoseconds(forSeconds seconds: TimeInterval) -> UInt64 {
        guard !seconds.isNaN, seconds > 0 else { return 0 }
        let ceiling = UInt64(Int64.max / 2)
        let nanoseconds = seconds * 1_000_000_000
        return nanoseconds >= Double(ceiling) ? ceiling : UInt64(nanoseconds)
    }

    /// Duplicates each string with `strdup` and appends the terminating `nil` entry.
    private nonisolated static func makeCStringArray(_ strings: [String]) -> [UnsafeMutablePointer<CChar>?] {
        var pointers = strings.map { strdup($0) }
        pointers.append(nil)
        return pointers
    }

    private nonisolated static func freeCStringArray(_ pointers: [UnsafeMutablePointer<CChar>?]) {
        for pointer in pointers {
            free(pointer)
        }
    }

    /// Throws a launch error describing `operation` when `code` is a non-zero errno value.
    private nonisolated static func check(_ code: Int32, _ operation: String) throws {
        guard code != 0 else { return }
        throw CaptureActionProcessLaunchError(
            message: "\(operation) failed: \(String(cString: strerror(code)))"
        )
    }

    /// Decodes a `waitpid` status: the exit status for a normal exit, `128 + signal`
    /// when the low seven bits hold a signal number, and the raw status otherwise.
    private nonisolated static func exitCode(fromWaitStatus status: Int32) -> Int32 {
        let signal = status & 0x7F
        if signal == 0 {
            return (status >> 8) & 0xFF
        }
        if signal != 0x7F {
            return 128 + signal
        }
        return status
    }
}
/// Entry point for running the child command of a capture action.
enum CaptureActionProcessRunner {
    /// Launches `command`, waits for it to exit or time out, and reports the outcome.
    ///
    /// While the child runs, SIGINT/SIGTERM received by this process are forwarded
    /// to the child's process group, and cancelling the calling task terminates that group.
    /// - Parameters:
    ///   - command: Executable followed by its arguments.
    ///   - timeoutSeconds: Time after which the child's process group is terminated.
    /// - Returns: Exit code, timing, timeout flag and captured output.
    /// - Throws: The launch error when the child cannot be started.
    nonisolated static func run(
        command: [String],
        timeoutSeconds: TimeInterval
    ) async throws -> CaptureActionProcessResult {
        let processBox = CaptureActionProcessBox()
        let launchTime = Date()
        try processBox.start(command: command)

        let forwarder = CaptureActionSignalForwarder { receivedSignal in
            processBox.forwardSignalToProcessGroup(receivedSignal)
        }
        defer { forwarder.cancel() }

        return await withTaskCancellationHandler {
            let exitWaiter = Task.detached { processBox.waitUntilExit() }
            let timeoutWatchdog = Task.detached {
                await processBox.terminateAfterTimeout(seconds: timeoutSeconds)
            }

            let status = await exitWaiter.value
            // The main child is gone; kill the rest of its group if the timeout fired.
            processBox.killTimedOutProcessGroup()
            timeoutWatchdog.cancel()
            // Short pause before collecting what the pipe readers gathered.
            try? await Task.sleep(nanoseconds: 50_000_000)

            let (capturedStdout, capturedStderr) = processBox.finishOutput()
            let elapsedMs = Int(Date().timeIntervalSince(launchTime) * 1000)
            return CaptureActionProcessResult(
                command: command,
                exitCode: status,
                timedOut: processBox.wasTimedOut(),
                timeoutSeconds: timeoutSeconds,
                durationMs: elapsedMs,
                stdout: capturedStdout.0,
                stderr: capturedStderr.0,
                stdoutTruncated: capturedStdout.1,
                stderrTruncated: capturedStderr.1
            )
        } onCancel: {
            processBox.terminateProcessGroupForCancellation()
        }
    }
}

View File

@ -0,0 +1,640 @@
import Commander
import CoreGraphics
import Foundation
import PeekabooCore
import PeekabooFoundation
/// `capture action`: records an adaptive live capture around a child command.
///
/// The command starts a capture session, waits for the pre-roll, runs the child
/// command, keeps recording for the post-roll, stops the session, and finally checks
/// that the expected artifacts exist on disk.
@MainActor
struct CaptureActionCommand: ApplicationResolvable, ErrorHandlingCommand, OutputFormattable,
    RuntimeOptionsConfigurable {
    // Capture target selection.
    var app: String?
    var pid: Int32?
    var mode: String?
    var windowTitle: String?
    var windowIndex: Int?
    var screenIndex: Int?
    var region: String?
    var captureFocus: LiveCaptureFocus = .auto
    var captureEngine: String?

    // Timing around the child command.
    var durationLimit: Double?
    var preRollMs: Int?
    var postRollMs: Int?
    var actionTimeout: Double?

    // Capture tuning; defaults and clamps are applied in `buildOptions()`.
    var idleFps: Double?
    var activeFps: Double?
    var threshold: Double?
    var heartbeatSec: Double?
    var quietMs: Int?
    var highlightChanges = false
    var maxFrames: Int?
    var maxMb: Int?
    var resolutionCap: Double?
    var diffStrategy: String?
    var diffBudgetMs: Int?

    // Output locations.
    var path: String?
    var autocleanMinutes: Int?
    var videoOut: String?

    /// Child command and its arguments.
    var command: [String] = []

    @RuntimeStorage private var runtime: CommandRuntime?
    var runtimeOptions = CommandRuntimeOptions()

    nonisolated(unsafe) static var commandDescription: CommandDescription {
        MainActorCommandDescription.describe {
            CommandDescription(
                commandName: "action",
                abstract: "Capture around a child command with pre/post-roll",
                discussion: """
                Starts adaptive live capture, runs a child command, keeps post-roll, then
                stops capture and verifies the resulting artifacts.
                Examples:
                peekaboo capture action --duration-limit 10 -- echo smoke
                peekaboo capture action --mode area --region 0,0,640,360 -- ./test-flow.sh
                """,
                version: "1.0.0"
            )
        }
    }

    /// The runtime injected by `run(using:)`; accessing it earlier is a programmer error.
    private var resolvedRuntime: CommandRuntime {
        guard let runtime else {
            preconditionFailure("CommandRuntime must be configured before accessing runtime resources")
        }
        return runtime
    }

    private var logger: Logger {
        self.resolvedRuntime.logger
    }

    var services: any PeekabooServiceProviding {
        self.resolvedRuntime.services
    }

    var jsonOutput: Bool {
        self.resolvedRuntime.configuration.jsonOutput
    }

    var outputLogger: Logger {
        self.logger
    }

    /// Runs the capture, the child command and the artifact validation.
    ///
    /// - Throws: `ExitCode(1)` when the action fails, validation fails, or any other
    ///   error occurs (after it has been reported through `handleError`).
    mutating func run(using runtime: CommandRuntime) async throws {
        self.runtime = runtime
        self.logger.setJsonOutputMode(self.jsonOutput)
        self.logger.operationStart("capture_action", metadata: ["mode": self.mode ?? "auto"])
        do {
            guard !self.command.isEmpty else {
                throw ValidationError("Pass the action command after --")
            }
            let scope = try await self.resolveScope()
            let options = try self.buildOptions()
            let timing = try self.resolveActionTiming(durationLimit: options.duration)
            if scope.kind == .window, let identifier = scope.applicationIdentifier {
                try await self.focusIfNeeded(appIdentifier: identifier)
            }
            let outputDir = try self.resolveOutputDirectory()
            let deps = WatchCaptureDependencies(
                screenCapture: self.services.screenCapture,
                screenService: self.services.screens,
                frameSource: nil
            )
            let config = WatchCaptureConfiguration(
                scope: scope,
                options: options,
                outputRoot: outputDir,
                autoclean: WatchAutocleanConfig(minutes: self.autocleanMinutes ?? 120, managed: self.path == nil),
                sourceKind: .live,
                videoIn: nil,
                videoOut: CaptureCommandPathResolver.filePath(from: self.videoOut),
                keepAllFrames: false
            )
            let session = WatchCaptureSession(dependencies: deps, configuration: config)
            let captureTask = self.startCaptureTask(session: session, scope: scope)
            do {
                // A non-nil result means the capture finished before the action could start.
                if try await Self.waitForPreRollOrCaptureEnd(
                    milliseconds: timing.startupGateMs,
                    captureTask: captureTask
                ) != nil {
                    throw ValidationError("Capture ended before action started")
                }
                let action = try await CaptureActionProcessRunner.run(
                    command: self.command,
                    timeoutSeconds: timing.actionTimeout
                )
                try await Self.sleep(milliseconds: timing.postRollMs)
                session.requestStop()
                let capture = try await captureTask.value
                let validation = self.validateArtifacts(capture)
                let result = CaptureActionCommandResult(
                    success: action.succeeded && validation.ok,
                    action: action,
                    capture: capture,
                    validation: validation
                )
                self.output(result)
                self.logger.operationComplete(
                    "capture_action",
                    success: result.success,
                    metadata: ["frames_kept": capture.stats.framesKept]
                )
                if !result.success {
                    throw ExitCode(1)
                }
            } catch {
                // Always wind the capture down before propagating the failure.
                session.requestStop()
                captureTask.cancel()
                _ = try? await captureTask.value
                throw error
            }
        } catch let exit as ExitCode {
            // Already reported through `output`; do not wrap or log it again.
            throw exit
        } catch {
            self.handleError(error)
            self.logger.operationComplete(
                "capture_action",
                success: false,
                metadata: ["error": error.localizedDescription]
            )
            throw ExitCode(1)
        }
    }

    /// Starts the capture session in an unstructured main-actor task, routing it
    /// through the engine-aware capture service when one is available.
    private func startCaptureTask(
        session: WatchCaptureSession,
        scope: CaptureScope
    ) -> Task<CaptureSessionResult, any Error> {
        let runSession: @MainActor @Sendable () async throws -> CaptureSessionResult = {
            try await session.run()
        }
        let enginePreference = self.liveCaptureEnginePreference(for: scope)
        return Task { @MainActor in
            if let engineAware = self.services.screenCapture as? any EngineAwareScreenCaptureServiceProtocol {
                try await engineAware.withCaptureEngine(enginePreference, operation: runSession)
            } else {
                try await runSession()
            }
        }
    }

    /// Emits the result as a JSON envelope or as human-readable lines.
    private func output(_ result: CaptureActionCommandResult) {
        if self.jsonOutput {
            let error = result.success
                ? nil
                : ErrorInfo(message: result.failureMessage, code: .VALIDATION_ERROR)
            let envelope = CaptureActionJSONEnvelope(
                success: result.success,
                data: result,
                messages: nil,
                debug_logs: self.outputLogger.getDebugLogs(),
                error: error
            )
            outputJSONCodable(envelope, logger: self.outputLogger)
            return
        }
        print(
            "capture(action) kept \(result.capture.stats.framesKept) frames " +
                "(dropped \(result.capture.stats.framesDropped))"
        )
        print("contact sheet: \(result.capture.contactSheet.path)")
        print("metadata: \(result.capture.metadataFile)")
        if let videoOut = result.capture.videoOut {
            print("video: \(videoOut)")
        }
        print("action exit: \(result.action.exitCode)")
        if result.action.timedOut {
            print("action timed out after \(String(format: "%.2f", result.action.timeoutSeconds))s")
        }
        if !result.validation.ok {
            print("artifact validation failed: \(result.validation.missing.joined(separator: ", "))")
        }
    }

    /// Applies defaults and clamps to the user-supplied capture tuning values.
    ///
    /// Duration defaults to 60 s (clamped to 1...180), idle fps to 2 (0.1...5),
    /// active fps to 8 (0.5...15), threshold to 2.5 (0...100), heartbeat to 5 s,
    /// quiet time to 1000 ms, max frames to 800 and resolution cap to 1440.
    /// A non-positive `maxMb` is treated as "no limit".
    private func buildOptions() throws -> CaptureOptions {
        let duration = max(1, min(self.durationLimit ?? 60, 180))
        let idle = min(max(self.idleFps ?? 2, 0.1), 5)
        let active = min(max(self.activeFps ?? 8, 0.5), 15)
        let threshold = min(max(self.threshold ?? 2.5, 0), 100)
        let heartbeat = max(self.heartbeatSec ?? 5, 0)
        let quiet = max(self.quietMs ?? 1000, 0)
        let maxFrames = max(self.maxFrames ?? 800, 1)
        let resolutionCap = self.resolutionCap ?? 1440
        let diffStrategy = try CaptureCommandOptionParser.diffStrategy(self.diffStrategy)
        let diffBudgetMs = self.diffBudgetMs ?? (diffStrategy == .quality ? 30 : nil)
        let maxMb = self.maxMb.flatMap { $0 > 0 ? $0 : nil }
        return CaptureOptions(
            duration: duration,
            idleFps: idle,
            activeFps: active,
            changeThresholdPercent: threshold,
            heartbeatSeconds: heartbeat,
            quietMsToIdle: quiet,
            maxFrames: maxFrames,
            maxMegabytes: maxMb,
            highlightChanges: self.highlightChanges,
            captureFocus: self.captureFocus,
            resolutionCap: resolutionCap,
            diffStrategy: diffStrategy,
            diffBudgetMs: diffBudgetMs
        )
    }

    /// Derives pre-roll, post-roll, startup gate and action timeout from the options.
    ///
    /// - Parameter durationLimit: Effective capture duration in seconds.
    /// - Throws: `ValidationError` when pre-roll plus post-roll do not fit in the limit.
    private func resolveActionTiming(durationLimit: TimeInterval) throws -> CaptureActionTiming {
        let preRoll = max(self.preRollMs ?? 250, 0)
        let postRoll = max(self.postRollMs ?? 500, 0)
        // Sum in floating point: adding two huge Int values would trap on overflow
        // before the validation below gets a chance to reject them.
        let rollSeconds = (Double(preRoll) + Double(postRoll)) / 1000.0
        guard rollSeconds < durationLimit else {
            throw ValidationError("--pre-roll-ms + --post-roll-ms must be less than --duration-limit")
        }
        let defaultActionTimeout = max(0.1, durationLimit - rollSeconds)
        let actionTimeout = max(0.1, min(self.actionTimeout ?? defaultActionTimeout, durationLimit - rollSeconds))
        // NOTE(review): the budget above uses `preRoll`, while the wait actually
        // performed before the action is `startupGateMs` (at least 100 ms) — confirm
        // the up-to-100 ms difference is acceptable for very tight duration limits.
        return CaptureActionTiming(
            preRollMs: preRoll,
            postRollMs: postRoll,
            startupGateMs: max(preRoll, 100),
            actionTimeout: actionTimeout
        )
    }

    private func resolveOutputDirectory() throws -> URL {
        CaptureCommandPathResolver.outputDirectory(from: self.path)
    }

    /// Suspends for `milliseconds`; non-positive values return immediately.
    private static func sleep(milliseconds: Int) async throws {
        guard milliseconds > 0 else { return }
        try await Task.sleep(nanoseconds: UInt64(milliseconds) * 1_000_000)
    }

    /// Races the pre-roll delay against the capture task.
    ///
    /// - Returns: `nil` when the pre-roll elapsed first, or the capture result when
    ///   the capture finished first.
    /// - Throws: Whatever the capture task throws if it fails first.
    ///
    /// NOTE(review): `group.cancelAll()` cancels the child that awaits
    /// `captureTask.value`, not `captureTask` itself. A task group does not return
    /// until all of its children finish, so confirm that this await actually ends
    /// promptly on cancellation; otherwise the action only starts once the capture is over.
    private static func waitForPreRollOrCaptureEnd(
        milliseconds: Int,
        captureTask: Task<CaptureSessionResult, any Error>
    ) async throws -> CaptureSessionResult? {
        try await withThrowingTaskGroup(of: CaptureActionStartupGate.self) { group in
            group.addTask {
                if milliseconds > 0 {
                    try await Task.sleep(nanoseconds: UInt64(milliseconds) * 1_000_000)
                }
                return .preRollElapsed
            }
            group.addTask {
                try await .captureEnded(captureTask.value)
            }
            guard let first = try await group.next() else {
                return nil
            }
            group.cancelAll()
            switch first {
            case .preRollElapsed:
                return nil
            case let .captureEnded(result):
                return result
            }
        }
    }
}
/// Timing values that `CaptureActionCommand.resolveActionTiming(durationLimit:)` derives
/// from the command-line options.
private struct CaptureActionTiming {
/// Requested pre-roll in milliseconds (never negative).
let preRollMs: Int
/// Time in milliseconds to keep capturing after the action returns (never negative).
let postRollMs: Int
/// Time in milliseconds to wait before launching the action: the pre-roll, but at least 100.
let startupGateMs: Int
/// Timeout in seconds handed to the child process runner (at least 0.1).
let actionTimeout: TimeInterval
}
/// Outcome of the race between the pre-roll delay and the capture task.
private enum CaptureActionStartupGate {
/// The pre-roll delay finished first.
case preRollElapsed
/// The capture task finished first; carries its result.
case captureEnded(CaptureSessionResult)
}
/// Combined outcome of a capture action: the child command, the capture session,
/// and the artifact validation.
struct CaptureActionCommandResult: Codable {
    /// `true` when the action succeeded and every artifact was validated.
    let success: Bool
    let action: CaptureActionProcessResult
    let capture: CaptureSessionResult
    let validation: CaptureActionArtifactValidation

    /// Explanation of a failure: a timeout is reported first, then a failing exit
    /// status, and otherwise the artifact validation.
    var failureMessage: String {
        guard !self.action.timedOut else {
            return "Action timed out after \(self.action.timeoutSeconds)s"
        }
        guard self.action.succeeded else {
            return "Action exited with status \(self.action.exitCode)"
        }
        return "Capture artifact validation failed"
    }
}
/// JSON envelope written for `capture action` when JSON output is enabled.
struct CaptureActionJSONEnvelope: Codable {
/// Mirrors `data.success`.
let success: Bool
/// The full command result.
let data: CaptureActionCommandResult
/// Optional informational messages; the command currently passes `nil`.
let messages: [String]?
/// Debug log lines collected by the output logger.
let debug_logs: [String]
/// Failure details; `nil` when the command succeeded.
let error: ErrorInfo?
}
/// Result of checking that the capture artifacts exist on disk and are non-empty.
struct CaptureActionArtifactValidation: Codable {
/// `true` when `missing` is empty.
let ok: Bool
/// Every path that was checked.
let checked: [String]
/// Paths that were absent or empty, plus the marker "frame files" when no frames were kept.
let missing: [String]
}
/// Outcome of running the child command of a capture action.
struct CaptureActionProcessResult: Codable {
    /// The command line that was executed.
    let command: [String]
    /// Exit code reported for the child.
    let exitCode: Int32
    /// Whether the child was terminated because it exceeded `timeoutSeconds`.
    let timedOut: Bool
    let timeoutSeconds: TimeInterval
    /// Elapsed time from launch to output collection, in milliseconds.
    let durationMs: Int
    let stdout: String
    let stderr: String
    /// Whether captured stdout / stderr had to be cut off.
    let stdoutTruncated: Bool
    let stderrTruncated: Bool

    /// `true` only for a zero exit code without a timeout.
    var succeeded: Bool {
        if self.timedOut {
            return false
        }
        return self.exitCode == 0
    }
}
@MainActor
extension CaptureActionCommand {
    /// Checks that every artifact of the capture exists on disk and is non-empty.
    ///
    /// The metadata file, the contact sheet, every frame and the video (the one
    /// reported by the session, or else the one requested via `videoOut`) are checked.
    /// An empty frame list is reported as the missing item "frame files".
    private func validateArtifacts(_ result: CaptureSessionResult) -> CaptureActionArtifactValidation {
        let framePaths = result.frames.map(\.path)
        let videoPath = result.videoOut ?? CaptureCommandPathResolver.filePath(from: self.videoOut)

        var checked = [result.metadataFile, result.contactSheet.path]
        checked += framePaths
        if let videoPath {
            checked.append(videoPath)
        }

        var missing: [String] = result.frames.isEmpty ? ["frame files"] : []
        missing += checked.filter { !Self.fileExistsAndIsNonEmpty($0) }

        return CaptureActionArtifactValidation(ok: missing.isEmpty, checked: checked, missing: missing)
    }

    /// Whether `path` exists and its reported size is greater than zero.
    private static func fileExistsAndIsNonEmpty(_ path: String) -> Bool {
        let fileManager = FileManager.default
        guard fileManager.fileExists(atPath: path) else {
            return false
        }
        guard let attributes = try? fileManager.attributesOfItem(atPath: path) else {
            return false
        }
        guard let byteCount = attributes[.size] as? NSNumber else {
            return false
        }
        return byteCount.intValue > 0
    }
}
@MainActor
extension CaptureActionCommand {
    /// Builds the capture scope for the resolved mode.
    /// - Throws: `ValidationError` for multi-mode, plus any error from the
    ///   display, application, window or region resolution.
    func resolveScope() async throws -> CaptureScope {
        let mode = try self.resolveMode()
        switch mode {
        case .screen:
            let displayInfo = try await self.displayInfo(for: self.screenIndex)
            return CaptureScope(
                kind: .screen,
                screenIndex: displayInfo?.index,
                displayUUID: displayInfo?.uuid,
                windowId: nil,
                applicationIdentifier: nil,
                windowIndex: nil,
                region: nil
            )
        case .frontmost:
            return CaptureScope(kind: .frontmost)
        case .window:
            let identifier = try self.resolveApplicationIdentifier()
            let windowReference = try await self.resolveWindowReference(for: identifier)
            return CaptureScope(
                kind: .window,
                screenIndex: nil,
                displayUUID: nil,
                windowId: windowReference.windowID,
                applicationIdentifier: identifier,
                windowIndex: windowReference.windowIndex,
                region: nil
            )
        case .area:
            let rect = try self.parseRegion()
            return CaptureScope(kind: .region, region: rect)
        case .multi:
            throw ValidationError("capture action does not support multi-mode captures")
        }
    }

    /// Resolves the capture mode from `--mode`, or infers it from the other options.
    ///
    /// An explicit "region" is accepted as an alias for area. Without `--mode`, a
    /// region selects area, any app/pid/window option selects window, and
    /// frontmost is the fallback.
    func resolveMode() throws -> LiveCaptureMode {
        if let explicit = self.mode {
            let normalized = explicit.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
            if normalized == "region" { return .area }
            guard let mode = LiveCaptureMode(rawValue: normalized) else {
                throw ValidationError(
                    "Unsupported capture action mode '\(explicit)'. Use screen, window, frontmost, or area."
                )
            }
            return mode
        }
        if self.region != nil { return .area }
        if self.app != nil || self.pid != nil || self.windowTitle != nil || self.windowIndex != nil { return .window }
        return .frontmost
    }

    /// Parses `region` as `x,y,width,height`.
    /// - Throws: `PeekabooError.invalidInput` when the region is missing, malformed,
    ///   contains non-finite numbers, or has a non-positive size.
    func parseRegion() throws -> CGRect {
        guard let region = self.region?.trimmingCharacters(in: .whitespacesAndNewlines),
              !region.isEmpty
        else {
            throw PeekabooError.invalidInput("Region must be provided when --mode area is set")
        }
        let parts = region
            .split(separator: ",", omittingEmptySubsequences: false)
            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
        guard parts.count == 4,
              let x = Double(parts[0]),
              let y = Double(parts[1]),
              let width = Double(parts[2]),
              let height = Double(parts[3])
        else {
            throw PeekabooError.invalidInput("Region must be x,y,width,height")
        }
        // `Double("nan")` and `Double("inf")` parse successfully; reject them
        // explicitly so a non-finite rectangle never reaches the capture code.
        guard [x, y, width, height].allSatisfy(\.isFinite) else {
            throw PeekabooError.invalidInput("Region must be x,y,width,height")
        }
        guard width > 0, height > 0 else {
            throw PeekabooError.invalidInput("Region width and height must be greater than zero")
        }
        return CGRect(x: x, y: y, width: width, height: height)
    }

    /// Focuses the target application according to `captureFocus`.
    ///
    /// `.background` does nothing. `.auto` and `.foreground` both auto-focus; only
    /// `.foreground` additionally enables space switching and bringing the window
    /// to the current space.
    func focusIfNeeded(appIdentifier: String) async throws {
        let moveAcrossSpaces: Bool
        switch self.captureFocus {
        case .background:
            return
        case .auto:
            moveAcrossSpaces = false
        case .foreground:
            moveAcrossSpaces = true
        }
        let options = FocusOptions(
            autoFocus: true,
            focusTimeout: nil,
            focusRetryCount: nil,
            spaceSwitch: moveAcrossSpaces,
            bringToCurrentSpace: moveAcrossSpaces
        )
        try await ensureFocused(
            applicationName: appIdentifier,
            windowTitle: self.windowTitle,
            options: options,
            services: self.services
        )
    }

    /// Maps the `--capture-engine` value (or the runtime's configured preference)
    /// to an engine preference. Unrecognized or absent values pick `.legacy` for
    /// region scopes and `.auto` otherwise.
    private func liveCaptureEnginePreference(for scope: CaptureScope) -> CaptureEnginePreference {
        let value = (self.captureEngine ?? self.resolvedRuntime.configuration.captureEnginePreference)?
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
        switch value {
        case "modern", "modern-only", "sckit", "sc", "screen-capture-kit", "sck":
            return .modern
        case "classic", "cg", "legacy", "legacy-only", "false", "0", "no":
            return .legacy
        default:
            return scope.kind == .region ? .legacy : .auto
        }
    }

    /// Looks up the screen with the given index.
    /// - Returns: `nil` when no index was requested; otherwise the index paired
    ///   with the display ID rendered as a string.
    /// - Throws: `PeekabooError.invalidInput` when no screen has that index.
    private func displayInfo(for index: Int?) async throws -> (index: Int, uuid: String)? {
        guard let index else { return nil }
        let screens = self.services.screens.listScreens()
        guard let match = screens.first(where: { $0.index == index }) else {
            throw PeekabooError.invalidInput("Screen index \(index) not found")
        }
        return (index, "\(match.displayID)")
    }

    /// Resolves `--window-title` / `--window-index` to a concrete window of `identifier`.
    ///
    /// A non-blank title takes precedence over the index. When neither option was
    /// given, `(nil, nil)` is returned and no window lookup happens.
    /// - Throws: `PeekabooError.windowNotFound` naming the criterion that was actually used.
    private func resolveWindowReference(for identifier: String) async throws -> (windowID: UInt32?, windowIndex: Int?) {
        guard self.windowTitle != nil || self.windowIndex != nil else {
            return (nil, nil)
        }
        let windows = try await WindowServiceBridge.listWindows(
            windows: self.services.windows,
            target: .application(identifier)
        )
        let renderable = ObservationTargetResolver.captureCandidates(from: windows)

        let trimmedTitle = self.windowTitle?.trimmingCharacters(in: .whitespacesAndNewlines)
        let titleQuery: String? = if let trimmedTitle, !trimmedTitle.isEmpty {
            trimmedTitle
        } else {
            nil
        }

        let selectedWindow: ServiceWindowInfo? = if let titleQuery {
            renderable.first { $0.title.localizedCaseInsensitiveContains(titleQuery) }
        } else if let explicitIndex = self.windowIndex {
            renderable.first { $0.index == explicitIndex }
        } else {
            nil
        }
        guard let selectedWindow else {
            // Describe the criterion that drove the lookup: a blank title is skipped
            // above, so it must not be blamed when the index lookup failed.
            let criteria: String = if titleQuery != nil, let rawTitle = self.windowTitle {
                "window title '\(rawTitle)' for \(identifier)"
            } else if let explicitIndex = self.windowIndex {
                "window index \(explicitIndex) for \(identifier)"
            } else {
                "window for \(identifier)"
            }
            throw PeekabooError.windowNotFound(criteria: criteria)
        }
        return (
            windowID: UInt32(exactly: selectedWindow.windowID),
            windowIndex: selectedWindow.index
        )
    }
}
/// Declares `ParsableCommand` conformance; no members are added here.
extension CaptureActionCommand: ParsableCommand {}
/// Declares `AsyncRuntimeCommand` conformance; no members are added here.
extension CaptureActionCommand: AsyncRuntimeCommand {}
extension CaptureActionCommand: CommanderSignatureProviding {
    /// Builds the command-line signature for `capture action`.
    ///
    /// Arguments, flags and option groups are taken unchanged from
    /// `CaptureLiveCommand`. Its options are reused as well, except that the one
    /// labelled "duration" is dropped and the action-specific options are appended.
    static func commanderSignature() -> CommandSignature {
        let liveSignature = CaptureLiveCommand.commanderSignature()

        var options = liveSignature.options.filter { $0.label != "duration" }
        options.append(
            .commandOption(
                "durationLimit",
                help: "Hard capture limit seconds (default 60, max 180)",
                long: "duration-limit"
            )
        )
        options.append(
            .commandOption("preRollMs", help: "Milliseconds to capture before running the action", long: "pre-roll-ms")
        )
        options.append(
            .commandOption("postRollMs", help: "Milliseconds to capture after the action exits", long: "post-roll-ms")
        )
        options.append(
            .commandOption(
                "actionTimeout",
                help: "Action timeout seconds (defaults to remaining duration)",
                long: "action-timeout"
            )
        )
        // `.remaining` parsing is what lets the child command be passed after `--`.
        options.append(
            .commandOption(
                "command",
                help: "Command to run; usually pass after --",
                long: "command",
                parsing: .remaining
            )
        )

        return CommandSignature(
            arguments: liveSignature.arguments,
            options: options,
            flags: liveSignature.flags,
            optionGroups: liveSignature.optionGroups
        )
    }
}
// Copies parsed Commander values onto the command's stored properties.
// Each key passed to `values` is the option/flag label used in the command signature.
@MainActor
extension CaptureActionCommand: CommanderBindableCommand {
// Throws when a `decodeOption`/`decodeOptionEnum` call fails; statements run in order,
// so the first failing option determines the error that is reported.
mutating func applyCommanderValues(_ values: CommanderBindableValues) throws {
// Capture target selection.
self.app = values.singleOption("app")
self.pid = try values.decodeOption("pid", as: Int32.self)
self.mode = values.singleOption("mode")
self.windowTitle = values.singleOption("windowTitle")
self.windowIndex = try values.decodeOption("windowIndex", as: Int.self)
self.screenIndex = try values.decodeOption("screenIndex", as: Int.self)
self.region = values.singleOption("region")
// Only overwrite the existing focus value when the option decoded to a value.
if let parsedFocus: LiveCaptureFocus = try values.decodeOptionEnum("captureFocus") {
self.captureFocus = parsedFocus
}
self.captureEngine = values.singleOption("captureEngine")
// Action-specific timing options.
self.durationLimit = try values.decodeOption("durationLimit", as: Double.self)
self.preRollMs = try values.decodeOption("preRollMs", as: Int.self)
self.postRollMs = try values.decodeOption("postRollMs", as: Int.self)
self.actionTimeout = try values.decodeOption("actionTimeout", as: Double.self)
// Sampling, diffing, and size-limit options.
self.idleFps = try values.decodeOption("idleFps", as: Double.self)
self.activeFps = try values.decodeOption("activeFps", as: Double.self)
self.threshold = try values.decodeOption("threshold", as: Double.self)
self.heartbeatSec = try values.decodeOption("heartbeatSec", as: Double.self)
self.quietMs = try values.decodeOption("quietMs", as: Int.self)
self.maxFrames = try values.decodeOption("maxFrames", as: Int.self)
self.maxMb = try values.decodeOption("maxMb", as: Int.self)
self.resolutionCap = try values.decodeOption("resolutionCap", as: Double.self)
self.diffStrategy = values.singleOption("diffStrategy")
self.diffBudgetMs = try values.decodeOption("diffBudgetMs", as: Int.self)
// The flag can only turn highlighting on; an absent flag leaves the current value untouched.
if values.flag("highlightChanges") { self.highlightChanges = true }
// Output locations.
self.path = values.singleOption("path")
self.autocleanMinutes = try values.decodeOption("autocleanMinutes", as: Int.self)
self.videoOut = values.singleOption("videoOut")
// All values collected for the `command` option (the action's argv).
self.command = values.optionValues("command")
}
}

View File

@ -23,7 +23,12 @@ struct CaptureCommand: ParsableCommand {
CommandDescription(
commandName: "capture",
abstract: "Capture live screens/windows or ingest a video and extract frames",
subcommands: [CaptureLiveCommand.self, CaptureVideoCommand.self, CaptureWatchAlias.self],
subcommands: [
CaptureLiveCommand.self,
CaptureActionCommand.self,
CaptureVideoCommand.self,
CaptureWatchAlias.self,
],
showHelpOnEmptyInvocation: true
)
}

View File

@ -6,7 +6,7 @@ import TachikomaMCP
@MainActor
struct ToolsCommand: OutputFormattable, RuntimeOptionsConfigurable {
private static let abstractText = "List the MCP/agent tool catalog (not CLI commands)"
private static let abstractText = "List the MCP/agent tool catalog"
private static let descriptionText = "Tools command for listing the MCP/agent tool catalog"
static let commandDescription = CommandDescription(
@ -14,9 +14,9 @@ struct ToolsCommand: OutputFormattable, RuntimeOptionsConfigurable {
abstract: Self.abstractText,
discussion: """
Display the Peekaboo MCP/agent tool catalog. These tools are exposed to agents
and `peekaboo mcp` clients (e.g. Codex, Claude Code, Cursor); they are not
runnable as top-level CLI subcommands. Run `peekaboo --help` for the CLI
command list.
and `peekaboo mcp` clients (e.g. Codex, Claude Code, Cursor). Some tools also
have dedicated CLI wrappers, such as `peekaboo browser` and `peekaboo inspect-ui`.
Run `peekaboo --help` for the CLI command list.
Examples:
peekaboo tools # Show all tools

View File

@ -0,0 +1,334 @@
import Commander
import Foundation
import PeekabooCore
import TachikomaMCP
/// CLI command `browser`: forwards the parsed options to `BrowserTool` as a
/// snake_case argument dictionary and prints the tool response.
@MainActor
struct BrowserCommand: ErrorHandlingCommand, OutputFormattable, RuntimeOptionsConfigurable {
/// Browser action name. `arguments()` trims it, maps `-` to `_`, and validates it against `BrowserAction`.
var action = "status"
// Optional tool parameters. Each one is forwarded under its snake_case key only when set
// (string values must also be non-empty; see the `add` helpers below).
var channel: String?
var pageId: Int?
var url: String?
var navigationType: String?
var uid: String?
var toUid: String?
var text: String?
var value: String?
var key: String?
var submitKey: String?
var dialogAction: String?
// Plain flags are forwarded as `true` only when set.
var includeSnapshot = false
var double = false
// Negative flag: when set, `bring_to_front` is sent as `false`.
var noBringToFront = false
var background = false
var timeout: Int?
var pageSize: Int?
var pageIndex: Int?
// List filters; forwarded only when non-empty.
var types: [String] = []
var resourceTypes: [String] = []
var includePreserved = false
var messageId: Int?
var requestId: Int?
var requestFilePath: String?
var responseFilePath: String?
var path: String?
var format: String?
var quality: Int?
var fullPage = false
var traceAction: String?
// Negative flags: when set, `reload` / `auto_stop` are sent as `false`.
var noReload = false
var noAutoStop = false
var insightSetId: String?
var insightName: String?
var mcpTool: String?
/// Raw JSON text; validated as a JSON object in `arguments()` and forwarded unchanged.
var mcpArgsJson: String?
/// Runtime options; the default value has `requiresBrowserMCP` switched on.
var runtimeOptions: CommandRuntimeOptions = {
var options = CommandRuntimeOptions()
options.requiresBrowserMCP = true
return options
}()
// Assigned at the start of `run(using:)`; read through `resolvedRuntime`.
@RuntimeStorage private var runtime: CommandRuntime?
static let commandDescription = CommandDescription(
commandName: "browser",
abstract: "Control Chrome page content through the browser MCP tool",
discussion: """
Dedicated CLI wrapper around Peekaboo's browser MCP tool. Use it for DOM/page
operations such as status, connect, navigate, snapshot, click, fill, type,
screenshots, console/network inspection, and performance traces.
Examples:
peekaboo browser status --json
peekaboo browser connect --channel chrome
peekaboo browser navigate --url https://example.com
peekaboo browser snapshot --path /tmp/page.txt
"""
)
/// The runtime set by `run(using:)`; traps if it is read before being assigned.
private var resolvedRuntime: CommandRuntime {
guard let runtime else {
preconditionFailure("CommandRuntime must be configured before accessing runtime resources")
}
return runtime
}
private var services: any PeekabooServiceProviding {
self.resolvedRuntime.services
}
private var logger: Logger {
self.resolvedRuntime.logger
}
/// Whether output should be JSON, as configured on the runtime.
var jsonOutput: Bool {
self.resolvedRuntime.configuration.jsonOutput
}
var outputLogger: Logger {
self.logger
}
/// Stores the supplied options, forcing `requiresBrowserMCP` back to `true`
/// so the replacement options cannot clear the requirement.
mutating func setRuntimeOptions(_ options: CommandRuntimeOptions) {
var options = options
options.requiresBrowserMCP = true
self.runtimeOptions = options
}
/// Executes `BrowserTool` with the arguments built from this command and writes the response
/// through `MCPToolCommandOutput`.
/// - Throws: `ExitCode` values are rethrown unchanged; any other error is passed to
///   `handleError` and replaced by `ExitCode(1)`.
mutating func run(using runtime: CommandRuntime) async throws {
self.runtime = runtime
self.logger.setJsonOutputMode(self.jsonOutput)
do {
let context = MCPToolContext(services: self.services)
let tool = BrowserTool(context: context)
let response = try await tool.execute(arguments: ToolArguments(raw: self.arguments()))
try MCPToolCommandOutput.output(
tool: tool.name,
response: response,
jsonOutput: self.jsonOutput,
logger: self.outputLogger
)
} catch let exit as ExitCode {
// Keep exit codes raised by the output step instead of reporting them as errors.
throw exit
} catch {
self.handleError(error)
throw ExitCode(1)
}
}
/// Builds the tool argument dictionary from the stored properties.
/// - Throws: `ValidationError` when the action is not a `BrowserAction` raw value or when
///   `mcpArgsJson` does not parse as a JSON object.
private func arguments() throws -> [String: Any] {
// Accept hyphenated spellings by mapping `-` to `_` before validating.
let normalizedAction = self.action
.trimmingCharacters(in: .whitespacesAndNewlines)
.replacingOccurrences(of: "-", with: "_")
guard BrowserAction(rawValue: normalizedAction) != nil else {
throw ValidationError("Unsupported browser action '\(self.action)'")
}
var arguments: [String: Any] = ["action": normalizedAction]
self.add(self.channel, as: "channel", to: &arguments)
self.add(self.pageId, as: "page_id", to: &arguments)
self.add(self.url, as: "url", to: &arguments)
self.add(self.navigationType, as: "navigation_type", to: &arguments)
self.add(self.uid, as: "uid", to: &arguments)
self.add(self.toUid, as: "to_uid", to: &arguments)
self.add(self.text, as: "text", to: &arguments)
self.add(self.value, as: "value", to: &arguments)
self.add(self.key, as: "key", to: &arguments)
self.add(self.submitKey, as: "submit_key", to: &arguments)
self.add(self.dialogAction, as: "dialog_action", to: &arguments)
self.addFlag(self.includeSnapshot, as: "include_snapshot", to: &arguments)
self.addFlag(self.double, as: "double", to: &arguments)
// Negative CLI flag: the key is only written when the user opts out.
if self.noBringToFront {
arguments["bring_to_front"] = false
}
self.addFlag(self.background, as: "background", to: &arguments)
self.add(self.timeout, as: "timeout", to: &arguments)
self.add(self.pageSize, as: "page_size", to: &arguments)
self.add(self.pageIndex, as: "page_index", to: &arguments)
if !self.types.isEmpty {
arguments["types"] = self.types
}
if !self.resourceTypes.isEmpty {
arguments["resource_types"] = self.resourceTypes
}
self.addFlag(self.includePreserved, as: "include_preserved", to: &arguments)
self.add(self.messageId, as: "message_id", to: &arguments)
self.add(self.requestId, as: "request_id", to: &arguments)
self.add(self.requestFilePath, as: "request_file_path", to: &arguments)
self.add(self.responseFilePath, as: "response_file_path", to: &arguments)
self.add(self.path, as: "path", to: &arguments)
self.add(self.format, as: "format", to: &arguments)
self.add(self.quality, as: "quality", to: &arguments)
self.addFlag(self.fullPage, as: "full_page", to: &arguments)
self.add(self.traceAction, as: "trace_action", to: &arguments)
// Negative CLI flags, handled like `noBringToFront` above.
if self.noReload {
arguments["reload"] = false
}
if self.noAutoStop {
arguments["auto_stop"] = false
}
self.add(self.insightSetId, as: "insight_set_id", to: &arguments)
self.add(self.insightName, as: "insight_name", to: &arguments)
self.add(self.mcpTool, as: "mcp_tool", to: &arguments)
if let mcpArgsJson {
// The parse result is discarded: parsing only validates, and the raw string is forwarded.
do {
_ = try MCPArgumentParsing.parseJSONObject(mcpArgsJson)
} catch {
throw ValidationError("--mcp-args-json must be a JSON object")
}
arguments["mcp_args_json"] = mcpArgsJson
}
return arguments
}
/// Stores a string under `key` unless it is `nil` or empty.
private func add(_ value: String?, as key: String, to arguments: inout [String: Any]) {
guard let value, !value.isEmpty else { return }
arguments[key] = value
}
/// Stores an integer under `key` unless it is `nil`.
private func add(_ value: Int?, as key: String, to arguments: inout [String: Any]) {
guard let value else { return }
arguments[key] = value
}
/// Stores `true` under `key` when the flag is set; writes nothing otherwise.
private func addFlag(_ value: Bool, as key: String, to arguments: inout [String: Any]) {
if value {
arguments[key] = true
}
}
}
// Marker conformances: no members are added here.
// NOTE(review): presumably the requirements are met by the struct itself or protocol defaults — confirm.
extension BrowserCommand: ParsableCommand {}
extension BrowserCommand: AsyncRuntimeCommand {}
// Declares the positional action, the value options, and the boolean flags for the browser command.
// The first string of each entry is its label; `applyCommanderValues` reads values back by that label.
extension BrowserCommand: CommanderSignatureProviding {
static func commanderSignature() -> CommandSignature {
CommandSignature(
arguments: [
// Optional positional; the binder falls back to "status" when it is absent.
.make(
label: "action",
help: "Browser action (default: status)",
isOptional: true
),
],
options: [
.commandOption("channel", help: "Chrome channel", long: "channel"),
.commandOption("pageId", help: "Chrome DevTools page ID", long: "page-id"),
.commandOption("url", help: "URL for navigate/new-page", long: "url"),
.commandOption(
"navigationType",
help: "Navigation type: url|back|forward|reload",
long: "navigation-type"
),
.commandOption("uid", help: "Element uid from browser snapshot", long: "uid"),
.commandOption("toUid", help: "Drop target uid for drag", long: "to-uid"),
.commandOption("text", help: "Text for type/wait/dialog", long: "text"),
.commandOption("value", help: "Value for fill", long: "value"),
.commandOption("key", help: "Key or key combination for press-key", long: "key"),
.commandOption("submitKey", help: "Optional key after type", long: "submit-key"),
.commandOption("dialogAction", help: "Dialog action: accept|dismiss", long: "dialog-action"),
.commandOption("timeout", help: "Timeout in milliseconds", long: "timeout"),
.commandOption("pageSize", help: "Console/network page size", long: "page-size"),
.commandOption("pageIndex", help: "Console/network page index", long: "page-index"),
// Declared with a primary long name (`type`) plus an alias (`types`);
// the binder splits comma-separated values.
OptionDefinition.make(
label: "types",
names: [.long("type"), .aliasLong("types")],
help: "Console message type; repeat or comma-separate",
parsing: .singleValue
),
// Same pattern: primary `resource-type`, alias `resource-types`.
OptionDefinition.make(
label: "resourceTypes",
names: [.long("resource-type"), .aliasLong("resource-types")],
help: "Network resource type; repeat or comma-separate",
parsing: .singleValue
),
.commandOption("messageId", help: "Console message ID", long: "message-id"),
.commandOption("requestId", help: "Network request ID", long: "request-id"),
.commandOption("requestFilePath", help: "Path for saving a request body", long: "request-file-path"),
.commandOption("responseFilePath", help: "Path for saving a response body", long: "response-file-path"),
.commandOption("path", help: "Output path for snapshot/screenshot/trace", long: "path"),
.commandOption("format", help: "Screenshot format: png|jpeg|webp", long: "format"),
.commandOption("quality", help: "Screenshot quality for jpeg/webp", long: "quality"),
.commandOption("traceAction", help: "Trace action: start|stop|analyze", long: "trace-action"),
.commandOption("insightSetId", help: "Trace insight set ID", long: "insight-set-id"),
.commandOption("insightName", help: "Trace insight name", long: "insight-name"),
.commandOption("mcpTool", help: "Advanced browser MCP tool for call action", long: "mcp-tool"),
.commandOption(
"mcpArgsJson",
help: "Advanced JSON object args for call/fill-form",
long: "mcp-args-json"
),
],
flags: [
.commandFlag(
"includeSnapshot",
help: "Include fresh snapshot when supported",
long: "include-snapshot"
),
.commandFlag("double", help: "Double-click for click", long: "double"),
// The `no-…` flags are negative switches: the command sends the matching key as `false` when set.
.commandFlag("noBringToFront", help: "Do not bring selected page to front", long: "no-bring-to-front"),
.commandFlag("background", help: "Open new page in background", long: "background"),
.commandFlag(
"includePreserved",
help: "Include preserved console/network data",
long: "include-preserved"
),
.commandFlag("fullPage", help: "Capture full-page screenshot", long: "full-page"),
.commandFlag("noReload", help: "Do not reload when starting a trace", long: "no-reload"),
.commandFlag("noAutoStop", help: "Do not auto-stop performance trace", long: "no-auto-stop"),
]
)
}
}
// Copies parsed Commander values onto the command's stored properties.
extension BrowserCommand: CommanderBindableCommand {
// Throws when a `decodeOption` call fails; statements run in order, so the first
// failing option determines the error that is reported.
mutating func applyCommanderValues(_ values: CommanderBindableValues) throws {
// The action is the first positional value; "status" is used when none was given.
self.action = values.positionalValue(at: 0) ?? "status"
self.channel = values.singleOption("channel")
self.pageId = try values.decodeOption("pageId", as: Int.self)
self.url = values.singleOption("url")
self.navigationType = values.singleOption("navigationType")
self.uid = values.singleOption("uid")
self.toUid = values.singleOption("toUid")
self.text = values.singleOption("text")
self.value = values.singleOption("value")
self.key = values.singleOption("key")
self.submitKey = values.singleOption("submitKey")
self.dialogAction = values.singleOption("dialogAction")
// Flags are assigned directly, so an absent flag resets the property to `false`.
self.includeSnapshot = values.flag("includeSnapshot")
self.double = values.flag("double")
self.noBringToFront = values.flag("noBringToFront")
self.background = values.flag("background")
self.timeout = try values.decodeOption("timeout", as: Int.self)
self.pageSize = try values.decodeOption("pageSize", as: Int.self)
self.pageIndex = try values.decodeOption("pageIndex", as: Int.self)
// Every collected value may itself be a comma-separated list.
self.types = Self.splitCSV(values.optionValues("types"))
self.resourceTypes = Self.splitCSV(values.optionValues("resourceTypes"))
self.includePreserved = values.flag("includePreserved")
self.messageId = try values.decodeOption("messageId", as: Int.self)
self.requestId = try values.decodeOption("requestId", as: Int.self)
self.requestFilePath = values.singleOption("requestFilePath")
self.responseFilePath = values.singleOption("responseFilePath")
self.path = values.singleOption("path")
self.format = values.singleOption("format")
self.quality = try values.decodeOption("quality", as: Int.self)
self.fullPage = values.flag("fullPage")
self.traceAction = values.singleOption("traceAction")
self.noReload = values.flag("noReload")
self.noAutoStop = values.flag("noAutoStop")
self.insightSetId = values.singleOption("insightSetId")
self.insightName = values.singleOption("insightName")
self.mcpTool = values.singleOption("mcpTool")
self.mcpArgsJson = values.singleOption("mcpArgsJson")
}
// Splits each input on commas, trims surrounding whitespace from every piece, drops empty
// pieces, and returns all pieces as one flat list in input order.
private static func splitCSV(_ values: [String]) -> [String] {
values.flatMap { value in
value.split(separator: ",")
.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
.filter { !$0.isEmpty }
}
}
}

View File

@ -0,0 +1,132 @@
import Commander
import Foundation
import PeekabooCore
import TachikomaMCP
/// CLI command `inspect-ui`: forwards its options to `InspectUITool` and prints
/// the tool response.
@MainActor
struct InspectUICommand: ErrorHandlingCommand, OutputFormattable, RuntimeOptionsConfigurable {
    // Optional tool parameters; each is forwarded only when set.
    var appTarget: String?
    var snapshot: String?
    var maxDepth: Int?
    var maxElements: Int?
    var maxChildren: Int?

    /// Runtime options; the default value requires accessibility-tree inspection support.
    var runtimeOptions: CommandRuntimeOptions = {
        var defaults = CommandRuntimeOptions()
        defaults.requiresInspectAccessibilityTree = true
        return defaults
    }()

    // Assigned at the start of `run(using:)`.
    @RuntimeStorage private var runtime: CommandRuntime?

    static let commandDescription = CommandDescription(
        commandName: "inspect-ui",
        abstract: "Inspect accessible UI text through the inspect_ui MCP tool",
        discussion: """
        Dedicated CLI wrapper around Peekaboo's inspect_ui MCP tool. Use this for
        accessibility-tree text inspection when `see` screenshots are too broad.
        Examples:
        peekaboo inspect-ui --app-target TextEdit
        peekaboo inspect-ui --snapshot 1234 --max-elements 200 --json
        """
    )

    /// The runtime set by `run(using:)`; traps if it is read before being assigned.
    private var resolvedRuntime: CommandRuntime {
        if let runtime {
            return runtime
        }
        preconditionFailure("CommandRuntime must be configured before accessing runtime resources")
    }

    private var services: any PeekabooServiceProviding {
        self.resolvedRuntime.services
    }

    private var logger: Logger {
        self.resolvedRuntime.logger
    }

    /// Whether output should be JSON, as configured on the runtime.
    var jsonOutput: Bool {
        self.resolvedRuntime.configuration.jsonOutput
    }

    var outputLogger: Logger {
        self.logger
    }

    /// Stores the supplied options with the accessibility-tree requirement forced on.
    mutating func setRuntimeOptions(_ options: CommandRuntimeOptions) {
        var required = options
        required.requiresInspectAccessibilityTree = true
        self.runtimeOptions = required
    }

    /// Executes `InspectUITool` and writes its response through `MCPToolCommandOutput`.
    /// - Throws: `ExitCode` values are rethrown unchanged; any other error is passed to
    ///   `handleError` and replaced by `ExitCode(1)`.
    mutating func run(using runtime: CommandRuntime) async throws {
        self.runtime = runtime
        self.logger.setJsonOutputMode(self.jsonOutput)

        do {
            let tool = InspectUITool(context: MCPToolContext(services: self.services))
            let rawArguments = self.arguments()
            let response = try await tool.execute(arguments: ToolArguments(raw: rawArguments))
            try MCPToolCommandOutput.output(
                tool: tool.name,
                response: response,
                jsonOutput: self.jsonOutput,
                logger: self.outputLogger
            )
        } catch let exitCode as ExitCode {
            // Keep exit codes raised by the output step instead of reporting them as errors.
            throw exitCode
        } catch {
            self.handleError(error)
            throw ExitCode(1)
        }
    }

    /// Builds the snake_case argument dictionary from the options that are set.
    private func arguments() -> [String: Any] {
        var payload: [String: Any] = [:]
        self.add(self.appTarget, as: "app_target", to: &payload)
        self.add(self.snapshot, as: "snapshot", to: &payload)
        self.add(self.maxDepth, as: "max_depth", to: &payload)
        self.add(self.maxElements, as: "max_elements", to: &payload)
        self.add(self.maxChildren, as: "max_children", to: &payload)
        return payload
    }

    /// Stores a string under `key` unless it is `nil` or empty.
    private func add(_ value: String?, as key: String, to arguments: inout [String: Any]) {
        if let text = value, !text.isEmpty {
            arguments[key] = text
        }
    }

    /// Stores an integer under `key` unless it is `nil`.
    private func add(_ value: Int?, as key: String, to arguments: inout [String: Any]) {
        if let number = value {
            arguments[key] = number
        }
    }
}
// Marker conformances: no members are added here.
// NOTE(review): presumably the requirements are met by the struct itself or protocol defaults — confirm.
extension InspectUICommand: ParsableCommand {}
extension InspectUICommand: AsyncRuntimeCommand {}
// Declares the options for the inspect-ui command; it has no positional arguments or flags here.
// The first string of each entry is the label that `applyCommanderValues` reads back.
extension InspectUICommand: CommanderSignatureProviding {
static func commanderSignature() -> CommandSignature {
CommandSignature(
options: [
.commandOption("appTarget", help: "App name, bundle ID, PID, or frontmost", long: "app-target"),
.commandOption("snapshot", help: "Existing UI snapshot ID", long: "snapshot"),
.commandOption("maxDepth", help: "Maximum accessibility-tree depth", long: "max-depth"),
.commandOption("maxElements", help: "Maximum elements to inspect", long: "max-elements"),
.commandOption("maxChildren", help: "Maximum children per node", long: "max-children"),
]
)
}
}
// Copies parsed Commander values onto the command's stored properties.
extension InspectUICommand: CommanderBindableCommand {
// Throws when one of the integer options fails to decode; the statements run in order,
// so the first failing option determines the reported error.
mutating func applyCommanderValues(_ values: CommanderBindableValues) throws {
self.appTarget = values.singleOption("appTarget")
self.snapshot = values.singleOption("snapshot")
self.maxDepth = try values.decodeOption("maxDepth", as: Int.self)
self.maxElements = try values.decodeOption("maxElements", as: Int.self)
self.maxChildren = try values.decodeOption("maxChildren", as: Int.self)
}
}

View File

@ -0,0 +1,88 @@
import Commander
import Foundation
import MCP
import PeekabooCore
import TachikomaMCP
/// The `data` part of the JSON envelope emitted by the MCP wrapper commands.
struct MCPToolCommandPayload: Codable {
/// Name of the tool that produced the response.
let tool: String
/// Mirrors the tool response's error flag.
let isError: Bool
/// The response content items, unmodified.
let content: [MCP.Tool.Content]
/// One summary line per content item, joined with newlines (built in `MCPToolCommandOutput.payload`).
let text: String
/// The response metadata, if any.
let meta: Value?
}
/// JSON envelope written by `MCPToolCommandOutput.output` in JSON mode.
/// Property names are the encoded keys, so `debug_logs` is intentionally snake_case.
struct MCPToolCommandJSONEnvelope: Codable {
/// `true` when the tool response is not an error.
let success: Bool
let data: MCPToolCommandPayload
let messages: [String]?
let debug_logs: [String]
/// Populated only for error responses.
let error: ErrorInfo?
}
/// Renders MCP tool responses for the CLI wrapper commands.
@MainActor
enum MCPToolCommandOutput {
    /// Wraps a tool response in the payload used for CLI output.
    ///
    /// The `text` field holds one summary line per content item, joined with newlines.
    static func payload(tool: String, response: ToolResponse) -> MCPToolCommandPayload {
        let summaryLines = response.content.map { self.summary(for: $0) }
        return MCPToolCommandPayload(
            tool: tool,
            isError: response.isError,
            content: response.content,
            text: summaryLines.joined(separator: "\n"),
            meta: response.meta
        )
    }

    /// Writes the response as a JSON envelope or as plain text.
    ///
    /// In plain-text mode nothing is printed when the summary text is empty.
    /// - Throws: `ExitCode(1)` after the output has been written, when the response is an error.
    static func output(
        tool: String,
        response: ToolResponse,
        jsonOutput: Bool,
        logger: Logger
    ) throws {
        let payload = self.payload(tool: tool, response: response)
        let failed = response.isError

        if jsonOutput {
            // Error responses carry the summary text as the error message.
            var errorInfo: ErrorInfo?
            if failed {
                errorInfo = ErrorInfo(message: payload.text, code: .VALIDATION_ERROR)
            }
            let envelope = MCPToolCommandJSONEnvelope(
                success: !failed,
                data: payload,
                messages: nil,
                debug_logs: logger.getDebugLogs(),
                error: errorInfo
            )
            outputJSONCodable(envelope, logger: logger)
        } else if !payload.text.isEmpty {
            print(payload.text)
        }

        guard !failed else {
            throw ExitCode(1)
        }
    }

    /// Produces a one-line, human-readable summary of a single content item.
    private static func summary(for content: MCP.Tool.Content) -> String {
        switch content {
        case let .text(text, _, _):
            return text

        case let .image(data, mimeType, _, _):
            return "[Image: \(mimeType), base64 bytes: \(data.count)]"

        case let .audio(data, mimeType, _, _):
            return "[Audio: \(mimeType), base64 bytes: \(data.count)]"

        case let .resource(resource, _, _):
            // Prefer inline text, then a blob size, then just the URI.
            if let text = resource.text {
                return text
            }
            guard let blob = resource.blob else {
                return "[Resource: \(resource.uri)]"
            }
            return "[Resource: \(resource.uri), blob bytes: \(blob.count)]"

        case let .resourceLink(uri, name, title, _, mimeType, _):
            let label = title ?? name
            guard let mimeType else {
                return "[Resource Link: \(label) \(uri)]"
            }
            return "[Resource Link: \(label) \(uri), type: \(mimeType)]"
        }
    }
}

View File

@ -144,6 +144,26 @@ struct CLIRuntimeSmokeTests {
#expect((error["message"] as? String)?.contains("Unexpected argument: extra") == true)
}
/// Runs `peekaboo browser status --json --no-remote` in a child process and checks
/// the shape of the JSON envelope. Returns early when no local runtime is available.
@Test
func `peekaboo browser status emits standard JSON envelope`() async throws {
    guard Self.ensureLocalRuntimeAvailable() else { return }

    let result = try await TestChildProcess.runPeekaboo(["browser", "status", "--json", "--no-remote"])
    #expect(result.status == .exited(0))

    let decoded = try JSONSerialization.jsonObject(with: Data(result.standardOutput.utf8))
    guard let envelope = decoded as? [String: Any],
          let payload = envelope["data"] as? [String: Any]
    else {
        Issue.record("Expected JSON object output from browser status.")
        return
    }

    #expect(envelope["success"] as? Bool == true)
    #expect(payload["tool"] as? String == "browser")
    #expect(payload["isError"] as? Bool == false)

    let summaryText = payload["text"] as? String
    #expect(summaryText?.contains("Chrome DevTools MCP Status") == true)
}
@Test
func `peekaboo commander emits diagnostics JSON`() async throws {
guard Self.ensureLocalRuntimeAvailable() else { return }

View File

@ -267,6 +267,58 @@ struct CommandRuntimeInjectionTests {
#expect(!CommandRuntime.supportsInspectAccessibilityTree(for: hidden))
}
// Both handshakes advertise the same operations; only the negotiated minor version differs
// (1.7 vs 1.6). The expectations therefore pin that the inspect-UI requirement depends on
// the negotiated version, not just on the advertised operation list.
@Test
func `remote requirements reject inspect UI when required capability is unavailable`() {
var options = CommandRuntimeOptions()
options.requiresInspectAccessibilityTree = true
let supported = PeekabooBridgeHandshakeResponse(
negotiatedVersion: PeekabooBridgeProtocolVersion(major: 1, minor: 7),
hostKind: .gui,
build: nil,
supportedOperations: [.captureScreen, .inspectAccessibilityTree]
)
// Identical to `supported` except for the older minor version.
let unsupported = PeekabooBridgeHandshakeResponse(
negotiatedVersion: PeekabooBridgeProtocolVersion(major: 1, minor: 6),
hostKind: .gui,
build: nil,
supportedOperations: [.captureScreen, .inspectAccessibilityTree]
)
#expect(CommandRuntime.supportsRemoteRequirements(for: supported, options: options))
#expect(!CommandRuntime.supportsRemoteRequirements(for: unsupported, options: options))
}
// Covers three handshakes: one accepted (version 1.4 with all four browser operations),
// one rejected for its version (1.3, same operations), and one rejected for a missing
// operation (1.4 without `.browserExecute`). Each is checked through both
// `supportsBrowserMCP` and `supportsRemoteRequirements`.
@Test
func `remote requirements reject browser MCP when required capability is unavailable`() {
var options = CommandRuntimeOptions()
options.requiresBrowserMCP = true
let supported = PeekabooBridgeHandshakeResponse(
negotiatedVersion: PeekabooBridgeProtocolVersion(major: 1, minor: 4),
hostKind: .gui,
build: nil,
supportedOperations: [.captureScreen, .browserStatus, .browserConnect, .browserDisconnect, .browserExecute]
)
// Same operations as `supported`, one minor version lower.
let older = PeekabooBridgeHandshakeResponse(
negotiatedVersion: PeekabooBridgeProtocolVersion(major: 1, minor: 3),
hostKind: .gui,
build: nil,
supportedOperations: [.captureScreen, .browserStatus, .browserConnect, .browserDisconnect, .browserExecute]
)
// Same version as `supported`, but `.browserExecute` is not advertised.
let missingExecute = PeekabooBridgeHandshakeResponse(
negotiatedVersion: PeekabooBridgeProtocolVersion(major: 1, minor: 4),
hostKind: .gui,
build: nil,
supportedOperations: [.captureScreen, .browserStatus, .browserConnect, .browserDisconnect]
)
#expect(CommandRuntime.supportsBrowserMCP(for: supported))
#expect(!CommandRuntime.supportsBrowserMCP(for: older))
#expect(!CommandRuntime.supportsBrowserMCP(for: missingExecute))
#expect(CommandRuntime.supportsRemoteRequirements(for: supported, options: options))
#expect(!CommandRuntime.supportsRemoteRequirements(for: older, options: options))
#expect(!CommandRuntime.supportsRemoteRequirements(for: missingExecute, options: options))
}
@Test
func `environment bridge socket disables daemon auto start`() {
let options = CommandRuntimeOptions()

View File

@ -0,0 +1,46 @@
import Commander
import Testing
@testable import PeekabooCLI
/// Binding and signature tests for `CaptureActionCommand`.
struct CaptureActionCommandBindingTests {
    /// Parsed option values and flags must land on the matching command properties.
    @Test
    func `Capture action command binding`() throws {
        let bound = try CommanderCLIBinder.instantiateCommand(
            ofType: CaptureActionCommand.self,
            parsedValues: ParsedValues(
                positional: [],
                options: [
                    "mode": ["area"],
                    "region": ["0,0,320,240"],
                    "captureEngine": ["cg"],
                    "durationLimit": ["5"],
                    "preRollMs": ["100"],
                    "postRollMs": ["250"],
                    "actionTimeout": ["3"],
                    "path": ["/tmp/action-capture"],
                    "command": ["echo", "hello", "--flag"],
                ],
                flags: ["highlightChanges"]
            )
        )

        #expect(bound.mode == "area")
        #expect(bound.region == "0,0,320,240")
        #expect(bound.captureEngine == "cg")
        #expect(bound.durationLimit == 5)
        #expect(bound.preRollMs == 100)
        #expect(bound.postRollMs == 250)
        #expect(bound.actionTimeout == 3)
        #expect(bound.path == "/tmp/action-capture")
        #expect(bound.command == ["echo", "hello", "--flag"])
        #expect(bound.highlightChanges == true)
    }

    /// The signature offers `durationLimit` and a remaining-args `command` option,
    /// and no longer offers `duration`.
    @Test
    func `Capture action commander signature captures remaining command`() {
        let options = CaptureActionCommand.commanderSignature().options

        #expect(options.contains { $0.label == "durationLimit" })
        #expect(options.contains { $0.label == "command" && $0.parsing == .remaining })
        #expect(!options.contains { $0.label == "duration" })
    }
}

View File

@ -0,0 +1,98 @@
import Foundation
import Testing
@testable import PeekabooCLI
// Process-level tests for `CaptureActionProcessRunner.run`. They spawn real `/bin/sh`
// children and use wall-clock checks, so statement order and the sleeps are significant.
struct CaptureActionProcessRunnerTests {
// The child ignores TERM and loops forever. The run must still report a timeout with a
// non-zero exit code and come back within two seconds.
@Test
func `runner escalates timeout for TERM ignoring child`() async throws {
let started = Date()
let result = try await CaptureActionProcessRunner.run(
command: ["/bin/sh", "-c", "trap '' TERM; while true; do sleep 0.2; done"],
timeoutSeconds: 0.1
)
#expect(result.timedOut == true)
#expect(result.exitCode != 0)
#expect(Date().timeIntervalSince(started) < 2)
}
// The child writes 70000 bytes to each of stdout and stderr. The run must finish with exit
// code 0 while keeping exactly 64 * 1024 bytes per stream and flagging both as truncated.
@Test
func `runner drains output while retaining bounded text`() async throws {
let result = try await CaptureActionProcessRunner.run(
command: ["/bin/sh", "-c", "yes x | head -c 70000; yes e | head -c 70000 >&2"],
timeoutSeconds: 5
)
#expect(result.exitCode == 0)
#expect(result.stdout.utf8.count == 64 * 1024)
#expect(result.stderr.utf8.count == 64 * 1024)
#expect(result.stdoutTruncated == true)
#expect(result.stderrTruncated == true)
}
// The shell backgrounds a 2-second sleep and exits immediately. The run must return in under
// a second, i.e. it must not wait for the background job to finish.
@Test
func `runner returns when background child inherits output pipes`() async throws {
let started = Date()
let result = try await CaptureActionProcessRunner.run(
command: ["/bin/sh", "-c", "sleep 2 &"],
timeoutSeconds: 5
)
#expect(result.exitCode == 0)
#expect(result.timedOut == false)
#expect(Date().timeIntervalSince(started) < 1)
}
// A TERM-ignoring background subshell would create the marker file after one second.
// After the 0.1 s timeout the test waits 1.2 s and expects the marker to be absent.
@Test
func `timeout kills descendant processes`() async throws {
let root = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
.appendingPathComponent("peekaboo-action-timeout-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
defer { try? FileManager.default.removeItem(at: root) }
let marker = root.appendingPathComponent("descendant-survived")
let result = try await CaptureActionProcessRunner.run(
command: [
"/bin/sh",
"-c",
"trap '' TERM; (trap '' TERM; sleep 1; touch \"$1\") & wait",
// With `sh -c`, the next argument becomes $0 and the marker path becomes $1.
"sh",
marker.path,
],
timeoutSeconds: 0.1
)
// Wait past the subshell's 1-second sleep so a surviving descendant would have created the marker.
try await Task.sleep(nanoseconds: 1_200_000_000)
#expect(result.timedOut == true)
#expect(FileManager.default.fileExists(atPath: marker.path) == false)
}
// Same marker technique as above, but the run is stopped by cancelling its task after 0.1 s
// instead of by the timeout.
@Test
func `cancellation kills descendant processes`() async throws {
let root = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
.appendingPathComponent("peekaboo-action-cancel-\(UUID().uuidString)", isDirectory: true)
try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
defer { try? FileManager.default.removeItem(at: root) }
let marker = root.appendingPathComponent("descendant-survived")
let task = Task {
try await CaptureActionProcessRunner.run(
command: [
"/bin/sh",
"-c",
"(trap '' TERM; sleep 1; touch \"$1\") & wait",
"sh",
marker.path,
],
timeoutSeconds: 5
)
}
try await Task.sleep(nanoseconds: 100_000_000)
task.cancel()
// The task's result or error is irrelevant here; only the marker check matters.
_ = try? await task.value
try await Task.sleep(nanoseconds: 1_200_000_000)
#expect(FileManager.default.fileExists(atPath: marker.path) == false)
}
}

View File

@ -62,6 +62,65 @@ struct CommanderBinderProgramResolutionTests {
#expect(values.options["includeDetails"] == ["ids,bounds"])
}
/// `peekaboo browser navigate …` must resolve to the `browser` path, with the action
/// as a positional value and the options and flags under their labels.
@Test
@MainActor
func `Commander program resolves browser command`() throws {
    let program = Program(descriptors: CommanderRegistryBuilder.buildDescriptors().map(\.metadata))

    let invocation = try program.resolve(argv: [
        "peekaboo",
        "browser",
        "navigate",
        "--url", "https://example.com",
        "--timeout", "5000",
        "--json",
    ])
    let parsed = invocation.parsedValues

    #expect(invocation.path == ["browser"])
    #expect(parsed.positional == ["navigate"])
    #expect(parsed.options["url"] == ["https://example.com"])
    #expect(parsed.options["timeout"] == ["5000"])
    #expect(parsed.flags.contains("jsonOutput"))
}
/// `peekaboo inspect-ui …` must resolve to the `inspect-ui` path with its options
/// stored under their labels.
@Test
@MainActor
func `Commander program resolves inspect UI command`() throws {
    let program = Program(descriptors: CommanderRegistryBuilder.buildDescriptors().map(\.metadata))

    let invocation = try program.resolve(argv: [
        "peekaboo",
        "inspect-ui",
        "--app-target", "TextEdit",
        "--max-elements", "200",
    ])
    let parsedOptions = invocation.parsedValues.options

    #expect(invocation.path == ["inspect-ui"])
    #expect(parsedOptions["appTarget"] == ["TextEdit"])
    #expect(parsedOptions["maxElements"] == ["200"])
}
/// Everything after `--` must be collected verbatim into the `command` option,
/// including tokens that look like flags.
@Test
@MainActor
func `Commander program resolves capture action command tail`() throws {
    let program = Program(descriptors: CommanderRegistryBuilder.buildDescriptors().map(\.metadata))

    let invocation = try program.resolve(argv: [
        "peekaboo",
        "capture",
        "action",
        "--duration-limit", "3",
        "--",
        "echo",
        "hello",
        "--flag",
    ])
    let parsedOptions = invocation.parsedValues.options

    #expect(invocation.path == ["capture", "action"])
    #expect(parsedOptions["durationLimit"] == ["3"])
    #expect(parsedOptions["command"] == ["echo", "hello", "--flag"])
}
@Test
@MainActor
func `Commander router resolves agent permission alias before task argument`() throws {

View File

@ -19,4 +19,14 @@ struct CommanderRuntimeRouterHelpPathTests {
}
#expect(exitCode == .success)
}
/// A `--help` token after `--` belongs to the wrapped command: the router must still
/// resolve `CaptureActionCommand` and keep the token in the command tail.
@Test
func `help tokens after double dash stay in capture action command tail`() throws {
    let route = try CommanderRuntimeRouter.resolve(
        argv: ["peekaboo", "capture", "action", "--", "/bin/echo", "--help"]
    )
    let commandTail = route.parsedValues.options["command"]

    #expect(ObjectIdentifier(route.type) == ObjectIdentifier(CaptureActionCommand.self))
    #expect(commandTail == ["/bin/echo", "--help"])
}
}

View File

@ -46,7 +46,8 @@ struct FocusErrorMappingTests {
let envelope = PeekabooBridgeErrorEnvelope(
code: .permissionDenied,
message: "Operation captureScreen is not allowed with current permissions",
permission: .screenRecording)
permission: .screenRecording
)
#expect(errorCode(for: envelope) == .PERMISSION_ERROR_SCREEN_RECORDING)
}
@ -56,7 +57,8 @@ struct FocusErrorMappingTests {
let envelope = PeekabooBridgeErrorEnvelope(
code: .permissionDenied,
message: "Operation captureArea is not allowed with current permissions",
permission: .screenRecording)
permission: .screenRecording
)
#expect(errorMessage(for: envelope) == "Operation captureArea is not allowed with current permissions")
#expect(!errorMessage(for: envelope).contains("PeekabooBridgeErrorEnvelope error"))
@ -68,7 +70,8 @@ struct FocusErrorMappingTests {
code: .internalError,
message: "Bridge operation failed",
details: "Screen capture service rejected the request",
permission: .screenRecording)
permission: .screenRecording
)
let details = errorDetails(for: envelope)
#expect(details?.contains("Screen capture service rejected the request") == true)

View File

@ -0,0 +1,78 @@
import Commander
import Testing
@testable import PeekabooCLI
/// Checks that Commander-parsed values bind onto the `BrowserCommand` and `InspectUICommand` wrappers.
struct MCPWrapperCommandBindingTests {
    /// Parsed values carrying no positionals, options, or flags.
    private static var noArguments: ParsedValues {
        ParsedValues(positional: [], options: [:], flags: [])
    }

    @Test
    func `Browser command binding`() throws {
        let browser = try CommanderCLIBinder.instantiateCommand(
            ofType: BrowserCommand.self,
            parsedValues: ParsedValues(
                positional: ["navigate"],
                options: [
                    "channel": ["chrome"],
                    "url": ["https://example.com"],
                    "timeout": ["5000"],
                    "types": ["error,warning", "info"],
                    "resourceTypes": ["script", "xhr"],
                ],
                flags: ["background", "includeSnapshot", "noReload"]
            )
        )

        // The positional argument selects the action.
        #expect(browser.action == "navigate")
        #expect(browser.channel == "chrome")
        #expect(browser.url == "https://example.com")
        #expect(browser.timeout == 5000)
        // Comma-separated and repeated values end up in a single flat list.
        #expect(browser.types == ["error", "warning", "info"])
        #expect(browser.resourceTypes == ["script", "xhr"])
        #expect(browser.background == true)
        #expect(browser.includeSnapshot == true)
        #expect(browser.noReload == true)
    }

    @Test
    func `Browser command requires remote browser MCP capability`() throws {
        let browser = try CommanderCLIBinder.instantiateCommand(
            ofType: BrowserCommand.self,
            parsedValues: Self.noArguments
        )
        #expect(browser.runtimeOptions.requiresBrowserMCP == true)
    }

    @Test
    func `Browser command defaults to status`() throws {
        let browser = try CommanderCLIBinder.instantiateCommand(
            ofType: BrowserCommand.self,
            parsedValues: Self.noArguments
        )
        #expect(browser.action == "status")
    }

    @Test
    func `Inspect UI command binding`() throws {
        let inspector = try CommanderCLIBinder.instantiateCommand(
            ofType: InspectUICommand.self,
            parsedValues: ParsedValues(
                positional: [],
                options: [
                    "appTarget": ["TextEdit"],
                    "snapshot": ["snapshot-123"],
                    "maxDepth": ["4"],
                    "maxElements": ["200"],
                    "maxChildren": ["20"],
                ],
                flags: []
            )
        )

        #expect(inspector.appTarget == "TextEdit")
        #expect(inspector.snapshot == "snapshot-123")
        // Numeric limits arrive as strings and are bound as integers.
        #expect(inspector.maxDepth == 4)
        #expect(inspector.maxElements == 200)
        #expect(inspector.maxChildren == 20)
    }

    @Test
    func `Inspect UI command requires remote inspect capability`() throws {
        let inspector = try CommanderCLIBinder.instantiateCommand(
            ofType: InspectUICommand.self,
            parsedValues: Self.noArguments
        )
        #expect(inspector.runtimeOptions.requiresInspectAccessibilityTree == true)
    }
}

View File

@ -10,7 +10,7 @@ struct ToolsCommandTests {
let config = ToolsCommand.commandDescription
#expect(config.commandName == "tools")
#expect(config.abstract == "List the MCP/agent tool catalog (not CLI commands)")
#expect(config.abstract == "List the MCP/agent tool catalog")
#expect(config.discussion != nil)
let discussion = config.discussion ?? ""
#expect(discussion.contains("Examples:"))

View File

@ -4,6 +4,8 @@
### Added
- MCP now exposes the bounded `capture` tool for live/video frame capture, contact sheets, metadata, and optional MP4 output. Thanks @coygeek for #169.
- Added dedicated CLI wrappers for MCP-only browser/page and accessibility-tree inspection via `peekaboo browser` and `peekaboo inspect-ui`. Thanks @coygeek for #173.
- Added `peekaboo capture action`, which records adaptive live capture around a child command with pre-roll, post-roll, timeout, artifact validation, and optional MP4 output. Thanks @coygeek for #171.
### Changed
- Documented background vs. foreground input delivery across the README, automation guide, quickstart, permissions, and interaction command docs.

View File

@ -171,7 +171,7 @@ extension WatchCaptureSession {
}
func shouldEndSession(elapsedNs: UInt64, durationNs: UInt64) -> Bool {
elapsedNs >= durationNs
self.hasStopRequest() || elapsedNs >= durationNs
}
func hitFrameCap() -> Bool {
@ -269,9 +269,22 @@ extension WatchCaptureSession {
func sleep(ns: UInt64, since start: Date) async throws {
// Video input already has intrinsic cadence; do not add wall-clock throttling.
if self.frameSource != nil { return }
if self.hasStopRequest() { return }
let elapsed = UInt64(Date().timeIntervalSince(start) * 1_000_000_000)
if ns > elapsed {
try await Task.sleep(nanoseconds: ns - elapsed)
guard ns > elapsed else { return }
try Task.checkCancellation()
try await withThrowingTaskGroup(of: Void.self) { group in
group.addTask {
try await Task.sleep(nanoseconds: ns - elapsed)
}
group.addTask { [weak self] in
await self?.waitForStopRequest()
}
_ = try await group.next()
group.cancelAll()
try Task.checkCancellation()
}
}
}

View File

@ -62,6 +62,66 @@ public struct WatchCaptureConfiguration {
}
}
/// One-shot stop flag that synchronous callers can poll and async callers can suspend on.
///
/// All mutable state is guarded by `lock`. Continuations are always resumed after the lock
/// has been released, so a resumed waiter never runs while the lock is held.
private final class WatchCaptureStopSignal: @unchecked Sendable {
    private let lock = NSLock()
    /// Becomes `true` on the first `request()`; nothing in this class resets it.
    private var requested = false
    /// Suspended `wait()` callers, keyed per call so a cancelled waiter can be removed individually.
    private var continuations: [UUID: CheckedContinuation<Void, Never>] = [:]

    /// Marks the signal as requested and resumes every registered waiter.
    /// Calls after the first one return without doing anything.
    func request() {
        let waiters: [CheckedContinuation<Void, Never>] = self.lock.withLock {
            guard !self.requested else { return [] }
            self.requested = true
            let pending = Array(self.continuations.values)
            self.continuations.removeAll()
            return pending
        }
        for waiter in waiters {
            waiter.resume()
        }
    }

    /// Returns whether `request()` has been called.
    func isRequested() -> Bool {
        self.lock.withLock { self.requested }
    }

    /// Suspends until `request()` is called or the calling task is cancelled.
    /// Returns immediately when either has already happened.
    func wait() async {
        guard !self.isRequested() else { return }
        let waiterID = UUID()
        await withTaskCancellationHandler {
            await withCheckedContinuation { (continuation: CheckedContinuation<Void, Never>) in
                // Re-check under the lock: a request or cancellation may have landed since the
                // fast-path check above, in which case nobody would resume a stored continuation.
                let registered = self.lock.withLock { () -> Bool in
                    guard !self.requested, !Task.isCancelled else { return false }
                    self.continuations[waiterID] = continuation
                    return true
                }
                if !registered {
                    continuation.resume()
                }
            }
        } onCancel: {
            self.cancelWait(waiterID)
        }
    }

    /// Removes the waiter registered under `id`, if any, and resumes it.
    private func cancelWait(_ id: UUID) {
        let waiter = self.lock.withLock { self.continuations.removeValue(forKey: id) }
        waiter?.resume()
    }
}
/// Adaptive PNG capture session for agents.
@MainActor
public final class WatchCaptureSession {
@ -91,6 +151,7 @@ public final class WatchCaptureSession {
var warnings: [CaptureWarning] = []
var framesDropped: Int = 0
var totalBytes: Int = 0
private let stopSignal = WatchCaptureStopSignal()
public init(dependencies: WatchCaptureDependencies, configuration: WatchCaptureConfiguration) {
let regionValidator = WatchCaptureRegionValidator(screenService: dependencies.screenService)
@ -163,4 +224,16 @@ public final class WatchCaptureSession {
try self.store.writeJSON(metadata, to: metadataURL)
return metadata
}
/// Asks the session to stop early by triggering its stop signal.
/// The underlying signal ignores repeated requests, so calling this more than once is harmless.
public func requestStop() {
self.stopSignal.request()
}
/// Returns whether the session's stop signal has been requested.
func hasStopRequest() -> Bool {
self.stopSignal.isRequested()
}
/// Suspends until the stop signal is requested or the calling task is cancelled.
func waitForStopRequest() async {
await self.stopSignal.wait()
}
}

View File

@ -253,6 +253,98 @@ struct WatchCaptureSessionTests {
#expect(result.warnings.contains { $0.code == .sizeCap })
}
/// `requestStop()` issued while a session is running must make `run()` return within a second.
@Test
@MainActor
func `Stop request wakes cadence sleep`() async throws {
    let outputRoot = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
        .appendingPathComponent("watch-stop-\(UUID().uuidString)", isDirectory: true)
    defer { try? FileManager.default.removeItem(at: outputRoot) }

    let frameSize = CGSize(width: 20, height: 20)
    let dependencies = WatchCaptureDependencies(
        screenCapture: StubScreenCaptureService(result: Self.makePNG(size: frameSize), size: frameSize),
        screenService: StubScreenService()
    )
    // Long duration with a slow idle cadence; presumably this leaves the session waiting
    // between frames when the stop request arrives.
    let configuration = WatchCaptureConfiguration(
        scope: WatchScope(kind: .frontmost),
        options: WatchCaptureOptions(
            duration: 30,
            idleFps: 0.1,
            activeFps: 1,
            changeThresholdPercent: 100,
            heartbeatSeconds: 0,
            quietMsToIdle: 0,
            maxFrames: 10,
            maxMegabytes: nil,
            highlightChanges: false,
            captureFocus: .auto,
            resolutionCap: nil,
            diffStrategy: .fast,
            diffBudgetMs: nil
        ),
        outputRoot: outputRoot,
        autoclean: WatchAutocleanConfig(minutes: 1, managed: false)
    )
    let session = WatchCaptureSession(dependencies: dependencies, configuration: configuration)

    let runTask = Task { @MainActor in
        try await session.run()
    }
    // Give the session time to start before asking it to stop.
    try await Task.sleep(nanoseconds: 200_000_000)

    let stopRequestedAt = Date()
    session.requestStop()
    let captureResult = try await runTask.value

    #expect(captureResult.frames.count >= 1)
    #expect(Date().timeIntervalSince(stopRequestedAt) < 1)
}
/// Cancelling the task that drives `run()` must surface `CancellationError` within a second.
@Test
@MainActor
func `Task cancellation wakes cadence sleep`() async throws {
    let outputRoot = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
        .appendingPathComponent("watch-cancel-\(UUID().uuidString)", isDirectory: true)
    defer { try? FileManager.default.removeItem(at: outputRoot) }

    let frameSize = CGSize(width: 20, height: 20)
    let dependencies = WatchCaptureDependencies(
        screenCapture: StubScreenCaptureService(result: Self.makePNG(size: frameSize), size: frameSize),
        screenService: StubScreenService()
    )
    // Long duration with a slow idle cadence; presumably this leaves the session waiting
    // between frames when the cancellation arrives.
    let configuration = WatchCaptureConfiguration(
        scope: WatchScope(kind: .frontmost),
        options: WatchCaptureOptions(
            duration: 30,
            idleFps: 0.1,
            activeFps: 1,
            changeThresholdPercent: 100,
            heartbeatSeconds: 0,
            quietMsToIdle: 0,
            maxFrames: 10,
            maxMegabytes: nil,
            highlightChanges: false,
            captureFocus: .auto,
            resolutionCap: nil,
            diffStrategy: .fast,
            diffBudgetMs: nil
        ),
        outputRoot: outputRoot,
        autoclean: WatchAutocleanConfig(minutes: 1, managed: false)
    )
    let session = WatchCaptureSession(dependencies: dependencies, configuration: configuration)

    let runTask = Task { @MainActor in
        try await session.run()
    }
    // Give the session time to start before cancelling it.
    try await Task.sleep(nanoseconds: 200_000_000)

    let cancelledAt = Date()
    runTask.cancel()
    do {
        _ = try await runTask.value
        // A normal return means the cancellation was swallowed somewhere.
        Issue.record("Expected cancellation to propagate")
    } catch is CancellationError {
        #expect(Date().timeIntervalSince(cancelledAt) < 1)
    }
}
@Test
@MainActor
func `Frame provider prefers stable window id when present`() async throws {

View File

@ -8,7 +8,7 @@ read_when:
# Browser Tool (Chrome DevTools MCP)
Peekaboo exposes a native `browser` tool that brokers Chrome DevTools MCP. Use it for Chrome page content:
Peekaboo exposes a native `browser` tool that brokers Chrome DevTools MCP. Agents call it through MCP, and scripts can use the dedicated `peekaboo browser` CLI wrapper. Use it for Chrome page content:
- DOM/accessibility snapshots
- page-level click/fill/type/navigation
@ -89,6 +89,18 @@ Advanced escape hatch:
## Examples
CLI:
```bash
peekaboo browser status --json
peekaboo browser connect --channel chrome
peekaboo browser navigate --url https://example.com
peekaboo browser snapshot --path /tmp/page.txt
peekaboo browser network --resource-type xhr --page-size 20 --json
```
MCP JSON:
```json
{ "action": "status" }
```

View File

@ -7,7 +7,7 @@ read_when:
# CLI Command Reference
Peekaboo's CLI covers most of what agents can do; a few capabilities (notably `browser` and `inspect_ui`) are currently exposed only as MCP/agent tools via `peekaboo mcp` and have no top-level CLI command. Run `peekaboo tools` to see the MCP/agent catalog and `peekaboo --help` for the CLI command list. Commands share the same snapshot cache and most support `--json` (alias: `--json-output`) for scripting. Run `peekaboo` with no arguments to print the root help menu, and `peekaboo --version` at any time to see the embedded build/commit metadata that Poltergeist stamped into the binary.
Peekaboo's CLI covers most of what agents can do, and selected MCP/agent tools also have dedicated per-tool CLI wrappers such as `browser` and `inspect-ui`. Run `peekaboo tools` to see the MCP/agent catalog and `peekaboo --help` for the CLI command list. Commands share the same snapshot cache and most support `--json` (alias: `--json-output`) for scripting. Run `peekaboo` with no arguments to print the root help menu, and `peekaboo --version` at any time to see the embedded build/commit metadata that Poltergeist stamped into the binary.
Use `peekaboo <command> --help` for inline flag descriptions; this page links to the authoritative docs in `docs/commands/`.
@ -15,9 +15,9 @@ Use `peekaboo <command> --help` for inline flag descriptions; this page links to
- [`see`](commands/see.md) — Capture annotated UI maps, produce snapshot IDs, and optionally run AI analysis.
- [`image`](commands/image.md) — Save raw PNG/JPG captures of screens, windows, or menu bar regions; supports `--analyze` prompts.
- `capture` — Long-running capture. `capture live` (adaptive PNG frames) replaces watch; `capture video` ingests a video and samples frames. Outputs frames, contact sheet, metadata, optional MP4.
- `capture` — Long-running capture. `capture live` (adaptive PNG frames) replaces watch; `capture action` records around a child command; `capture video` ingests a video and samples frames. Outputs frames, contact sheet, metadata, optional MP4.
- [`list`](commands/list.md) — Subcommands: `apps`, `windows`, `screens`, `menubar`, `permissions`.
- [`tools`](commands/tools.md) — List the MCP/agent tool catalog (not CLI commands); supports `--verbose` and `--json`.
- [`tools`](commands/tools.md) — List the MCP/agent tool catalog; supports `--verbose` and `--json`.
- [`completions`](commands/completions.md) — Generate shell-native completions for zsh, bash, and fish from Commander metadata.
- [`run`](commands/run.md) — Execute `.peekaboo.json` scripts (`--output`, `--no-fail-fast`).
- [`sleep`](commands/sleep.md) — Millisecond pauses between steps.
@ -54,6 +54,8 @@ Use `peekaboo <command> --help` for inline flag descriptions; this page links to
## Automation & Integrations
- [`agent`](commands/agent.md) — Natural-language automation with dry-run planning, resume, audio modes, and model overrides.
- [`browser`](browser-mcp.md) — Dedicated CLI wrapper for the browser MCP tool: Chrome page status/connect/navigation/snapshot/click/fill/type/console/network/screenshot/trace.
- `inspect-ui` — Dedicated CLI wrapper for the `inspect_ui` MCP tool, useful for accessibility-tree text/control inspection without screenshots.
- [`mcp`](commands/mcp.md) — `serve`, `list`, `add`, `remove`, `enable`, `disable`, `info`, `test`, `call`, `inspect` (stub) for Model Context Protocol workflows.
Need structured payloads? Pass `--json` (or `--json-output`) where supported, or orchestrate multiple commands inside `.peekaboo.json` scripts executed via [`peekaboo run`](commands/run.md).

View File

@ -7,9 +7,10 @@ read_when:
# `peekaboo capture`
`capture` replaces `watch` as the unified long-running capture tool. It has two subcommands:
`capture` replaces `watch` as the unified long-running capture tool. It has three subcommands:
- `capture live` — adaptive PNG burst capture of screens/windows/regions with idle/active FPS, diff-based frame keeping, contact sheet, and metadata.
- `capture action` — start adaptive live capture, run a child command, keep post-roll, stop early, and validate output artifacts.
- `capture video` — ingest an existing video, sample frames (by FPS or interval), optionally skip diff filtering, and emit the same outputs.
The MCP server exposes the same primitive as the `capture` tool. MCP arguments use snake_case names such as `duration_seconds`, `active_fps`, `threshold_percent`, `output_dir`, and `video_out`.
@ -32,6 +33,13 @@ For `capture video`, `metadata.json` and JSON stdout include `options.video` wit
- Diff/output: `--highlight-changes`, `--resolution-cap` (default 1440), `--diff-strategy fast|quality`, `--diff-budget-ms`, `--video-out <path>`
- Paths: `--path <dir>` (default temp `capture-sessions/capture-<uuid>`), `--autoclean-minutes` (default 120)
## `capture action` flags
- Targeting/focus/cadence/caps/output: same as `capture live`, except `--duration` is replaced by `--duration-limit` (default 60, max 180).
- Action timing: `--pre-roll-ms` (default 250), `--post-roll-ms` (default 500), `--action-timeout` (defaults to the remaining duration after roll time).
- Command: pass the child command after `--`, e.g. `peekaboo capture action -- echo smoke`. Commander also accepts `--command -- echo smoke`, but the `--` form is clearer for commands with their own flags.
The command exits non-zero if the child command exits non-zero, times out, or required capture artifacts are missing/empty. JSON output includes the child command exit code/stdout/stderr, the normal `CaptureResult`, and artifact validation details.
## `capture video` flags
- Required: `--input <video>` (positional `input` argument)
- Sampling: `--sample-fps <fps>` (default 2) XOR `--every-ms <ms>`
@ -53,6 +61,9 @@ peekaboo capture live --mode screen --screen-index 1 --video-out /tmp/capture.mp
# Live, record an explicit desktop region; --region also infers area mode
peekaboo capture live --region 100,120,640,360 --duration 10
# Capture a command-driven flow with pre/post-roll and JSON proof
peekaboo capture action --duration-limit 10 --json -- ./test-flow.sh --smoke
# Video ingest, sample 2 fps, trim first 5s
peekaboo capture video /path/to/demo.mov --sample-fps 2 --start-ms 5000 --video-out /tmp/demo.mp4
@ -63,6 +74,7 @@ peekaboo capture video /path/to/demo.mov --every-ms 500 --no-diff
## Design notes
- Hidden alias: `capture watch` maps to `capture live`; the old standalone `watch` tool was removed.
- Live defaults: max duration 180s, `--max-frames` 800, resolution cap 1440, diff strategy `fast` unless `--diff-strategy quality` is set.
- Action capture uses the same live sampler and can stop it early once the child command and post-roll complete.
- Video ingest uses the same diff/keep logic as live; `--no-diff` keeps every sampled frame. When no motion is detected, you may end up with a single kept frame plus a `noMotion` warning.
- Core types: `CaptureScope/Options/Result` with a pluggable `CaptureFrameSource` (ScreenCapture for live, AVAssetReader for video). Optional MP4 is written by `VideoWriter` when `--video-out` is set.
- Quick smokes:

View File

@ -7,7 +7,7 @@ read_when:
# `peekaboo tools`
`peekaboo tools` prints the MCP/agent tool catalog that `peekaboo mcp` exposes (Image, See, Click, Window, Browser, Inspect UI, etc.). These names are the tools available to agents and MCP clients — they are **not** the same as top-level CLI subcommands. Run `peekaboo --help` for the CLI command list, or invoke MCP-only tools through `peekaboo mcp` / an attached MCP client.
`peekaboo tools` prints the MCP/agent tool catalog that `peekaboo mcp` exposes (Image, See, Click, Window, Browser, Inspect UI, etc.). These names are the tools available to agents and MCP clients. Some tools also have dedicated top-level CLI wrappers, including `peekaboo browser` and `peekaboo inspect-ui`; run `peekaboo --help` for the full CLI command list.
## Key options
| Flag | Description |
@ -20,7 +20,7 @@ read_when:
- The command and MCP server both use `MCPToolCatalog`, so tool additions only need to be registered once.
- Allow/deny filtering happens before formatting (`ToolFiltering.apply`), so the output matches MCP server behavior.
- Input-strategy availability filtering also runs before formatting, so action-only tools are hidden when the current policy cannot support them.
- The command runs locally by default because it only reports the static native catalog; pass `--bridge-socket <path>` only when you need to inspect a specific bridge host.
- The command runs locally by default because it only reports the static native catalog; use per-tool wrappers or an attached MCP client to execute tools.
- Because the command implements `RuntimeOptionsConfigurable`, it respects global `--json`/`--verbose` flags even when invoked from other commands (e.g., `peekaboo learn` can embed the summaries verbatim).
## Examples