feat(mcp): expose capture tool
This commit is contained in:
parent
87d4721e29
commit
3608d9c782
@ -118,10 +118,10 @@ struct CLIRuntimeSmokeTests {
|
||||
#expect(tools?.isEmpty == false)
|
||||
#expect((dataPayload?["count"] as? Int ?? 0) > 0)
|
||||
#expect(names.contains("clipboard"))
|
||||
#expect(names.contains("capture"))
|
||||
#expect(names.contains("paste"))
|
||||
#expect(names.contains("set_value"))
|
||||
#expect(names.contains("perform_action"))
|
||||
#expect(!names.contains("capture"))
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@ -2,6 +2,9 @@
|
||||
|
||||
## [3.3.1] - Unreleased
|
||||
|
||||
### Added
|
||||
- MCP now exposes the bounded `capture` tool for live/video frame capture, contact sheets, metadata, and optional MP4 output. Thanks @coygeek for #169.
|
||||
|
||||
### Changed
|
||||
- Documented background vs. foreground input delivery across the README, automation guide, quickstart, permissions, and interaction command docs.
|
||||
- Clarified that `peekaboo tools` lists the MCP/agent tool catalog rather than top-level CLI commands. Thanks @lonexreb for #174.
|
||||
|
||||
@ -6,8 +6,7 @@ import PeekabooFoundation
|
||||
/// Frame source that samples frames from a video asset.
|
||||
public final class VideoFrameSource: CaptureFrameSource {
|
||||
private let generator: AVAssetImageGenerator
|
||||
private let times: [CMTime]
|
||||
private var index: Int = 0
|
||||
private var timeline: VideoFrameTimeline
|
||||
private let mode: CaptureMode = .screen
|
||||
public let effectiveFPS: Double
|
||||
|
||||
@ -40,22 +39,16 @@ public final class VideoFrameSource: CaptureFrameSource {
|
||||
interval = CMTime(milliseconds: everyMs)
|
||||
self.effectiveFPS = everyMs > 0 ? min(240, max(0.1, 1000.0 / Double(everyMs))) : 2.0
|
||||
} else {
|
||||
let fps = sampleFps ?? 2.0
|
||||
let fps = min(240, max(sampleFps ?? 2.0, 0.1))
|
||||
interval = CMTime(seconds: 1.0 / max(fps, 0.1), preferredTimescale: 1_000_000)
|
||||
self.effectiveFPS = fps
|
||||
}
|
||||
|
||||
var cursor = start
|
||||
var requested: [CMTime] = []
|
||||
while cursor <= end {
|
||||
requested.append(cursor)
|
||||
cursor = CMTimeAdd(cursor, interval)
|
||||
}
|
||||
if requested.count < 2 {
|
||||
requested.append(end)
|
||||
}
|
||||
self.timeline = VideoFrameTimeline(
|
||||
start: start,
|
||||
end: end,
|
||||
interval: interval)
|
||||
|
||||
self.times = requested
|
||||
self.generator = AVAssetImageGenerator(asset: asset)
|
||||
self.generator.appliesPreferredTrackTransform = true
|
||||
self.generator.requestedTimeToleranceBefore = .zero
|
||||
@ -67,9 +60,7 @@ public final class VideoFrameSource: CaptureFrameSource {
|
||||
|
||||
@MainActor
|
||||
public func nextFrame() async throws -> (cgImage: CGImage?, metadata: CaptureMetadata)? {
|
||||
guard self.index < self.times.count else { return nil }
|
||||
let time = self.times[self.index]
|
||||
self.index += 1
|
||||
guard let time = self.timeline.next() else { return nil }
|
||||
|
||||
var actual = CMTime.zero
|
||||
do {
|
||||
@ -108,6 +99,37 @@ public final class VideoFrameSource: CaptureFrameSource {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lazily produces the sample times for a video, one per call to `next()`.
///
/// Times begin at `start` and advance by `interval`; a step that would reach or
/// pass `end` is clamped to `end`, so `end` is the final sample. No list of times
/// is ever materialised.
struct VideoFrameTimeline {
    /// Time that the next call to `next()` will hand out.
    private var nextTime: CMTime
    /// Upper bound of the timeline; samples are clamped to it.
    private let end: CMTime
    /// Step added to the current sample to obtain the following one.
    private let interval: CMTime
    /// Set once the final sample has been returned.
    private var exhausted = false

    init(start: CMTime, end: CMTime, interval: CMTime) {
        self.nextTime = start
        self.end = end
        self.interval = interval
    }

    /// Returns the next sample time, or `nil` once the timeline is finished.
    ///
    /// The timeline finishes after returning a sample that is at or beyond `end`,
    /// or when adding `interval` does not yield a later numeric time (for example
    /// a zero interval), which prevents the same time being returned forever.
    mutating func next() -> CMTime? {
        if self.exhausted { return nil }

        let sample = self.nextTime
        if sample < self.end {
            let advanced = CMTimeAdd(sample, self.interval)
            if advanced.isNumeric, advanced > sample {
                // Clamp so the last emitted sample lands exactly on `end`.
                self.nextTime = advanced < self.end ? advanced : self.end
                return sample
            }
        }

        // Either the end was reached or the step cannot make progress.
        self.exhausted = true
        return sample
    }
}
|
||||
|
||||
extension CMTime {
|
||||
fileprivate init(milliseconds: Int) {
|
||||
self.init(value: CMTimeValue(milliseconds), timescale: 1000)
|
||||
|
||||
@ -15,8 +15,9 @@ struct WatchCaptureSessionStore {
|
||||
|
||||
func performAutoclean() -> WatchWarning? {
|
||||
guard self.managedAutoclean else { return nil }
|
||||
guard self.autocleanMinutes > 0 else { return nil }
|
||||
let root = self.outputRoot.deletingLastPathComponent()
|
||||
guard root.lastPathComponent == "watch-sessions" else { return nil }
|
||||
guard Self.autocleanRootNames.contains(root.lastPathComponent) else { return nil }
|
||||
guard let contents = try? self.fileManager.contentsOfDirectory(
|
||||
at: root,
|
||||
includingPropertiesForKeys: [.contentModificationDateKey],
|
||||
@ -26,6 +27,7 @@ struct WatchCaptureSessionStore {
|
||||
let deadline = Date().addingTimeInterval(TimeInterval(-self.autocleanMinutes) * 60)
|
||||
var removed = 0
|
||||
for url in contents {
|
||||
guard url.standardizedFileURL != self.outputRoot.standardizedFileURL else { continue }
|
||||
guard let attrs = try? url.resourceValues(forKeys: [.contentModificationDateKey]),
|
||||
let modified = attrs.contentModificationDate else { continue }
|
||||
if modified < deadline {
|
||||
@ -38,7 +40,7 @@ struct WatchCaptureSessionStore {
|
||||
guard removed > 0 else { return nil }
|
||||
return WatchWarning(
|
||||
code: .autoclean,
|
||||
message: "Autoclean removed \(removed) old watch sessions",
|
||||
message: "Autoclean removed \(removed) old capture sessions",
|
||||
details: ["session": self.sessionId])
|
||||
}
|
||||
|
||||
@ -46,4 +48,6 @@ struct WatchCaptureSessionStore {
|
||||
let data = try JSONEncoder().encode(value)
|
||||
try data.write(to: url, options: .atomic)
|
||||
}
|
||||
|
||||
private static let autocleanRootNames: Set<String> = ["watch-sessions", "capture-sessions"]
|
||||
}
|
||||
|
||||
@ -25,6 +25,7 @@ public enum MCPToolCatalog {
|
||||
[
|
||||
// Core tools
|
||||
ImageTool(context: context),
|
||||
CaptureTool(context: context),
|
||||
AnalyzeTool(),
|
||||
BrowserTool(context: context),
|
||||
ListTool(context: context),
|
||||
|
||||
@ -3,7 +3,34 @@ import PeekabooAutomationKit
|
||||
|
||||
enum CaptureMetaBuilder {
|
||||
static func buildMeta(from summary: CaptureMetaSummary) -> Value {
|
||||
let meta: [String: Value] = [
|
||||
.object(self.summaryMeta(from: summary))
|
||||
}
|
||||
|
||||
/// Builds the full MCP metadata object for a finished capture session.
///
/// Starts from the summary metadata derived from `result`, then adds the capture
/// source, the optional input/output video paths, the session statistics, and
/// one entry per warning.
static func buildMeta(from result: CaptureSessionResult) -> Value {
    let stats = result.stats
    let statsPayload: [String: Value] = [
        "duration_ms": .int(stats.durationMs),
        "fps_idle": .double(stats.fpsIdle),
        "fps_active": .double(stats.fpsActive),
        "fps_effective": .double(stats.fpsEffective),
        "frames_kept": .int(stats.framesKept),
        "frames_dropped": .int(stats.framesDropped),
        "max_frames_hit": .bool(stats.maxFramesHit),
        "max_mb_hit": .bool(stats.maxMbHit),
    ]
    let warningPayload: [Value] = result.warnings.map { self.warningMeta($0) }

    var payload = self.summaryMeta(from: CaptureMetaSummary.make(from: result))
    payload["source"] = .string(result.source.rawValue)
    // Video paths are only reported when the session actually has them.
    if let inputVideo = result.videoIn {
        payload["video_in"] = .string(inputVideo)
    }
    if let outputVideo = result.videoOut {
        payload["video_out"] = .string(outputVideo)
    }
    payload["stats"] = .object(statsPayload)
    payload["warnings"] = .array(warningPayload)
    return .object(payload)
}
|
||||
|
||||
private static func summaryMeta(from summary: CaptureMetaSummary) -> [String: Value] {
|
||||
[
|
||||
"frames": .array(summary.frames.map { .string($0) }),
|
||||
"contact": .string(summary.contactPath),
|
||||
"metadata": .string(summary.metadataPath),
|
||||
@ -15,6 +42,16 @@ enum CaptureMetaBuilder {
|
||||
"contact_thumb_height": .string("\(summary.contactThumbSize.height)"),
|
||||
"contact_sampled_indexes": .array(summary.contactSampledIndexes.map { .string("\($0)") }),
|
||||
]
|
||||
}
|
||||
|
||||
/// Converts one capture warning into its metadata object.
///
/// The object always carries `code` and `message`; `details` is included only
/// when the warning has details, with every detail value encoded as a string.
private static func warningMeta(_ warning: CaptureWarning) -> Value {
    let base: [String: Value] = [
        "code": .string(warning.code.rawValue),
        "message": .string(warning.message),
    ]
    guard let details = warning.details else {
        return .object(base)
    }

    var entry = base
    entry["details"] = .object(details.mapValues { Value.string($0) })
    return .object(entry)
}
|
||||
}
|
||||
|
||||
@ -18,11 +18,11 @@ struct CaptureRequest {
|
||||
let videoOut: String?
|
||||
|
||||
init(arguments: ToolArguments, windows: any WindowManagementServiceProtocol) async throws {
|
||||
let input = try arguments.decode(CaptureInput.self)
|
||||
let input = try CaptureInput(arguments: arguments)
|
||||
self.source = try CaptureToolArgumentResolver.source(from: input.source)
|
||||
|
||||
let constraints = try CaptureRequest.constraints(from: input)
|
||||
let outputDir = if let dir = input.output_dir {
|
||||
let outputDir = if let dir = input.outputDir {
|
||||
CaptureToolPathResolver.outputDirectory(from: dir)
|
||||
} else {
|
||||
URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
|
||||
@ -30,7 +30,7 @@ struct CaptureRequest {
|
||||
}
|
||||
self.outputDirectory = outputDir
|
||||
self.autocleanMinutes = input.autocleanMinutes ?? 120
|
||||
self.usesDefaultOutput = input.output_dir == nil
|
||||
self.usesDefaultOutput = input.outputDir == nil
|
||||
self.videoOut = CaptureToolPathResolver.filePath(from: input.videoOut)
|
||||
|
||||
switch self.source {
|
||||
@ -82,11 +82,11 @@ private struct CaptureInput: Codable {
|
||||
let mode: String?
|
||||
let app: String?
|
||||
let pid: Int?
|
||||
let window_title: String?
|
||||
let window_index: Int?
|
||||
let screen_index: Int?
|
||||
let windowTitle: String?
|
||||
let windowIndex: Int?
|
||||
let screenIndex: Int?
|
||||
let region: String?
|
||||
let capture_focus: String?
|
||||
let captureFocus: String?
|
||||
|
||||
let durationSeconds: Double?
|
||||
let idleFps: Double?
|
||||
@ -108,9 +108,16 @@ private struct CaptureInput: Codable {
|
||||
let resolutionCap: Double?
|
||||
let diffStrategy: String?
|
||||
let diffBudgetMs: Int?
|
||||
let output_dir: String?
|
||||
let outputDir: String?
|
||||
let autocleanMinutes: Int?
|
||||
let videoOut: String?
|
||||
|
||||
/// Decodes the capture input from raw MCP tool arguments.
///
/// The arguments are re-encoded as JSON and decoded with snake_case key
/// conversion, so keys such as `output_dir` populate `outputDir`.
///
/// - Throws: Any error raised while encoding the raw arguments or decoding them
///   into this type.
init(arguments: ToolArguments) throws {
    let snakeCaseDecoder = JSONDecoder()
    snakeCaseDecoder.keyDecodingStrategy = .convertFromSnakeCase

    let payload = try JSONEncoder().encode(arguments.rawValue)
    self = try snakeCaseDecoder.decode(Self.self, from: payload)
}
|
||||
}
|
||||
|
||||
extension CaptureRequest {
|
||||
@ -131,8 +138,8 @@ extension CaptureRequest {
|
||||
{
|
||||
let modeStr = input.mode
|
||||
let explicitApp = input.app
|
||||
let windowTitle = input.window_title
|
||||
let windowIndex = input.window_index
|
||||
let windowTitle = input.windowTitle
|
||||
let windowIndex = input.windowIndex
|
||||
|
||||
let mode = try CaptureToolArgumentResolver.mode(
|
||||
from: modeStr,
|
||||
@ -141,7 +148,7 @@ extension CaptureRequest {
|
||||
|
||||
switch mode {
|
||||
case .screen:
|
||||
let screenIndex = input.screen_index
|
||||
let screenIndex = input.screenIndex
|
||||
return CaptureScope(
|
||||
kind: .screen,
|
||||
screenIndex: screenIndex,
|
||||
@ -190,7 +197,7 @@ extension CaptureRequest {
|
||||
let quiet = max(Int(input.quietMs ?? 1000), 0)
|
||||
let maxFrames = max(constraints.maxFrames, 1)
|
||||
let maxMbAdjusted = constraints.maxMb.flatMap { $0 > 0 ? $0 : nil }
|
||||
let focus = try CaptureToolArgumentResolver.captureFocus(from: input.capture_focus)
|
||||
let focus = try CaptureToolArgumentResolver.captureFocus(from: input.captureFocus)
|
||||
|
||||
return CaptureOptions(
|
||||
duration: duration,
|
||||
|
||||
@ -104,19 +104,28 @@ public struct CaptureTool: MCPTool {
|
||||
configuration: configuration)
|
||||
let result = try await session.run()
|
||||
|
||||
let summary = """
|
||||
capture kept \(result.stats.framesKept) frames (dropped \(result.stats.framesDropped)),
|
||||
contact sheet \(result.contactSheet.path)
|
||||
"""
|
||||
var summaryLines = [
|
||||
"capture kept \(result.stats.framesKept) frames (dropped \(result.stats.framesDropped))",
|
||||
"contact: \(result.contactSheet.path)",
|
||||
"metadata: \(result.metadataFile)",
|
||||
"frames: \(result.frames.count) files",
|
||||
]
|
||||
if let videoOut = result.videoOut {
|
||||
summaryLines.insert("video: \(videoOut)", at: 3)
|
||||
}
|
||||
if !result.warnings.isEmpty {
|
||||
let warnings = result.warnings.map(\.message).joined(separator: "; ")
|
||||
summaryLines.append("warnings: \(warnings)")
|
||||
}
|
||||
let summary = summaryLines.joined(separator: "\n")
|
||||
let meta = ToolEventSummary(
|
||||
actionDescription: "Capture",
|
||||
notes: summary)
|
||||
|
||||
let metaSummary = CaptureMetaSummary.make(from: result)
|
||||
return ToolResponse.text(
|
||||
summary,
|
||||
meta: ToolEventSummary.merge(
|
||||
summary: meta,
|
||||
into: CaptureMetaBuilder.buildMeta(from: metaSummary)))
|
||||
into: CaptureMetaBuilder.buildMeta(from: result)))
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
@preconcurrency import AVFoundation
|
||||
import CoreGraphics
|
||||
import Foundation
|
||||
import ImageIO
|
||||
@ -98,6 +99,65 @@ struct WatchCaptureSessionTests {
|
||||
#expect(result.boundingBoxes.count <= 5)
|
||||
}
|
||||
|
||||
@Test
|
||||
/// A 60 second timeline stepped at 1 ms must hand out its first samples on
/// demand, one per call to `next()`.
func `Video frame timeline samples lazily without precomputed frame cap`() {
    let oneMillisecond = CMTime(value: 1, timescale: 1000)
    var timeline = VideoFrameTimeline(
        start: .zero,
        end: CMTime(seconds: 60, preferredTimescale: 1000),
        interval: oneMillisecond)

    let expectedSamples: [CMTime] = [
        .zero,
        oneMillisecond,
        CMTime(value: 2, timescale: 1000),
    ]
    for expected in expectedSamples {
        #expect(timeline.next() == expected)
    }
}
|
||||
|
||||
@Test
|
||||
/// Autoclean must delete a stale sibling session under `capture-sessions`
/// while leaving the session that is currently running untouched.
func `Autoclean removes old default capture sessions`() throws {
    // Unique scratch directory so repeated or parallel runs never share state.
    let sandbox = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
        .appendingPathComponent("peekaboo-autoclean-\(UUID().uuidString)", isDirectory: true)
    // Remove everything this test created, even if an expectation fails or a call throws;
    // previously each run left its directory tree behind in the temp folder.
    defer { try? FileManager.default.removeItem(at: sandbox) }

    let root = sandbox.appendingPathComponent("capture-sessions", isDirectory: true)
    let oldSession = root.appendingPathComponent("capture-old", isDirectory: true)
    let currentSession = root.appendingPathComponent("capture-current", isDirectory: true)
    try FileManager.default.createDirectory(at: oldSession, withIntermediateDirectories: true)
    try FileManager.default.createDirectory(at: currentSession, withIntermediateDirectories: true)
    // Backdate the old session by an hour so it falls outside the 1 minute retention below.
    try FileManager.default.setAttributes(
        [.modificationDate: Date().addingTimeInterval(-3600)],
        ofItemAtPath: oldSession.path)

    let store = WatchCaptureSessionStore(
        outputRoot: currentSession,
        autocleanMinutes: 1,
        managedAutoclean: true,
        sessionId: "capture-current")
    let warning = store.performAutoclean()

    #expect(warning?.code == .autoclean)
    #expect(!FileManager.default.fileExists(atPath: oldSession.path))
    #expect(FileManager.default.fileExists(atPath: currentSession.path))
}
|
||||
|
||||
@Test
|
||||
/// With a retention of zero minutes autoclean must do nothing: no warning is
/// produced and the (backdated) current session directory survives.
func `Autoclean ignores non-positive retention and keeps current session`() throws {
    // Unique scratch directory so repeated or parallel runs never share state.
    let sandbox = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
        .appendingPathComponent("peekaboo-autoclean-current-\(UUID().uuidString)", isDirectory: true)
    // Remove everything this test created, even if an expectation fails or a call throws;
    // previously each run left its directory tree behind in the temp folder.
    defer { try? FileManager.default.removeItem(at: sandbox) }

    let root = sandbox.appendingPathComponent("capture-sessions", isDirectory: true)
    let currentSession = root.appendingPathComponent("capture-current", isDirectory: true)
    try FileManager.default.createDirectory(at: currentSession, withIntermediateDirectories: true)
    // Backdate the session so it would look stale if retention were applied.
    try FileManager.default.setAttributes(
        [.modificationDate: Date().addingTimeInterval(-3600)],
        ofItemAtPath: currentSession.path)

    let store = WatchCaptureSessionStore(
        outputRoot: currentSession,
        autocleanMinutes: 0,
        managedAutoclean: true,
        sessionId: "capture-current")
    let warning = store.performAutoclean()

    #expect(warning == nil)
    #expect(FileManager.default.fileExists(atPath: currentSession.path))
}
|
||||
|
||||
@Test
|
||||
@MainActor
|
||||
func `Stops at max-frames cap and keeps first frame`() async throws {
|
||||
|
||||
@ -2,6 +2,7 @@ import Foundation
|
||||
import PeekabooAutomation
|
||||
import PeekabooAutomationKit
|
||||
import PeekabooFoundation
|
||||
import TachikomaMCP
|
||||
import Testing
|
||||
@testable import PeekabooAgentRuntime
|
||||
|
||||
@ -70,6 +71,53 @@ struct CaptureToolPathResolverTests {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
/// Every snake_case MCP argument must reach the matching field of the decoded
/// `CaptureRequest`, including tilde-expanded output paths.
func `request decodes snake case MCP capture options`() async throws {
    /// Expands a leading `~` the same way the expectations below require.
    func expandedPath(_ path: String) -> String {
        NSString(string: path).expandingTildeInPath
    }

    let windows = CaptureWindowResolverWindowService(windows: [])
    let arguments = ToolArguments(raw: [
        "source": "live",
        "mode": "area",
        "region": "1,2,30,40",
        "duration_seconds": 2.5,
        "idle_fps": 0.5,
        "active_fps": 3.0,
        "threshold_percent": 0.25,
        "heartbeat_sec": 0,
        "quiet_ms": 250,
        "capture_focus": "background",
        "highlight_changes": true,
        "max_frames": 3,
        "max_mb": 1,
        "resolution_cap": 320,
        "diff_strategy": "quality",
        "diff_budget_ms": 12,
        "output_dir": "~/Desktop/mcp-capture",
        "autoclean_minutes": 5,
        "video_out": "~/Desktop/mcp-capture.mp4",
    ])

    let request = try await CaptureRequest(arguments: arguments, windows: windows)
    let options = request.options

    // Source and scope.
    #expect(request.source == .live)
    #expect(request.scope.kind == .region)
    #expect(request.scope.region == CGRect(x: 1, y: 2, width: 30, height: 40))

    // Capture options.
    #expect(options.duration == 2.5)
    #expect(options.idleFps == 0.5)
    #expect(options.activeFps == 3.0)
    #expect(options.changeThresholdPercent == 0.25)
    #expect(options.heartbeatSeconds == 0)
    #expect(options.quietMsToIdle == 250)
    #expect(options.captureFocus == .background)
    #expect(options.highlightChanges)
    #expect(options.maxFrames == 3)
    #expect(options.maxMegabytes == 1)
    #expect(options.resolutionCap == 320)
    #expect(options.diffStrategy == .quality)
    #expect(options.diffBudgetMs == 12)

    // Output locations.
    #expect(request.outputDirectory.path == expandedPath("~/Desktop/mcp-capture"))
    #expect(request.autocleanMinutes == 5)
    #expect(request.videoOut == expandedPath("~/Desktop/mcp-capture.mp4"))
}
|
||||
|
||||
@Test
|
||||
func `window resolver maps app title selection to stable window id`() async throws {
|
||||
let windows = CaptureWindowResolverWindowService(windows: [
|
||||
|
||||
@ -183,13 +183,13 @@ struct MCPToolRegistryIntegrationTests {
|
||||
filters: noToolFilters)
|
||||
let names = Set(tools.map(\.name))
|
||||
|
||||
#expect(tools.count == 26)
|
||||
#expect(tools.count == 27)
|
||||
#expect(names.contains("clipboard"))
|
||||
#expect(names.contains("paste"))
|
||||
#expect(names.contains("set_value"))
|
||||
#expect(names.contains("perform_action"))
|
||||
#expect(names.contains("inspect_ui"))
|
||||
#expect(!names.contains("capture"))
|
||||
#expect(names.contains("capture"))
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -204,7 +204,7 @@ struct MCPToolRegistryIntegrationTests {
|
||||
filters: noToolFilters))
|
||||
|
||||
let tools = registry.allTools()
|
||||
#expect(tools.count == 26)
|
||||
#expect(tools.count == 27)
|
||||
|
||||
// Verify some key tools are present
|
||||
let imageToolExists = registry.tool(named: "image") != nil
|
||||
@ -212,12 +212,14 @@ struct MCPToolRegistryIntegrationTests {
|
||||
let agentToolExists = registry.tool(named: "agent") != nil
|
||||
let clipboardToolExists = registry.tool(named: "clipboard") != nil
|
||||
let inspectUIToolExists = registry.tool(named: "inspect_ui") != nil
|
||||
let captureToolExists = registry.tool(named: "capture") != nil
|
||||
|
||||
#expect(imageToolExists)
|
||||
#expect(clickToolExists)
|
||||
#expect(agentToolExists)
|
||||
#expect(clipboardToolExists)
|
||||
#expect(inspectUIToolExists)
|
||||
#expect(captureToolExists)
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@ -10,8 +10,9 @@ struct PeekabooMCPServerTests {
|
||||
let server = try await makeServer()
|
||||
let names = await server.registeredToolNamesForTesting()
|
||||
|
||||
#expect(names.count == 26)
|
||||
#expect(names.count == 27)
|
||||
#expect(names == names.sorted())
|
||||
#expect(names.contains("capture"))
|
||||
#expect(names.contains("image"))
|
||||
#expect(names.contains("inspect_ui"))
|
||||
#expect(names.contains("click"))
|
||||
@ -19,7 +20,6 @@ struct PeekabooMCPServerTests {
|
||||
#expect(names.contains("paste"))
|
||||
#expect(names.contains("set_value"))
|
||||
#expect(names.contains("perform_action"))
|
||||
#expect(!names.contains("capture"))
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@ -12,6 +12,8 @@ read_when:
|
||||
- `capture live` — adaptive PNG burst capture of screens/windows/regions with idle/active FPS, diff-based frame keeping, contact sheet, and metadata.
|
||||
- `capture video` — ingest an existing video, sample frames (by FPS or interval), optionally skip diff filtering, and emit the same outputs.
|
||||
|
||||
The MCP server exposes the same primitive as the `capture` tool. MCP arguments use snake_case names such as `duration_seconds`, `active_fps`, `threshold_percent`, `output_dir`, and `video_out`.
|
||||
|
||||
A hidden alias `capture watch` maps to `capture live` for backwards compatibility. The old standalone `watch` command/tool is removed.
|
||||
|
||||
## Common Outputs
|
||||
|
||||
@ -17,6 +17,7 @@ read_when:
|
||||
## Implementation notes
|
||||
- `serve` instantiates `PeekabooMCPServer` and maps the transport string to `PeekabooCore.TransportType`. Stdio is the default for Claude Code integrations.
|
||||
- HTTP/SSE server transports are stubbed; they currently throw “not implemented.”
|
||||
- The native tool catalog includes bounded `capture` for live screen/window/region recording or video ingest. It writes retained frames, `contact.png`, `metadata.json`, and optional MP4 output, so use tool allow/deny filters when exposing MCP to untrusted clients.
|
||||
- UI automation tools include action-first additions: `set_value` directly mutates a settable accessibility value, and `perform_action` invokes a named accessibility action on an element from `see`.
|
||||
- `click` preserves element IDs and queries when forwarding to automation, so action-first policy can use accessibility actions before synthetic fallback.
|
||||
|
||||
|
||||
@ -26,7 +26,7 @@ Use this checklist to exercise the Swift MCP server with mcporter. It mirrors th
|
||||
```
|
||||
$MCPORTER list --stdio "$PEEKABOO_BIN mcp serve" --name peekaboo-local --schema --timeout 30000
|
||||
```
|
||||
Expect: tool catalog prints Peekaboo-native tools (image, see, list, permissions, click, type, drag, window, menu, dock, space, swipe, hotkey, clipboard, agent, sleep). Any transport/auth errors here block the rest of the suite.
|
||||
Expect: tool catalog prints Peekaboo-native tools (image, capture, see, list, permissions, click, type, drag, window, menu, dock, space, swipe, hotkey, clipboard, agent, sleep). Any transport/auth errors here block the rest of the suite.
|
||||
|
||||
2) **Permissions sanity**
|
||||
```
|
||||
@ -57,7 +57,17 @@ Use this checklist to exercise the Swift MCP server with mcporter. It mirrors th
|
||||
```
|
||||
Expect `📸 Captured …` text plus a saved file path. Open the PNG to confirm the active window is captured without the shadow frame.
|
||||
|
||||
6) **Image + analysis (optional, needs AI keys)**
|
||||
6) **Bounded live capture smoke**
|
||||
```
|
||||
$MCPORTER call --stdio "$PEEKABOO_BIN mcp serve" --name peekaboo-local \
|
||||
capture source:live mode:area region:100,100,640,360 \
|
||||
duration_seconds:2 active_fps:4 threshold_percent:0 \
|
||||
output_dir:/tmp/peekaboo-mcp/live video_out:/tmp/peekaboo-mcp/live.mp4 \
|
||||
--timeout 45000
|
||||
```
|
||||
Expect kept-frame text plus `contact.png`, `metadata.json`, one or more frame PNGs, and a non-empty MP4 when `video_out` is set.
|
||||
|
||||
7) **Image + analysis (optional, needs AI keys)**
|
||||
```
|
||||
$MCPORTER call --stdio "$PEEKABOO_BIN mcp serve" --name peekaboo-local \
|
||||
image path:/tmp/peekaboo-mcp/frontmost-analysis.png format:png \
|
||||
@ -67,7 +77,7 @@ Use this checklist to exercise the Swift MCP server with mcporter. It mirrors th
|
||||
Expect an analysis paragraph plus `savedFiles` metadata; failures here usually mean provider config or permissions issues.
|
||||
Note: OpenAI Responses (GPT‑5.x) requires `image_url` to be a string (URL or data URL). Peekaboo normalizes legacy `{ url, detail }` objects internally, but upstream tools should prefer the string form to avoid 400s.
|
||||
|
||||
7) **List cached tools after reuse (daemon/keep-alive sanity)**
|
||||
8) **List cached tools after reuse (daemon/keep-alive sanity)**
|
||||
```
|
||||
$MCPORTER list --stdio "$PEEKABOO_BIN mcp serve" --name peekaboo-local --timeout 15000
|
||||
```
|
||||
|
||||
@ -56,6 +56,7 @@ If you disable the `clipboard` tool via allow/deny filters, the injected DESKTOP
|
||||
- Any audio capture path (`AudioInputService`, voice command helpers) that transcribes speech through `PeekabooAIService`.
|
||||
Disable by clearing `PEEKABOO_AI_PROVIDERS`, removing API keys, or adding these names to your deny list when running offline.
|
||||
- **Medium risk** – can manipulate apps or data
|
||||
- `capture`: records retained screen/window/region frames, contact sheets, metadata, and optional MP4 files. Disable it when MCP or agent clients should not persist screen contents.
|
||||
- `click`, `type`, `hotkey`, `press`, and `paste`: can trigger actions in foreground apps or send process-targeted events to a background app by default when a target process is known. Use `--foreground` for focused foreground delivery. Background delivery still requires macOS event-posting access and does not prove the target app handled the event.
|
||||
- `scroll`, `swipe`, `drag`, `move`: can trigger pointer actions in foreground apps.
|
||||
- `window`, `app`, `menu_click`, `dock_launch`, `space`: can close apps, move windows, switch spaces.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user