Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1657ecfb86 | ||
|
|
6909e0adf0 | ||
|
|
707151c793 | ||
|
|
c411fb2355 |
3
.gitignore
vendored
3
.gitignore
vendored
@ -164,6 +164,9 @@ Core/**/.swiftpm/
|
||||
.cache/
|
||||
debug
|
||||
|
||||
# Crush directory
|
||||
.crush/
|
||||
|
||||
# OS generated files
|
||||
Thumbs.db
|
||||
|
||||
|
||||
@ -13,6 +13,7 @@ let package = Package(
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
|
||||
.package(url: "https://github.com/modelcontextprotocol/swift-sdk.git", from: "0.9.0"),
|
||||
.package(path: "../../Core/PeekabooCore"),
|
||||
],
|
||||
targets: [
|
||||
@ -20,6 +21,7 @@ let package = Package(
|
||||
name: "peekaboo",
|
||||
dependencies: [
|
||||
.product(name: "ArgumentParser", package: "swift-argument-parser"),
|
||||
.product(name: "MCP", package: "swift-sdk"),
|
||||
.product(name: "PeekabooCore", package: "PeekabooCore"),
|
||||
],
|
||||
swiftSettings: [
|
||||
|
||||
134
Apps/CLI/Sources/peekaboo/Commands/MCP/MCPCommand.swift
Normal file
134
Apps/CLI/Sources/peekaboo/Commands/MCP/MCPCommand.swift
Normal file
@ -0,0 +1,134 @@
|
||||
import ArgumentParser
|
||||
import Foundation
|
||||
import PeekabooCore
|
||||
import MCP
|
||||
import Logging
|
||||
|
||||
/// Root `mcp` command that groups the Model Context Protocol subcommands.
///
/// The type carries no behavior of its own: it only declares the command
/// name, the help text, and the `Serve`, `Call`, `List`, and `Inspect`
/// subcommands.
struct MCPCommand: AsyncParsableCommand {
    /// Long-form help text shown in the command's discussion section.
    private static let overview = """
    The MCP command allows Peekaboo to act as both an MCP server (exposing its tools
    to AI clients like Claude) and an MCP client (consuming other MCP servers).

    EXAMPLES:
    peekaboo mcp serve # Start MCP server on stdio
    peekaboo mcp serve --transport http # HTTP transport (future)
    peekaboo mcp call <server> <tool> # Call tool on another MCP server
    peekaboo mcp list # List available MCP servers
    """

    static let configuration = CommandConfiguration(
        commandName: "mcp",
        abstract: "Model Context Protocol server and client operations",
        discussion: MCPCommand.overview,
        subcommands: [Serve.self, Call.self, List.self, Inspect.self]
    )
}
|
||||
|
||||
// MARK: - Subcommands
|
||||
|
||||
extension MCPCommand {
|
||||
/// Subcommand that runs Peekaboo as an MCP server.
///
/// Resolves the `--transport` option to a `PeekabooCore.TransportType`,
/// creates a `PeekabooMCPServer`, and serves on it. An unrecognised
/// transport name is rejected instead of silently falling back to stdio.
struct Serve: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Start Peekaboo as an MCP server",
        discussion: """
        Starts Peekaboo as an MCP server, exposing all its tools via the
        Model Context Protocol. This allows AI clients like Claude to use
        Peekaboo's automation capabilities.

        USAGE WITH CLAUDE CODE:
        claude mcp add peekaboo -- peekaboo mcp serve

        USAGE WITH MCP INSPECTOR:
        npx @modelcontextprotocol/inspector peekaboo mcp serve
        """
    )

    @Option(help: "Transport type (stdio, http, sse)")
    var transport: String = "stdio"

    @Option(help: "Port for HTTP/SSE transport")
    var port: Int = 8080

    /// Maps a user-supplied transport name to a transport type.
    ///
    /// - Parameter name: Transport name; matched case-insensitively.
    /// - Returns: The matching transport, or `nil` when `name` is not one of
    ///   `stdio`, `http`, or `sse`.
    private static func transportType(named name: String) -> PeekabooCore.TransportType? {
        switch name.lowercased() {
        case "stdio": return .stdio
        case "http": return .http
        case "sse": return .sse
        default: return nil
        }
    }

    /// Starts the server and returns once serving has finished.
    ///
    /// - Throws: `ValidationError` when `--transport` names an unknown
    ///   transport; `ExitCode.failure` when the server cannot be created or
    ///   serving fails (the underlying error is logged first).
    func run() async throws {
        // Reject typos such as `--transport htpp` up front; previously they
        // were silently treated as stdio.
        guard let transportType = Self.transportType(named: transport) else {
            throw ValidationError(
                "Unknown transport '\(transport)'. Supported transports: stdio, http, sse."
            )
        }

        do {
            let server = try await PeekabooMCPServer()
            try await server.serve(transport: transportType, port: port)
        } catch {
            Logger.shared.error("Failed to start MCP server: \(error)")
            throw ExitCode.failure
        }
    }
}
|
||||
|
||||
/// Subcommand intended to call a tool on another MCP server.
///
/// The client side is not implemented yet: `run()` only logs an error and
/// exits with a failure code. The argument layout matches the documented
/// invocation `peekaboo mcp call <server> <tool> [--args <json>]`.
struct Call: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Call a tool on another MCP server",
        discussion: """
        Connect to another MCP server and execute a tool. This allows
        Peekaboo to consume services from other MCP servers.

        EXAMPLE:
        peekaboo mcp call claude-code edit_file --args '{"path": "main.swift"}'
        """
    )

    @Argument(help: "MCP server to connect to")
    var server: String

    // Positional, so that the documented form
    // `peekaboo mcp call <server> <tool>` actually parses.
    @Argument(help: "Tool to call")
    var tool: String

    @Option(help: "Tool arguments as JSON")
    var args: String = "{}"

    /// Logs that the MCP client is not implemented.
    /// - Throws: Always throws `ExitCode.failure`.
    func run() async throws {
        Logger.shared.error("MCP client functionality not yet implemented")
        throw ExitCode.failure
    }
}
|
||||
|
||||
/// Subcommand intended to list configured MCP servers.
///
/// Not implemented yet: running it logs an error and exits with a failure code.
struct List: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "List available MCP servers",
        discussion: "Shows configured MCP servers that can be connected to."
    )

    /// Logs the "not yet implemented" message.
    /// - Throws: Always throws `ExitCode.failure`.
    func run() async throws {
        let message = "MCP server listing not yet implemented"
        Logger.shared.error(message)
        throw ExitCode.failure
    }
}
|
||||
|
||||
/// Subcommand intended to print debugging information for MCP connections.
///
/// Not implemented yet: running it logs an error and exits with a failure code.
struct Inspect: AsyncParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Debug MCP connections",
        discussion: "Provides debugging information for MCP connections."
    )

    /// Optional server name.
    @Argument(help: "Server to inspect", completion: .default)
    var server: String?

    /// Logs the "not yet implemented" message.
    /// - Throws: Always throws `ExitCode.failure`.
    func run() async throws {
        let message = "MCP inspection not yet implemented"
        Logger.shared.error(message)
        throw ExitCode.failure
    }
}
|
||||
}
|
||||
|
||||
@ -135,6 +135,8 @@ struct Peekaboo: AsyncParsableCommand {
|
||||
SpaceCommand.self,
|
||||
// Agent commands
|
||||
AgentCommand.self,
|
||||
// MCP commands
|
||||
MCPCommand.self,
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@ let package = Package(
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.3.0"),
|
||||
.package(url: "https://github.com/modelcontextprotocol/swift-sdk.git", from: "0.9.0"),
|
||||
.package(path: "../AXorcist"),
|
||||
],
|
||||
targets: [
|
||||
@ -21,6 +22,7 @@ let package = Package(
|
||||
name: "PeekabooCore",
|
||||
dependencies: [
|
||||
.product(name: "ArgumentParser", package: "swift-argument-parser"),
|
||||
.product(name: "MCP", package: "swift-sdk"),
|
||||
.product(name: "AXorcist", package: "AXorcist"),
|
||||
],
|
||||
exclude: [
|
||||
|
||||
@ -0,0 +1,206 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// Contract implemented by every tool that can be exposed over MCP.
///
/// Conforming types must be `Sendable`.
public protocol MCPTool: Sendable {
    /// Name that identifies the tool.
    var name: String { get }

    /// Human-readable summary of what the tool does.
    var description: String { get }

    /// JSON Schema, expressed as an MCP `Value`, describing the accepted input parameters.
    var inputSchema: Value { get }

    /// Runs the tool.
    ///
    /// - Parameter arguments: The arguments supplied for this invocation.
    /// - Returns: The tool's response.
    /// - Throws: Any error raised while executing the tool.
    func execute(arguments: ToolArguments) async throws -> ToolResponse
}
|
||||
|
||||
/// `Sendable` wrapper around the arguments an MCP client passes to a tool.
///
/// The arguments are stored as an MCP `Value`. Keyed lookups only succeed
/// when that value is an object; for any other shape the getters return
/// `nil` and `isEmpty` is `true`.
public struct ToolArguments: Sendable {
    private let raw: Value

    /// Creates arguments from an untyped dictionary.
    ///
    /// - Parameter raw: Dictionary whose values are converted to `Value`
    ///   so the wrapper can be `Sendable`.
    public init(raw: [String: Any]) {
        self.raw = .object(raw.mapValues { convertToValue($0) })
    }

    /// Creates arguments from an existing MCP value.
    public init(value: Value) {
        self.raw = value
    }

    /// Decodes the arguments into a `Decodable` type by round-tripping through JSON.
    ///
    /// - Parameter type: The type to decode.
    /// - Returns: The decoded instance.
    /// - Throws: Any encoding or decoding error.
    public func decode<T: Decodable>(_ type: T.Type) throws -> T {
        let data = try JSONEncoder().encode(raw)
        return try JSONDecoder().decode(type, from: data)
    }

    /// Returns the raw value stored under `key`, or `nil` when the key is
    /// absent or the arguments are not an object.
    public func getValue(for key: String) -> Value? {
        guard case let .object(dict) = raw else { return nil }
        return dict[key]
    }

    /// `true` when the arguments are an empty object or not an object at all.
    public var isEmpty: Bool {
        guard case let .object(dict) = raw else { return true }
        return dict.isEmpty
    }

    // MARK: - Convenience methods for common types

    /// Returns the value under `key` as a string.
    ///
    /// Integers, doubles, and booleans are converted with `String(_:)`;
    /// any other non-string value yields `nil`.
    public func getString(_ key: String) -> String? {
        guard let value = getValue(for: key) else { return nil }
        switch value {
        case .string(let str):
            return str
        case .int(let num):
            return String(num)
        case .double(let num):
            return String(num)
        case .bool(let bool):
            return String(bool)
        default:
            return nil
        }
    }

    /// Returns the value under `key` as a `Double`.
    ///
    /// Accepts integers, doubles, and strings parseable by `Double(_:)`.
    public func getNumber(_ key: String) -> Double? {
        guard let value = getValue(for: key) else { return nil }
        switch value {
        case .int(let num):
            return Double(num)
        case .double(let num):
            return num
        case .string(let str):
            return Double(str)
        default:
            return nil
        }
    }

    /// Returns the value under `key` as an `Int`.
    ///
    /// Doubles are truncated toward zero. A double that is NaN, infinite, or
    /// outside the range of `Int` yields `nil` rather than trapping. Strings
    /// are parsed with `Int(_:)`.
    public func getInt(_ key: String) -> Int? {
        guard let value = getValue(for: key) else { return nil }
        switch value {
        case .int(let num):
            return num
        case .double(let num):
            // `Int(num)` traps for non-finite or out-of-range input (for
            // example the valid JSON number 1e300). Truncate first, then use
            // the failable exact conversion.
            return Int(exactly: num.rounded(.towardZero))
        case .string(let str):
            return Int(str)
        default:
            return nil
        }
    }

    /// Returns the value under `key` as a `Bool`.
    ///
    /// Strings are matched case-insensitively: `true`/`yes`/`1` map to
    /// `true` and `false`/`no`/`0` map to `false`. Any other string yields
    /// `nil`, consistent with how `getInt` and `getNumber` treat unparseable
    /// strings. Integers are `true` when non-zero.
    public func getBool(_ key: String) -> Bool? {
        guard let value = getValue(for: key) else { return nil }
        switch value {
        case .bool(let bool):
            return bool
        case .string(let str):
            switch str.lowercased() {
            case "true", "yes", "1":
                return true
            case "false", "no", "0":
                return false
            default:
                // Unrecognised text is "no value", not an implicit `false`.
                return nil
            }
        case .int(let num):
            return num != 0
        default:
            return nil
        }
    }

    /// Returns the array under `key` as strings.
    ///
    /// Non-string elements are dropped. Returns `nil` when the key is absent
    /// or the value is not an array.
    public func getStringArray(_ key: String) -> [String]? {
        guard let value = getValue(for: key), case .array(let array) = value else {
            return nil
        }
        return array.compactMap { element in
            if case .string(let str) = element {
                return str
            }
            return nil
        }
    }
}
|
||||
|
||||
/// Converts an untyped value into an MCP `Value`.
///
/// Strings, integers, doubles, booleans, arrays, `[String: Any]`
/// dictionaries, and `NSNull` are mapped to the corresponding `Value` case.
/// Anything else falls back to its `String(describing:)` text.
///
/// - Parameter value: The value to convert.
/// - Returns: The converted `Value`.
private func convertToValue(_ value: Any) -> Value {
    #if canImport(Darwin)
    // NSNumber-backed booleans (for example from `JSONSerialization`) also
    // cast successfully to `Int`, so they must be detected before the integer
    // case below; otherwise `true` would become `.int(1)`.
    if let number = value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
        return .bool(number.boolValue)
    }
    #endif

    switch value {
    case let string as String:
        return .string(string)
    case let number as Int:
        return .int(number)
    case let number as Double:
        return .double(number)
    case let bool as Bool:
        return .bool(bool)
    case let array as [Any]:
        return .array(array.map { convertToValue($0) })
    case let dict as [String: Any]:
        return .object(dict.mapValues { convertToValue($0) })
    case is NSNull:
        return .null
    default:
        // Fallback for unexpected types
        return .string(String(describing: value))
    }
}
|
||||
|
||||
/// Result of executing a tool: content items, an error flag, and optional metadata.
public struct ToolResponse: Sendable {
    /// Content items returned by the tool.
    public let content: [MCP.Tool.Content]
    /// Whether the response represents an error.
    public let isError: Bool
    /// Optional metadata attached to the response.
    public let meta: Value?

    /// Memberwise initializer; `isError` defaults to `false` and `meta` to `nil`.
    public init(content: [MCP.Tool.Content], isError: Bool = false, meta: Value? = nil) {
        self.content = content
        self.isError = isError
        self.meta = meta
    }

    /// Builds a successful response holding a single text item.
    public static func text(_ text: String, meta: Value? = nil) -> ToolResponse {
        multiContent([.text(text)], meta: meta)
    }

    /// Builds an error response holding `message` as a single text item.
    public static func error(_ message: String, meta: Value? = nil) -> ToolResponse {
        let items: [MCP.Tool.Content] = [.text(message)]
        return ToolResponse(content: items, isError: true, meta: meta)
    }

    /// Builds a successful response holding one base64-encoded image item.
    ///
    /// - Parameters:
    ///   - data: Raw image bytes; encoded with `base64EncodedString()`.
    ///   - mimeType: MIME type of the image. Defaults to `image/png`.
    ///   - meta: Optional response metadata.
    public static func image(data: Data, mimeType: String = "image/png", meta: Value? = nil) -> ToolResponse {
        let encoded = data.base64EncodedString()
        return multiContent([.image(data: encoded, mimeType: mimeType, metadata: nil)], meta: meta)
    }

    /// Builds a successful response from the given content items.
    public static func multiContent(_ contents: [MCP.Tool.Content], meta: Value? = nil) -> ToolResponse {
        ToolResponse(content: contents, isError: false, meta: meta)
    }
}

/// Shorthand for `MCP.Tool.Content`.
public typealias Content = MCP.Tool.Content
|
||||
@ -0,0 +1,161 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// Factory functions that assemble JSON Schema fragments as MCP `Value` objects.
///
/// Every function returns a `.object` containing a `type` key plus whichever
/// optional keywords were supplied; keywords passed as `nil` are omitted.
public struct SchemaBuilder {
    /// Assembles a schema object from its type name, optional description,
    /// and any additional keywords whose values are non-`nil`.
    private static func assemble(
        type: String,
        description: String?,
        keywords: [String: Value?] = [:]
    ) -> Value {
        var fields: [String: Value] = keywords.compactMapValues { $0 }
        fields["type"] = Value.string(type)
        if let text = description {
            fields["description"] = Value.string(text)
        }
        return Value.object(fields)
    }

    /// Schema for an object.
    ///
    /// - Parameters:
    ///   - properties: Schemas keyed by property name.
    ///   - required: Names of required properties; the `required` keyword is
    ///     omitted when this is empty.
    ///   - description: Optional description.
    public static func object(
        properties: [String: Value],
        required: [String] = [],
        description: String? = nil
    ) -> Value {
        let requiredNames: Value? = required.isEmpty
            ? nil
            : Value.array(required.map { Value.string($0) })
        return assemble(type: "object", description: description, keywords: [
            "properties": Value.object(properties),
            "required": requiredNames,
        ])
    }

    /// Schema for a string, with optional `enum`, `default`, `minLength`, and `maxLength`.
    public static func string(
        description: String? = nil,
        enum values: [String]? = nil,
        default: String? = nil,
        minLength: Int? = nil,
        maxLength: Int? = nil
    ) -> Value {
        let allowed: Value? = values.map { list in Value.array(list.map { Value.string($0) }) }
        let fallback: Value? = `default`.map { Value.string($0) }
        return assemble(type: "string", description: description, keywords: [
            "enum": allowed,
            "default": fallback,
            "minLength": minLength.map { Value.int($0) },
            "maxLength": maxLength.map { Value.int($0) },
        ])
    }

    /// Schema for a boolean, with an optional `default`.
    public static func boolean(
        description: String? = nil,
        default: Bool? = nil
    ) -> Value {
        let fallback: Value? = `default`.map { Value.bool($0) }
        return assemble(type: "boolean", description: description, keywords: [
            "default": fallback,
        ])
    }

    /// Schema for a number, with optional `minimum`, `maximum`, and `default` stored as doubles.
    public static func number(
        description: String? = nil,
        minimum: Double? = nil,
        maximum: Double? = nil,
        default: Double? = nil
    ) -> Value {
        let fallback: Value? = `default`.map { Value.double($0) }
        return assemble(type: "number", description: description, keywords: [
            "minimum": minimum.map { Value.double($0) },
            "maximum": maximum.map { Value.double($0) },
            "default": fallback,
        ])
    }

    /// Schema for an integer, with optional `minimum`, `maximum`, and `default` stored as ints.
    public static func integer(
        description: String? = nil,
        minimum: Int? = nil,
        maximum: Int? = nil,
        default: Int? = nil
    ) -> Value {
        let fallback: Value? = `default`.map { Value.int($0) }
        return assemble(type: "integer", description: description, keywords: [
            "minimum": minimum.map { Value.int($0) },
            "maximum": maximum.map { Value.int($0) },
            "default": fallback,
        ])
    }

    /// Schema for an array whose elements follow `items`, with optional `minItems` and `maxItems`.
    public static func array(
        items: Value,
        description: String? = nil,
        minItems: Int? = nil,
        maxItems: Int? = nil
    ) -> Value {
        assemble(type: "array", description: description, keywords: [
            "items": items,
            "minItems": minItems.map { Value.int($0) },
            "maxItems": maxItems.map { Value.int($0) },
        ])
    }
}
|
||||
@ -0,0 +1,63 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// Main-actor-isolated registry of MCP tools, keyed by tool name.
///
/// Registering a tool under a name that is already present replaces the
/// earlier entry. Listings are returned sorted by name so that clients see
/// the same order on every run.
@MainActor
public final class MCPToolRegistry: Sendable {
    private let logger = Logger(subsystem: "boo.peekaboo.mcp", category: "registry")
    private var tools: [String: MCPTool] = [:]

    public init() {}

    /// Registers `tool` under its `name`, replacing any existing tool with that name.
    public func register(_ tool: MCPTool) {
        tools[tool.name] = tool
        logger.debug("Registered tool: \(tool.name)")
    }

    /// Registers each tool in `tools`, in order.
    public func register(_ tools: [MCPTool]) {
        for tool in tools {
            register(tool)
        }
    }

    /// Returns the tool registered under `name`, or `nil` when there is none.
    public func tool(named name: String) -> MCPTool? {
        tools[name]
    }

    /// Returns every registered tool, sorted by name.
    ///
    /// Dictionary iteration order differs between processes, so the result
    /// is sorted to keep listings stable.
    public func allTools() -> [MCPTool] {
        tools.values.sorted { $0.name < $1.name }
    }

    /// Returns an `MCP.Tool` descriptor (name, description, input schema)
    /// for every registered tool, sorted by name.
    public func toolInfos() -> [MCP.Tool] {
        allTools().map { tool in
            MCP.Tool(
                name: tool.name,
                description: tool.description,
                inputSchema: tool.inputSchema
            )
        }
    }

    /// Returns `true` when a tool is registered under `name`.
    public func hasToolNamed(_ name: String) -> Bool {
        tools[name] != nil
    }

    /// Removes the tool registered under `name`, if any.
    public func unregister(_ name: String) {
        // Only report a removal when something was actually removed.
        guard tools.removeValue(forKey: name) != nil else {
            logger.debug("No tool named \(name) to unregister")
            return
        }
        logger.debug("Unregistered tool: \(name)")
    }

    /// Removes every registered tool.
    public func unregisterAll() {
        tools.removeAll()
        logger.debug("Unregistered all tools")
    }
}
|
||||
@ -0,0 +1,203 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// Transport types supported by the MCP server.
///
/// The raw value of each case is its lowercase name, so a user-supplied
/// string can be parsed with `TransportType(rawValue:)`. The type is
/// `Sendable` because values are passed into the `PeekabooMCPServer` actor.
public enum TransportType: String, CustomStringConvertible, Sendable, CaseIterable {
    case stdio
    case http
    case sse

    /// Lowercase name of the transport (`"stdio"`, `"http"`, or `"sse"`).
    public var description: String {
        rawValue
    }
}
|
||||
|
||||
/// Actor that wraps the MCP SDK `Server`, registers Peekaboo's tools, and
/// installs the request handlers for tool listing, tool calls, resources,
/// and initialization.
public actor PeekabooMCPServer {
    private let server: Server
    private let toolRegistry: MCPToolRegistry
    private let logger: os.Logger
    private let serverName = "peekaboo-mcp"
    private let serverVersion = "3.0.0-beta.2"

    /// Creates the server, installs all handlers, and registers all tools.
    public init() async throws {
        self.logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "server")
        self.toolRegistry = await MCPToolRegistry()

        // Initialize the official MCP Server
        self.server = Server(
            name: serverName,
            version: serverVersion,
            capabilities: Server.Capabilities(
                prompts: .init(listChanged: false),
                resources: .init(subscribe: false, listChanged: false),
                tools: .init(listChanged: true)
            )
        )

        await setupHandlers()
        await registerAllTools()
    }

    /// Installs the method handlers on the underlying SDK server.
    private func setupHandlers() async {
        // Tool list handler
        await server.withMethodHandler(ListTools.self) { [weak self] _ in
            guard let self = self else { return ListTools.Result(tools: []) }

            let tools = await self.toolRegistry.toolInfos()
            return ListTools.Result(tools: tools)
        }

        // Tool call handler
        await server.withMethodHandler(CallTool.self) { [weak self] params in
            guard let self = self else {
                throw MCP.MCPError.methodNotFound("Server deallocated")
            }

            // An unknown tool is a protocol-level error.
            guard let tool = await self.toolRegistry.tool(named: params.name) else {
                throw MCP.MCPError.invalidParams("Tool '\(params.name)' not found")
            }

            let arguments = ToolArguments(value: .object(params.arguments ?? [:]))

            do {
                let response = try await tool.execute(arguments: arguments)
                return CallTool.Result(
                    content: response.content,
                    isError: response.isError
                )
            } catch is CancellationError {
                // Cancellation is not a tool failure; let it propagate.
                throw CancellationError()
            } catch {
                // Failures while running a tool are reported inside the
                // result with `isError: true`, so the calling model can read
                // the message, rather than as a protocol error.
                let message = "Tool '\(params.name)' failed: \(error.localizedDescription)"
                self.logger.error("Tool '\(params.name)' failed: \(error.localizedDescription)")
                return CallTool.Result(
                    content: [.text(message)],
                    isError: true
                )
            }
        }

        // Resources list handler (empty for now, but prevents inspector errors)
        await server.withMethodHandler(ListResources.self) { _ in
            // Return empty resources list
            return ListResources.Result(resources: [], nextCursor: nil)
        }

        // Resources read handler (returns error for now)
        await server.withMethodHandler(ReadResource.self) { params in
            throw MCP.MCPError.invalidParams("Resource '\(params.uri)' not found")
        }

        // Initialize handler
        await server.withMethodHandler(Initialize.self) { [weak self] request in
            guard let self = self else {
                throw MCP.MCPError.methodNotFound("Server deallocated")
            }

            self.logger.info("Client connected: \(request.clientInfo.name) \(request.clientInfo.version), protocol: \(request.protocolVersion)")

            // Create a response struct that matches Initialize.Result
            struct InitializeResult: Codable {
                let protocolVersion: String
                let capabilities: Server.Capabilities
                let serverInfo: Server.Info
                let instructions: String?
            }

            let result = InitializeResult(
                protocolVersion: "2024-11-05",
                capabilities: await self.server.capabilities,
                serverInfo: Server.Info(
                    name: self.serverName,
                    version: self.serverVersion
                ),
                instructions: nil
            )

            // Convert to Initialize.Result via JSON
            let data = try JSONEncoder().encode(result)
            return try JSONDecoder().decode(Initialize.Result.self, from: data)
        }
    }

    /// Registers every Peekaboo tool with the registry and logs the resulting count.
    private func registerAllTools() async {
        // Register all Peekaboo tools
        await toolRegistry.register([
            // Core tools
            ImageTool(),
            AnalyzeTool(),
            ListTool(),
            PermissionsTool(),
            SleepTool(),

            // UI automation tools
            SeeTool(),
            ClickTool(),
            TypeTool(),
            ScrollTool(),
            HotkeyTool(),
            SwipeTool(),
            DragTool(),
            MoveTool(),

            // App management tools
            AppTool(),
            WindowTool(),
            MenuTool(),

            // System tools
            // RunTool(), // Removed: Security risk - allows arbitrary script execution
            // CleanTool(), // Removed: Internal maintenance tool, not for external use

            // Advanced tools
            AgentTool(),
            DockTool(),
            DialogTool(),
            SpaceTool(),
        ])

        let toolCount = await self.toolRegistry.allTools().count
        logger.info("Registered \(toolCount) tools")
    }

    /// Starts the server on the given transport and waits until it completes.
    ///
    /// - Parameters:
    ///   - transport: Transport to serve on. Only `.stdio` is implemented.
    ///   - port: Port for network transports; currently unused because the
    ///     HTTP and SSE transports are not implemented.
    /// - Throws: `MCPError.notImplemented` for `.http` and `.sse`, or any
    ///   error from starting the underlying server.
    public func serve(transport: TransportType, port: Int = 8080) async throws {
        logger.info("Starting Peekaboo MCP server on \(transport) transport, version: \(self.serverVersion)")

        let serverTransport: any Transport

        switch transport {
        case .stdio:
            serverTransport = StdioTransport()

        case .http:
            // Note: HTTP transport would need custom implementation
            // as the SDK only provides HTTPClientTransport
            throw MCPError.notImplemented("HTTP server transport not yet implemented")

        case .sse:
            throw MCPError.notImplemented("SSE server transport not yet implemented")
        }

        try await server.start(transport: serverTransport)

        // Keep the server running
        await server.waitUntilCompleted()
    }
}
|
||||
|
||||
// MARK: - Supporting Types
|
||||
|
||||
/// Errors raised by Peekaboo's MCP layer, each carrying a detail string
/// that is embedded in its localized description.
public enum MCPError: LocalizedError {
    case notImplemented(String)
    case toolNotFound(String)
    case invalidArguments(String)
    case executionFailed(String)

    /// Human-readable message for the error case.
    public var errorDescription: String? {
        let message: String
        switch self {
        case let .notImplemented(feature):
            message = "\(feature) is not yet implemented"
        case let .toolNotFound(tool):
            message = "Tool '\(tool)' not found"
        case let .invalidArguments(details):
            message = "Invalid arguments: \(details)"
        case let .executionFailed(reason):
            message = "Execution failed: \(reason)"
        }
        return message
    }
}
|
||||
273
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/AgentTool.swift
Normal file
273
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/AgentTool.swift
Normal file
@ -0,0 +1,273 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for executing complex automation tasks using an AI agent
|
||||
public struct AgentTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "AgentTool")
|
||||
|
||||
public let name = "agent"
|
||||
|
||||
public var description: String {
|
||||
"""
|
||||
Execute complex automation tasks using an AI agent powered by OpenAI's Assistants API.
|
||||
The agent can understand natural language instructions and break them down into specific
|
||||
Peekaboo commands to accomplish complex workflows.
|
||||
|
||||
Capabilities:
|
||||
- Natural Language Processing: Understands tasks described in plain English
|
||||
- Multi-step Automation: Breaks complex tasks into sequential steps
|
||||
- Visual Feedback: Can take screenshots to verify results
|
||||
- Context Awareness: Maintains session state across multiple actions
|
||||
- Error Recovery: Can adapt and retry when actions fail
|
||||
|
||||
The agent has access to all Peekaboo automation tools including:
|
||||
- Screen capture and analysis
|
||||
- UI element interaction (click, type, scroll)
|
||||
- Application control (launch, quit, focus)
|
||||
- Window management (move, resize, close)
|
||||
- System interaction (hotkeys, shell commands)
|
||||
|
||||
Example tasks:
|
||||
- "Open Safari and navigate to apple.com"
|
||||
- "Take a screenshot of the current window and save it to Desktop"
|
||||
- "Find the login button and click it, then type my credentials"
|
||||
- "Open TextEdit, write 'Hello World', and save the document"
|
||||
|
||||
Requires OPENAI_API_KEY environment variable to be set.
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
public var inputSchema: Value {
|
||||
SchemaBuilder.object(
|
||||
properties: [
|
||||
"task": SchemaBuilder.string(
|
||||
description: "Natural language description of the task to perform (optional when listing sessions)"
|
||||
),
|
||||
"model": SchemaBuilder.string(
|
||||
description: "OpenAI model to use (e.g., gpt-4-turbo, gpt-4o). Call `list_models` first to see available presets and their descriptions. Choose based on task requirements (e.g., 'FastChat' for quick responses, 'DeepAnalysis' for complex reasoning). If omitted, auto-selects first mode-compatible preset."
|
||||
),
|
||||
"quiet": SchemaBuilder.boolean(
|
||||
description: "Quiet mode - only show final result",
|
||||
default: false
|
||||
),
|
||||
"verbose": SchemaBuilder.boolean(
|
||||
description: "Enable verbose output with full JSON debug information",
|
||||
default: false
|
||||
),
|
||||
"dry_run": SchemaBuilder.boolean(
|
||||
description: "Dry run - show planned steps without executing",
|
||||
default: false
|
||||
),
|
||||
"max_steps": SchemaBuilder.integer(
|
||||
description: "Maximum number of steps the agent can take"
|
||||
),
|
||||
"resume": SchemaBuilder.boolean(
|
||||
description: "Resume the most recent session",
|
||||
default: false
|
||||
),
|
||||
"resumeSession": SchemaBuilder.string(
|
||||
description: "Resume a specific session by ID"
|
||||
),
|
||||
"listSessions": SchemaBuilder.boolean(
|
||||
description: "List available sessions",
|
||||
default: false
|
||||
),
|
||||
"noCache": SchemaBuilder.boolean(
|
||||
description: "Disable session caching (always create new session)",
|
||||
default: false
|
||||
)
|
||||
],
|
||||
required: []
|
||||
)
|
||||
}
|
||||
|
||||
public init() {}
|
||||
|
||||
@MainActor
|
||||
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
|
||||
let input = try arguments.decode(AgentInput.self)
|
||||
|
||||
logger.info("AgentTool executing with task: \(input.task ?? "none"), listSessions: \(input.listSessions)")
|
||||
|
||||
// Handle listing sessions
|
||||
if input.listSessions {
|
||||
do {
|
||||
guard let agent = PeekabooServices.shared.agent as? PeekabooAgentService else {
|
||||
return ToolResponse.error("Agent service not available")
|
||||
}
|
||||
let sessions = try await agent.listSessions()
|
||||
let sessionDescriptions = sessions.map { session in
|
||||
let dateFormatter = DateFormatter()
|
||||
dateFormatter.dateStyle = .medium
|
||||
dateFormatter.timeStyle = .short
|
||||
|
||||
return "ID: \(session.id)\nCreated: \(dateFormatter.string(from: session.createdAt))\nUpdated: \(dateFormatter.string(from: session.updatedAt))\nMessage Count: \(session.messageCount)"
|
||||
}.joined(separator: "\n---\n")
|
||||
|
||||
return ToolResponse.text(
|
||||
"Available Sessions:\n\n\(sessionDescriptions)",
|
||||
meta: .object([
|
||||
"sessionCount": .string(String(sessions.count)),
|
||||
"sessions": .array(sessions.map { session in
|
||||
let dateFormatter = ISO8601DateFormatter()
|
||||
return .object([
|
||||
"id": .string(session.id),
|
||||
"createdAt": .string(dateFormatter.string(from: session.createdAt)),
|
||||
"updatedAt": .string(dateFormatter.string(from: session.updatedAt)),
|
||||
"messageCount": .string(String(session.messageCount))
|
||||
])
|
||||
})
|
||||
])
|
||||
)
|
||||
} catch {
|
||||
logger.error("Failed to list sessions: \(error.localizedDescription)")
|
||||
return ToolResponse.error("Failed to list sessions: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
// Require task for execution
|
||||
guard let task = input.task else {
|
||||
return ToolResponse.error("Missing required parameter: task")
|
||||
}
|
||||
|
||||
do {
|
||||
guard let agent = PeekabooServices.shared.agent as? PeekabooAgentService else {
|
||||
return ToolResponse.error("Agent service not available")
|
||||
}
|
||||
|
||||
let result: AgentExecutionResult
|
||||
|
||||
// Handle resume scenarios
|
||||
if let resumeSessionId = input.resumeSession {
|
||||
// Resume specific session
|
||||
result = try await agent.resumeSession(
|
||||
sessionId: resumeSessionId,
|
||||
modelName: input.model ?? "claude-opus-4-20250514"
|
||||
)
|
||||
} else if input.resume {
|
||||
// Resume most recent session - get latest session and resume it
|
||||
let sessions = try await agent.listSessions()
|
||||
guard let latestSession = sessions.first else {
|
||||
return ToolResponse.error("No sessions available to resume")
|
||||
}
|
||||
|
||||
result = try await agent.resumeSession(
|
||||
sessionId: latestSession.id,
|
||||
modelName: input.model ?? "claude-opus-4-20250514"
|
||||
)
|
||||
} else {
|
||||
// Execute new task
|
||||
if input.dryRun {
|
||||
// Use the dryRun version
|
||||
result = try await agent.executeTask(
|
||||
task,
|
||||
dryRun: true,
|
||||
eventDelegate: nil
|
||||
)
|
||||
} else {
|
||||
// Use the full-featured version with session and model
|
||||
let sessionId = input.noCache ? nil : UUID().uuidString
|
||||
result = try await agent.executeTask(
|
||||
task,
|
||||
sessionId: sessionId,
|
||||
modelName: input.model ?? "claude-opus-4-20250514",
|
||||
eventDelegate: nil
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Format response based on verbosity level
|
||||
if input.quiet {
|
||||
return ToolResponse.text(result.content)
|
||||
} else if input.verbose {
|
||||
var metadata: [String: Value] = [
|
||||
"sessionId": .string(result.sessionId),
|
||||
"modelName": .string(result.metadata.modelName),
|
||||
"toolCallCount": .string(String(result.metadata.toolCallCount)),
|
||||
"executionTime": .string(String(format: "%.2f", result.metadata.endTime.timeIntervalSince(result.metadata.startTime))),
|
||||
"isResumed": .string(result.metadata.isResumed ? "true" : "false")
|
||||
]
|
||||
|
||||
if let usage = result.usage {
|
||||
metadata["usage"] = .object([
|
||||
"promptTokens": .string(String(usage.promptTokens)),
|
||||
"completionTokens": .string(String(usage.completionTokens)),
|
||||
"totalTokens": .string(String(usage.totalTokens))
|
||||
])
|
||||
}
|
||||
|
||||
return ToolResponse.text(
|
||||
result.content,
|
||||
meta: .object(metadata)
|
||||
)
|
||||
} else {
|
||||
// Default output format
|
||||
var output = result.content
|
||||
|
||||
if result.metadata.toolCallCount > 0 {
|
||||
output += "\n\n🔧 Tools used: \(result.metadata.toolCallCount)"
|
||||
}
|
||||
|
||||
if let usage = result.usage {
|
||||
output += "\n📊 Tokens: \(usage.promptTokens) in, \(usage.completionTokens) out"
|
||||
}
|
||||
|
||||
let executionTime = result.metadata.endTime.timeIntervalSince(result.metadata.startTime)
|
||||
output += "\n⏱️ Execution time: \(String(format: "%.1f", executionTime))s"
|
||||
|
||||
return ToolResponse.text(
|
||||
output,
|
||||
meta: .object([
|
||||
"sessionId": .string(result.sessionId),
|
||||
"modelName": .string(result.metadata.modelName),
|
||||
"toolCallCount": .string(String(result.metadata.toolCallCount))
|
||||
])
|
||||
)
|
||||
}
|
||||
|
||||
} catch {
|
||||
logger.error("Agent execution failed: \(error.localizedDescription)")
|
||||
return ToolResponse.error("Agent execution failed: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Supporting Types
|
||||
|
||||
/// Parameters accepted by the agent execution code in this file.
///
/// Every key is optional on the wire: a missing flag decodes to `false` and a
/// missing value decodes to `nil`. Only `dry_run` and `max_steps` use
/// snake_case keys; all other keys are spelled like the property.
struct AgentInput: Codable {
    let task: String?
    let model: String?
    let quiet: Bool
    let verbose: Bool
    let dryRun: Bool
    let maxSteps: Int?
    let resume: Bool
    let resumeSession: String?
    let listSessions: Bool
    let noCache: Bool

    enum CodingKeys: String, CodingKey {
        case task
        case model
        case quiet
        case verbose
        case resume
        case noCache
        case dryRun = "dry_run"
        case maxSteps = "max_steps"
        case resumeSession
        case listSessions
    }

    /// Decodes the input, substituting `false` for any flag that is absent.
    init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)

        // Boolean switches are "off" unless the caller explicitly sends them.
        func flag(_ key: CodingKeys) throws -> Bool {
            try values.decodeIfPresent(Bool.self, forKey: key) ?? false
        }

        // Free-form text values stay nil when absent.
        func text(_ key: CodingKeys) throws -> String? {
            try values.decodeIfPresent(String.self, forKey: key)
        }

        task = try text(.task)
        model = try text(.model)
        quiet = try flag(.quiet)
        verbose = try flag(.verbose)
        dryRun = try flag(.dryRun)
        maxSteps = try values.decodeIfPresent(Int.self, forKey: .maxSteps)
        resume = try flag(.resume)
        resumeSession = try text(.resumeSession)
        listSessions = try flag(.listSessions)
        noCache = try flag(.noCache)
    }
}
|
||||
@ -0,0 +1,250 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for analyzing images with AI
|
||||
public struct AnalyzeTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "AnalyzeTool")
|
||||
|
||||
public let name = "analyze"
|
||||
|
||||
/// Usage text for the `analyze` tool: what it does, its capabilities, and an example call.
/// NOTE(review): the trailing version/provider line is hard-coded in the literal — confirm
/// whether it should instead reflect the running build and configured providers.
public var description: String {
|
||||
"""
|
||||
Analyzes a pre-existing image file from the local filesystem using a configured AI model.
|
||||
|
||||
This tool is useful when an image already exists (e.g., previously captured, downloaded, or generated) and you
|
||||
need to understand its content, extract text, or answer specific questions about it.
|
||||
|
||||
Capabilities:
|
||||
- Image Understanding: Provide any question about the image (e.g., "What objects are in this picture?",
|
||||
"Describe the scene.", "Is there a red car?").
|
||||
- Text Extraction (OCR): Ask the AI to extract text from the image (e.g., "What text is visible in this screenshot?").
|
||||
- Flexible AI Configuration: Can use server-default AI providers/models or specify a particular one per call
|
||||
via 'provider_config'.
|
||||
|
||||
Example:
|
||||
If you have an image '/tmp/chart.png' showing a bar chart, you could ask:
|
||||
{ "image_path": "/tmp/chart.png", "question": "Which category has the highest value in this bar chart?" }
|
||||
The AI will analyze the image and attempt to answer your question based on its visual content.
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
/// JSON schema for the arguments accepted by `execute(arguments:)`.
///
/// Both `image_path` and `question` are listed as required, because
/// `execute(arguments:)` returns an error response when either is missing.
public var inputSchema: Value {
    SchemaBuilder.object(
        properties: [
            "image_path": SchemaBuilder.string(
                description: "Required. Absolute path to image file (.png, .jpg, .webp) to be analyzed."
            ),
            "question": SchemaBuilder.string(
                description: "Required. Question for the AI about the image."
            ),
            "provider_config": SchemaBuilder.object(
                properties: [
                    "type": SchemaBuilder.string(
                        description: "AI provider, default: auto. 'auto' uses server's PEEKABOO_AI_PROVIDERS environment preference.",
                        enum: ["auto", "ollama", "openai", "anthropic", "grok"],
                        default: "auto"
                    ),
                    "model": SchemaBuilder.string(
                        description: "Optional. Model name. If omitted, uses model from server's PEEKABOO_AI_PROVIDERS."
                    )
                ],
                description: "Optional. Explicit provider/model. Validated against server's PEEKABOO_AI_PROVIDERS."
            )
        ],
        // Keep in sync with the guards at the top of execute(arguments:).
        required: ["image_path", "question"]
    )
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Answers a question about an image file using the first provider listed in
/// the `PEEKABOO_AI_PROVIDERS` environment variable.
///
/// - Parameter arguments: Must contain string values for `image_path` and `question`.
/// - Returns: The model's text answer followed by a timing line, with the model
///   label, answer text and duration attached as response metadata. An error
///   response is returned when a parameter is missing, the file extension is not
///   supported, the file does not exist, no provider is configured, or the
///   model call throws.
///
/// NOTE(review): `provider_config` is advertised by `inputSchema` but is not read
/// here; the provider always comes from `PEEKABOO_AI_PROVIDERS` — confirm intent.
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    // Get required parameters
    guard let imagePath = arguments.getString("image_path") else {
        return ToolResponse.error("Missing required parameter: image_path")
    }

    guard let question = arguments.getString("question") else {
        return ToolResponse.error("Missing required parameter: question")
    }

    // Validate image file extension
    let fileExtension = (imagePath as NSString).pathExtension.lowercased()
    let supportedFormats = ["png", "jpg", "jpeg", "webp"]
    guard supportedFormats.contains(fileExtension) else {
        return ToolResponse.error("Unsupported image format: .\(fileExtension). Supported formats: .png, .jpg, .jpeg, .webp")
    }

    // Check if file exists (after resolving a leading "~")
    let expandedPath = (imagePath as NSString).expandingTildeInPath
    guard FileManager.default.fileExists(atPath: expandedPath) else {
        return ToolResponse.error("Image file not found: \(imagePath)")
    }

    // Check AI providers configuration
    guard let aiProviders = ProcessInfo.processInfo.environment["PEEKABOO_AI_PROVIDERS"], !aiProviders.isEmpty else {
        return ToolResponse.error("AI analysis not configured on this server. Set the PEEKABOO_AI_PROVIDERS environment variable.")
    }

    // Only the first configured provider/model pair is used.
    let (modelName, providerType) = parseAIProviders(aiProviders)

    do {
        // Read the image file and encode it for the request payload
        let imageData = try Data(contentsOf: URL(fileURLWithPath: expandedPath))
        let base64String = imageData.base64EncodedString()

        // Get or create model instance
        let model = try await getOrCreateModel(modelName: modelName, providerType: providerType)

        // Create a request carrying the question and the image
        let imageContent = ImageContent(base64: base64String)
        let messageContent = MessageContent.multimodal([
            MessageContentPart(type: "text", text: question),
            MessageContentPart(type: "image_url", imageUrl: imageContent)
        ])

        let messages: [Message] = [
            .user(content: messageContent)
        ]

        let request = ModelRequest(
            messages: messages,
            tools: nil,
            settings: ModelSettings(
                modelName: modelName,
                temperature: 0.7,
                maxTokens: 4096,
                toolChoice: ToolChoice.none
            ),
            systemInstructions: "You are a helpful AI assistant analyzing images. Provide clear, detailed answers about what you see."
        )

        logger.info("Analyzing image with \(providerType ?? "auto")/\(modelName)")
        let startTime = Date()

        // Get the response
        let response = try await model.getResponse(request: request)

        let duration = Date().timeIntervalSince(startTime)
        let formattedDuration = String(format: "%.2f", duration)
        logger.info("Analysis completed in \(formattedDuration)s")

        // Concatenate every text fragment of the response; other content kinds are ignored
        var analysisText = ""
        for content in response.content {
            if case .outputText(let text) = content {
                analysisText += text
            }
        }

        let modelLabel = "\(providerType ?? "unknown")/\(modelName)"
        let timingMessage = "\n\n👻 Peekaboo: Analyzed image with \(modelLabel) in \(formattedDuration)s."

        // Previously this metadata was assembled and then dropped; attach it to the response.
        return ToolResponse(
            content: [
                .text(analysisText),
                .text(timingMessage)
            ],
            meta: .object([
                "model_used": .string(modelLabel),
                "analysis_text": .string(analysisText),
                "duration_seconds": .string(formattedDuration)
            ])
        )

    } catch {
        logger.error("Analysis failed: \(error)")
        return ToolResponse.error("AI analysis failed: \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Private Helpers
|
||||
|
||||
/// Extracts the first usable entry from a `PEEKABOO_AI_PROVIDERS` value.
///
/// The expected format is a comma-separated list such as
/// `"provider/model,provider2/model2"`. Only the first non-blank entry is used.
/// The entry is split at its first `/`, so model names that themselves contain
/// a slash are kept intact.
///
/// - Parameter providers: Raw value of the environment variable.
/// - Returns: The model name and, when the entry has a `provider/` prefix, the
///   provider name. An entry without a slash is treated as a bare model name
///   with a `nil` provider. When no usable entry exists the Anthropic default
///   is returned.
private func parseAIProviders(_ providers: String) -> (modelName: String, providerType: String?) {
    let fallback: (modelName: String, providerType: String?) = ("claude-opus-4-20250514", "anthropic")

    // Drop blank entries so input like " , ollama/llava" does not produce an empty model name.
    let entries = providers
        .split(separator: ",")
        .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
        .filter { !$0.isEmpty }

    guard let first = entries.first else {
        return fallback
    }

    // No slash at all: the entry is just a model name.
    guard let slash = first.firstIndex(of: "/") else {
        return (first, nil)
    }

    let provider = String(first[..<slash])
    let model = String(first[first.index(after: slash)...])

    if !provider.isEmpty && !model.isEmpty {
        return (model, provider)
    }

    // Malformed "provider/" or "/model": treat whichever side is present as a bare model name.
    let bare = provider.isEmpty ? model : provider
    return bare.isEmpty ? fallback : (bare, nil)
}
|
||||
|
||||
/// Returns a model for `modelName`, preferring one the shared `ModelProvider`
/// can already supply and otherwise constructing one directly.
///
/// When construction is needed, an explicit `providerType` decides the backend
/// (case-insensitively). Without one, the backend is guessed from the model
/// name: names containing "claude" use Anthropic, names containing "gpt", "o3"
/// or "o4" use OpenAI, and everything else is assumed to be an Ollama model.
///
/// - Throws: `PeekabooError.authenticationFailed` when the required API key is
///   not in the environment, or `PeekabooError.invalidInput` for an unknown
///   provider type or an unparsable Ollama base URL.
private func getOrCreateModel(modelName: String, providerType: String?) async throws -> any ModelInterface {
    // A model the shared provider can resolve always wins.
    do {
        return try await ModelProvider.shared.getModel(modelName: modelName)
    } catch {
        // Lookup failed; build the model ourselves below.
    }

    let environment = ProcessInfo.processInfo.environment

    func makeAnthropic() throws -> any ModelInterface {
        guard let key = environment["ANTHROPIC_API_KEY"] else {
            throw PeekabooError.authenticationFailed("ANTHROPIC_API_KEY not set")
        }
        return AnthropicModel(apiKey: key, modelName: modelName)
    }

    func makeOpenAI() throws -> any ModelInterface {
        guard let key = environment["OPENAI_API_KEY"] else {
            throw PeekabooError.authenticationFailed("OPENAI_API_KEY not set")
        }
        // NOTE(review): unlike the other backends, the model name is not passed here — confirm.
        return OpenAIModel(apiKey: key)
    }

    func makeGrok() throws -> any ModelInterface {
        // Either spelling of the xAI key variable is accepted.
        guard let key = environment["X_AI_API_KEY"] ?? environment["XAI_API_KEY"] else {
            throw PeekabooError.authenticationFailed("X_AI_API_KEY or XAI_API_KEY not set")
        }
        return GrokModel(apiKey: key, modelName: modelName)
    }

    func makeOllama() throws -> any ModelInterface {
        let rawURL = environment["PEEKABOO_OLLAMA_BASE_URL"] ?? "http://localhost:11434"
        guard let endpoint = URL(string: rawURL) else {
            throw PeekabooError.invalidInput("Invalid Ollama base URL: \(rawURL)")
        }
        return OllamaModel(modelName: modelName, baseURL: endpoint)
    }

    // An explicit provider is authoritative; unknown values are an error, not a guess.
    if let explicit = providerType {
        switch explicit.lowercased() {
        case "anthropic":
            return try makeAnthropic()
        case "openai":
            return try makeOpenAI()
        case "grok":
            return try makeGrok()
        case "ollama":
            return try makeOllama()
        default:
            throw PeekabooError.invalidInput("Unknown provider type: \(explicit)")
        }
    }

    // Final fallback - guess the backend from the model name.
    if modelName.contains("claude") {
        return try makeAnthropic()
    }
    if modelName.contains("gpt") || modelName.contains("o3") || modelName.contains("o4") {
        return try makeOpenAI()
    }
    return try makeOllama()
}
|
||||
}
|
||||
|
||||
|
||||
473
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/AppTool.swift
Normal file
473
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/AppTool.swift
Normal file
@ -0,0 +1,473 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for controlling applications
|
||||
public struct AppTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "AppTool")
|
||||
|
||||
public let name = "app"
|
||||
|
||||
/// Usage text for the `app` tool: the supported actions, how to identify a target app, and examples.
/// NOTE(review): the `list` action accepted by `inputSchema` and `execute(arguments:)` is not
/// mentioned under "Actions"; the trailing version/provider line is hard-coded in the literal.
public var description: String {
|
||||
"""
|
||||
Control applications - launch, quit, relaunch, focus, hide, unhide, and switch between apps.
|
||||
|
||||
Actions:
|
||||
- launch: Start an application
|
||||
- quit: Quit an application (with optional force flag)
|
||||
- relaunch: Quit and restart an application (with configurable wait time)
|
||||
- focus/switch: Bring an application to the foreground
|
||||
- hide: Hide an application
|
||||
- unhide: Show a hidden application
|
||||
|
||||
Target applications by name (e.g., "Safari"), bundle ID (e.g., "com.apple.Safari"),
|
||||
or process ID (e.g., "PID:663"). Fuzzy matching is supported for application names.
|
||||
|
||||
Examples:
|
||||
- Launch Safari: { "action": "launch", "name": "Safari" }
|
||||
- Quit TextEdit: { "action": "quit", "name": "TextEdit" }
|
||||
- Relaunch Chrome: { "action": "relaunch", "name": "Google Chrome", "wait": 3 }
|
||||
- Focus Terminal: { "action": "focus", "name": "Terminal" }
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
/// JSON schema for the arguments accepted by `execute(arguments:)`.
///
/// Only `action` is required; which of the remaining properties are consulted
/// depends on the action, as noted in each property's description.
public var inputSchema: Value {
    SchemaBuilder.object(
        properties: [
            // Selects the handler.
            "action": SchemaBuilder.string(
                description: "The action to perform on the application",
                enum: ["launch", "quit", "relaunch", "focus", "hide", "unhide", "switch", "list"]
            ),

            // Target identification.
            "name": SchemaBuilder.string(
                description: "Application name, bundle ID, or process ID (e.g., 'Safari', 'com.apple.Safari', 'PID:663')"
            ),
            "bundleId": SchemaBuilder.string(
                description: "Launch by bundle identifier instead of name (for 'launch' action)"
            ),

            // Quit / relaunch tuning.
            "force": SchemaBuilder.boolean(
                description: "Force quit the application (for 'quit' and 'relaunch' actions)",
                default: false
            ),
            "wait": SchemaBuilder.number(
                description: "Wait time in seconds between quit and launch (for 'relaunch' action, default: 2)",
                default: 2.0
            ),
            "waitUntilReady": SchemaBuilder.boolean(
                description: "Wait for the application to be ready (for 'launch' and 'relaunch' actions)",
                default: false
            ),

            // Bulk quit.
            "all": SchemaBuilder.boolean(
                description: "Quit all applications (for 'quit' action)",
                default: false
            ),
            "except": SchemaBuilder.string(
                description: "Comma-separated list of apps to exclude when using --all (for 'quit' action)"
            ),

            // Switching.
            "to": SchemaBuilder.string(
                description: "Application to switch to (for 'switch' action)"
            ),
            "cycle": SchemaBuilder.boolean(
                description: "Cycle to next application like Cmd+Tab (for 'switch' action)",
                default: false
            )
        ],
        required: ["action"]
    )
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Routes an `app` tool call to the handler for the requested action.
///
/// - Parameter arguments: Tool arguments. `action` is mandatory; all other
///   values are optional and fall back to the defaults read below.
/// - Returns: The handler's response, or an error response when `action` is
///   missing or unknown, or when the handler throws.
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    guard let action = arguments.getString("action") else {
        return ToolResponse.error("Missing required parameter: action")
    }

    // Optional arguments, with the same defaults the schema advertises.
    let appName = arguments.getString("name")
    let bundleIdentifier = arguments.getString("bundleId")
    let forceQuit = arguments.getBool("force") ?? false
    let waitSeconds = arguments.getNumber("wait") ?? 2.0
    let awaitReady = arguments.getBool("waitUntilReady") ?? false
    let quitEverything = arguments.getBool("all") ?? false
    let exclusions = arguments.getString("except")
    let switchTarget = arguments.getString("to")
    let cycleApps = arguments.getBool("cycle") ?? false

    let apps = PeekabooServices.shared.applications

    do {
        // Every handler reports elapsed time relative to this instant.
        let began = Date()

        switch action {
        case "launch":
            return try await handleLaunch(
                service: apps,
                name: appName,
                bundleId: bundleIdentifier,
                waitUntilReady: awaitReady,
                startTime: began
            )

        case "quit":
            return try await handleQuit(
                service: apps,
                name: appName,
                force: forceQuit,
                all: quitEverything,
                except: exclusions,
                startTime: began
            )

        case "relaunch":
            return try await handleRelaunch(
                service: apps,
                name: appName,
                force: forceQuit,
                wait: waitSeconds,
                waitUntilReady: awaitReady,
                startTime: began
            )

        // "switch" is an alias for "focus".
        case "focus", "switch":
            return try await handleFocus(
                service: apps,
                name: appName,
                to: switchTarget,
                cycle: cycleApps,
                startTime: began
            )

        case "hide":
            return try await handleHide(service: apps, name: appName, startTime: began)

        case "unhide":
            return try await handleUnhide(service: apps, name: appName, startTime: began)

        case "list":
            return try await handleList(service: apps, startTime: began)

        default:
            return ToolResponse.error("Unknown action: \(action). Supported actions: launch, quit, relaunch, focus, hide, unhide, switch, list")
        }
    } catch {
        // Handler failures are reported to the caller as an error response rather than rethrown.
        logger.error("App control execution failed: \(error)")
        return ToolResponse.error("Failed to \(action) application: \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Action Handlers
|
||||
|
||||
/// Launches an application and reports its name, PID and bundle ID.
///
/// - Parameters:
///   - service: Application service used to perform the launch.
///   - name: Application identifier; used only when `bundleId` is nil.
///   - bundleId: Bundle identifier; takes precedence over `name`.
///   - waitUntilReady: When true, pauses for one second after the launch call.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response, or an error response when neither `name`
///   nor `bundleId` was supplied.
private func handleLaunch(
    service: ApplicationServiceProtocol,
    name: String?,
    bundleId: String?,
    waitUntilReady: Bool,
    startTime: Date
) async throws -> ToolResponse {
    guard let target = bundleId ?? name else {
        return ToolResponse.error("Must specify either 'name' or 'bundleId' for launch action")
    }

    let launched = try await service.launchApplication(identifier: target)

    if waitUntilReady {
        // Fixed one-second grace period; no readiness probe is performed here.
        try await Task.sleep(nanoseconds: 1_000_000_000)
    }

    let elapsed = Date().timeIntervalSince(startTime)
    let bundleValue: Value = launched.bundleIdentifier.map { Value.string($0) } ?? Value.null
    let summary = "✅ Launched \(launched.name) (PID: \(launched.processIdentifier)) in \(String(format: "%.2f", elapsed))s"

    return ToolResponse(
        content: [.text(summary)],
        meta: .object([
            "app_name": .string(launched.name),
            "process_id": .double(Double(launched.processIdentifier)),
            "bundle_id": bundleValue,
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Quits a single application, or delegates to `handleQuitAll` when `all` is set.
///
/// - Parameters:
///   - service: Application service used to look up and quit the app.
///   - name: Identifier of the app to quit; required unless `all` is true.
///   - force: Whether to force quit.
///   - all: When true, `name` is ignored and every eligible app is quit.
///   - except: Exclusion list forwarded to the bulk path.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response, or an error response when `name` is missing
///   or the service reports that the quit did not succeed.
private func handleQuit(
    service: ApplicationServiceProtocol,
    name: String?,
    force: Bool,
    all: Bool,
    except: String?,
    startTime: Date
) async throws -> ToolResponse {
    // Bulk mode bypasses the single-app path entirely.
    guard !all else {
        return try await handleQuitAll(
            service: service,
            except: except,
            force: force,
            startTime: startTime
        )
    }

    guard let target = name else {
        return ToolResponse.error("Must specify 'name' for quit action (or use 'all': true)")
    }

    // Resolve the app first so the response can report its name and PID.
    let resolved = try await service.findApplication(identifier: target)
    let didQuit = try await service.quitApplication(identifier: target, force: force)
    let elapsed = Date().timeIntervalSince(startTime)

    guard didQuit else {
        return ToolResponse.error("Failed to quit \(resolved.name). The application may have refused to quit.")
    }

    let forceSuffix = force ? " (force quit)" : ""
    return ToolResponse(
        content: [.text("✅ Quit \(resolved.name)\(forceSuffix) in \(String(format: "%.2f", elapsed))s")],
        meta: .object([
            "app_name": .string(resolved.name),
            "process_id": .double(Double(resolved.processIdentifier)),
            "force_quit": .bool(force),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Quits every running application that is not protected or excluded.
///
/// Finder and any app whose bundle identifier starts with `com.apple.` are
/// always skipped. Apps whose name or bundle identifier matches an entry in
/// `except` (case-insensitively) are skipped as well.
///
/// - Parameters:
///   - service: Application service used to list and quit apps.
///   - except: Comma-separated names or bundle identifiers to leave running.
///     Blank entries are ignored.
///   - force: Whether to force quit.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A response summarising how many apps were quit and which failed.
///   A failure to quit an individual app is recorded and does not abort the run.
private func handleQuitAll(
    service: ApplicationServiceProtocol,
    except: String?,
    force: Bool,
    startTime: Date
) async throws -> ToolResponse {
    let allApps = try await service.listApplications()

    // Blank entries (e.g. from "Safari, ") must be dropped: an empty string in the set
    // would otherwise match every app that has no bundle identifier.
    let exceptSet = Set(
        (except ?? "")
            .split(separator: ",")
            .map { $0.trimmingCharacters(in: .whitespaces).lowercased() }
            .filter { !$0.isEmpty }
    )

    var quitCount = 0
    var failedApps: [String] = []

    for app in allApps.data.applications {
        let nameExcluded = exceptSet.contains(app.name.lowercased())
        let bundleExcluded = app.bundleIdentifier.map { exceptSet.contains($0.lowercased()) } ?? false
        let isSystemApp = app.name == "Finder" // Always preserve Finder
            || app.bundleIdentifier?.hasPrefix("com.apple.") == true

        if nameExcluded || bundleExcluded || isSystemApp {
            continue
        }

        do {
            let success = try await service.quitApplication(identifier: app.name, force: force)
            if success {
                quitCount += 1
            } else {
                failedApps.append(app.name)
            }
        } catch {
            // Best effort: record the failure and keep going with the remaining apps.
            failedApps.append(app.name)
        }
    }

    let executionTime = Date().timeIntervalSince(startTime)
    let forceText = force ? " (force quit)" : ""

    var message = "✅ Quit \(quitCount) applications\(forceText)"
    if !failedApps.isEmpty {
        message += " (failed: \(failedApps.joined(separator: ", ")))"
    }
    message += " in \(String(format: "%.2f", executionTime))s"

    return ToolResponse(
        content: [.text(message)],
        meta: .object([
            "quit_count": .double(Double(quitCount)),
            "failed_apps": .array(failedApps.map { .string($0) }),
            "force_quit": .bool(force),
            "execution_time": .double(executionTime)
        ])
    )
}
|
||||
|
||||
/// Quits an application, waits, and launches it again.
///
/// - Parameters:
///   - service: Application service used to find, quit and launch the app.
///   - name: Identifier of the running app to relaunch; required.
///   - force: Whether to force quit.
///   - wait: Seconds to pause between quit and launch. Must be a finite,
///     non-negative number.
///   - waitUntilReady: When true, pauses one extra second after the launch call.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response with the old and new PIDs, or an error
///   response when `name` is missing, `wait` is invalid, or the quit fails.
private func handleRelaunch(
    service: ApplicationServiceProtocol,
    name: String?,
    force: Bool,
    wait: Double,
    waitUntilReady: Bool,
    startTime: Date
) async throws -> ToolResponse {
    guard let name = name else {
        return ToolResponse.error("Must specify 'name' for relaunch action")
    }

    // Validate the delay BEFORE quitting anything. A plain UInt64(...) conversion traps
    // on negative, NaN, infinite or oversized values, which would crash after the app
    // had already been terminated.
    guard let waitNanoseconds = UInt64(exactly: (wait * 1_000_000_000).rounded(.towardZero)) else {
        return ToolResponse.error("Invalid 'wait' value: \(wait). Must be a non-negative number of seconds")
    }

    // First, get app info before quitting
    let originalApp = try await service.findApplication(identifier: name)

    // Quit the application
    let quitSuccess = try await service.quitApplication(identifier: name, force: force)
    guard quitSuccess else {
        return ToolResponse.error("Failed to quit \(originalApp.name) for relaunch")
    }

    // Wait the specified time
    try await Task.sleep(nanoseconds: waitNanoseconds)

    // Relaunch using an identifier that survives the quit. The caller may have supplied
    // "PID:<n>", which no longer refers to anything once the process has exited.
    let relaunchIdentifier = originalApp.bundleIdentifier ?? originalApp.name
    let newApp = try await service.launchApplication(identifier: relaunchIdentifier)

    if waitUntilReady {
        // Fixed one-second grace period; no readiness probe is performed here.
        try await Task.sleep(nanoseconds: 1_000_000_000)
    }

    let executionTime = Date().timeIntervalSince(startTime)
    let forceText = force ? " (force quit)" : ""
    let bundleValue: Value = newApp.bundleIdentifier.map { Value.string($0) } ?? Value.null

    return ToolResponse(
        content: [.text("✅ Relaunched \(newApp.name)\(forceText) with \(wait)s wait in \(String(format: "%.2f", executionTime))s")],
        meta: .object([
            "app_name": .string(newApp.name),
            "old_process_id": .double(Double(originalApp.processIdentifier)),
            "new_process_id": .double(Double(newApp.processIdentifier)),
            "bundle_id": bundleValue,
            "wait_time": .double(wait),
            "force_quit": .bool(force),
            "execution_time": .double(executionTime)
        ])
    )
}
|
||||
|
||||
/// Brings an application to the foreground (used for both `focus` and `switch`).
///
/// - Parameters:
///   - service: Application service used to find and activate the app.
///   - name: Identifier of the app; used only when `to` is nil.
///   - to: Identifier of the app to switch to; takes precedence over `name`.
///   - cycle: Cmd+Tab style cycling; currently rejected as not implemented.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response, or an error response when cycling is
///   requested or no target was supplied.
private func handleFocus(
    service: ApplicationServiceProtocol,
    name: String?,
    to: String?,
    cycle: Bool,
    startTime: Date
) async throws -> ToolResponse {
    guard !cycle else {
        // TODO: Implement Cmd+Tab like cycling functionality
        return ToolResponse.error("Cycle mode not yet implemented")
    }

    guard let target = to ?? name else {
        return ToolResponse.error("Must specify 'name' or 'to' for focus/switch action")
    }

    // Resolve first so the response can report the app's details.
    let resolved = try await service.findApplication(identifier: target)
    try await service.activateApplication(identifier: target)

    let elapsed = Date().timeIntervalSince(startTime)
    let bundleValue: Value = resolved.bundleIdentifier.map { Value.string($0) } ?? Value.null

    return ToolResponse(
        content: [.text("✅ Focused \(resolved.name) in \(String(format: "%.2f", elapsed))s")],
        meta: .object([
            "app_name": .string(resolved.name),
            "process_id": .double(Double(resolved.processIdentifier)),
            "bundle_id": bundleValue,
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Hides an application.
///
/// - Parameters:
///   - service: Application service used to find and hide the app.
///   - name: Identifier of the app to hide; required.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response, or an error response when `name` is missing.
private func handleHide(
    service: ApplicationServiceProtocol,
    name: String?,
    startTime: Date
) async throws -> ToolResponse {
    guard let target = name else {
        return ToolResponse.error("Must specify 'name' for hide action")
    }

    // Resolve first so the response can report the app's name and PID.
    let resolved = try await service.findApplication(identifier: target)
    try await service.hideApplication(identifier: target)

    let elapsed = Date().timeIntervalSince(startTime)
    let summary = "✅ Hidden \(resolved.name) in \(String(format: "%.2f", elapsed))s"

    return ToolResponse(
        content: [.text(summary)],
        meta: .object([
            "app_name": .string(resolved.name),
            "process_id": .double(Double(resolved.processIdentifier)),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Shows a previously hidden application.
///
/// - Parameters:
///   - service: Application service used to find and unhide the app.
///   - name: Identifier of the app to unhide; required.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A success response, or an error response when `name` is missing.
private func handleUnhide(
    service: ApplicationServiceProtocol,
    name: String?,
    startTime: Date
) async throws -> ToolResponse {
    guard let target = name else {
        return ToolResponse.error("Must specify 'name' for unhide action")
    }

    // Resolve first so the response can report the app's name and PID.
    let resolved = try await service.findApplication(identifier: target)
    try await service.unhideApplication(identifier: target)

    let elapsed = Date().timeIntervalSince(startTime)
    let summary = "✅ Unhidden \(resolved.name) in \(String(format: "%.2f", elapsed))s"

    return ToolResponse(
        content: [.text(summary)],
        meta: .object([
            "app_name": .string(resolved.name),
            "process_id": .double(Double(resolved.processIdentifier)),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Lists running applications as both a readable summary and structured metadata.
///
/// - Parameters:
///   - service: Application service used to enumerate apps.
///   - startTime: Reference instant for the reported execution time.
/// - Returns: A response whose text has one line per app (name, PID, optional
///   bundle ID, and `[ACTIVE]` / `[HIDDEN]` markers) and whose metadata carries
///   the same information per app plus the window count.
private func handleList(
    service: ApplicationServiceProtocol,
    startTime: Date
) async throws -> ToolResponse {
    let listing = try await service.listApplications()
    // Timing covers the service call only, not the formatting below.
    let elapsed = Date().timeIntervalSince(startTime)
    let running = listing.data.applications

    // One line per app: mandatory "name (PID: n)" followed by optional bracketed tags.
    let lines = running.map { app -> String in
        var segments = ["\(app.name) (PID: \(app.processIdentifier))"]
        if let bundleID = app.bundleIdentifier {
            segments.append("[\(bundleID)]")
        }
        if app.isActive {
            segments.append("[ACTIVE]")
        }
        if app.isHidden {
            segments.append("[HIDDEN]")
        }
        return segments.joined(separator: " ")
    }
    let appList = lines.joined(separator: "\n")

    let message = "📱 Running Applications (\(running.count) total):\n\(appList)\n\nCompleted in \(String(format: "%.2f", elapsed))s"

    let entries: [Value] = running.map { app in
        let bundleValue: Value = app.bundleIdentifier.map { Value.string($0) } ?? Value.null
        return .object([
            "name": .string(app.name),
            "process_id": .double(Double(app.processIdentifier)),
            "bundle_id": bundleValue,
            "is_active": .bool(app.isActive),
            "is_hidden": .bool(app.isHidden),
            "window_count": .double(Double(app.windowCount))
        ])
    }

    return ToolResponse(
        content: [.text(message)],
        meta: .object([
            "application_count": .double(Double(running.count)),
            "applications": .array(entries),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
}
|
||||
214
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ClickTool.swift
Normal file
214
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ClickTool.swift
Normal file
@ -0,0 +1,214 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for clicking UI elements.
///
/// The click target is resolved from the first of these arguments that is present:
/// raw `coords`, an element ID (`on`) looked up in a captured session, or a
/// case-insensitive text `query` matched against the session's UI elements.
public struct ClickTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "ClickTool")

    public let name = "click"

    public var description: String {
        """
        Clicks on UI elements or coordinates.
        Supports element queries, specific IDs from see command, or raw coordinates.
        Includes smart waiting for elements to become actionable.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "query": SchemaBuilder.string(
                    description: "Optional. Element text or query to click. Will search for matching elements."
                ),
                "on": SchemaBuilder.string(
                    description: "Optional. Element ID to click (e.g., B1, T2) from see command output."
                ),
                "coords": SchemaBuilder.string(
                    description: "Optional. Click at specific coordinates in format 'x,y' (e.g., '100,200')."
                ),
                "session": SchemaBuilder.string(
                    description: "Optional. Session ID from see command. Uses latest session if not specified."
                ),
                "wait_for": SchemaBuilder.number(
                    description: "Optional. Maximum milliseconds to wait for element to become actionable. Default: 5000.",
                    default: 5000
                ),
                "double": SchemaBuilder.boolean(
                    description: "Optional. Double-click instead of single click.",
                    default: false
                ),
                "right": SchemaBuilder.boolean(
                    description: "Optional. Right-click (secondary click) instead of left-click.",
                    default: false
                )
            ],
            required: []
        )
    }

    public init() {}

    /// Resolves the click target and performs the click.
    ///
    /// - Parameter arguments: Tool arguments; at least one of `query`, `on`, or `coords`
    ///   must be present. `double` takes precedence over `right` when both are true.
    /// - Returns: A success response carrying the click location, the clicked element
    ///   description (or null for raw coordinates) and the execution time, or an error
    ///   response describing why the click could not be performed.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        // Validate that at least one target is specified
        let query = arguments.getString("query")
        let elementId = arguments.getString("on")
        let coords = arguments.getString("coords")

        guard query != nil || elementId != nil || coords != nil else {
            return ToolResponse.error("Must specify either 'query', 'on', or 'coords'")
        }

        // NOTE(review): `wait_for` is advertised in the schema, but nothing in this tool
        // acts on it yet, so it is intentionally not read here.
        let sessionId = arguments.getString("session")
        let isDouble = arguments.getBool("double") ?? false
        let isRight = arguments.getBool("right") ?? false

        do {
            let startTime = Date()

            // Determine click location
            let clickLocation: CGPoint
            let clickedElement: String?

            if let coords = coords {
                // Parse coordinates. Values that cannot be represented as Int (NaN,
                // infinity, absurdly large magnitudes) are rejected up front: they are
                // not meaningful screen positions and would trap in the Int(...)
                // conversions used when the response message is built.
                let parts = coords.split(separator: ",").map { $0.trimmingCharacters(in: .whitespaces) }
                guard parts.count == 2,
                      let x = Double(parts[0]),
                      let y = Double(parts[1]),
                      Int(exactly: x.rounded(.towardZero)) != nil,
                      Int(exactly: y.rounded(.towardZero)) != nil else {
                    return ToolResponse.error("Invalid coordinates format. Use 'x,y' (e.g., '100,200')")
                }
                clickLocation = CGPoint(x: x, y: y)
                clickedElement = nil

            } else if let elementId = elementId {
                // Find element by ID from session
                guard let session = await getSession(id: sessionId) else {
                    return ToolResponse.error("No active session. Run 'see' command first to capture UI state.")
                }

                guard let element = await session.getElement(byId: elementId) else {
                    return ToolResponse.error("Element '\(elementId)' not found in current session. Run 'see' command to update UI state.")
                }

                // Click the center of the element
                clickLocation = CGPoint(
                    x: element.frame.midX,
                    y: element.frame.midY
                )
                clickedElement = "\(element.role): \(element.title ?? element.label ?? "untitled")"

            } else if let query = query {
                // Search for element by text
                guard let session = await getSession(id: sessionId) else {
                    return ToolResponse.error("No active session. Run 'see' command first to capture UI state.")
                }

                // Case-insensitive substring match against title, label and value
                let searchText = query.lowercased()
                let elements = await session.uiElements
                let matches = elements.filter { element in
                    (element.title?.lowercased().contains(searchText) ?? false) ||
                        (element.label?.lowercased().contains(searchText) ?? false) ||
                        (element.value?.lowercased().contains(searchText) ?? false)
                }

                // Use first actionable match, or first match if none are actionable
                guard let element = matches.first(where: { $0.isActionable }) ?? matches.first else {
                    return ToolResponse.error("No elements found matching query: '\(query)'")
                }

                clickLocation = CGPoint(
                    x: element.frame.midX,
                    y: element.frame.midY
                )
                clickedElement = "\(element.role): \(element.title ?? element.label ?? "untitled")"

            } else {
                return ToolResponse.error("No click target specified")
            }

            // Perform the click
            let clickService = PeekabooServices.shared.automation

            if isDouble {
                try await clickService.click(
                    target: .coordinates(clickLocation),
                    clickType: .double,
                    sessionId: sessionId
                )
            } else if isRight {
                try await clickService.click(
                    target: .coordinates(clickLocation),
                    clickType: .right,
                    sessionId: sessionId
                )
            } else {
                try await clickService.click(
                    target: .coordinates(clickLocation),
                    clickType: .single,
                    sessionId: sessionId
                )
            }

            let executionTime = Date().timeIntervalSince(startTime)

            // Build response
            var message = "✅ "
            if isDouble {
                message += "Double-clicked"
            } else if isRight {
                message += "Right-clicked"
            } else {
                message += "Clicked"
            }

            if let element = clickedElement {
                message += " on \(element)"
            }
            message += " at (\(Int(clickLocation.x)), \(Int(clickLocation.y)))"
            message += " in \(String(format: "%.2f", executionTime))s"

            // Break up complex expression for type checker
            let clickLocationMeta = Value.object([
                "x": .double(Double(clickLocation.x)),
                "y": .double(Double(clickLocation.y))
            ])

            let clickedElementMeta: Value = clickedElement.map { Value.string($0) } ?? .null

            let metaDict: [String: Value] = [
                "click_location": clickLocationMeta,
                "execution_time": .double(executionTime),
                "clicked_element": clickedElementMeta
            ]

            return ToolResponse(
                content: [.text(message)],
                meta: .object(metaDict)
            )

        } catch {
            logger.error("Click execution failed: \(error)")
            return ToolResponse.error("Failed to perform click: \(error.localizedDescription)")
        }
    }

    // MARK: - Private Helpers

    /// Looks up the session with the given ID.
    ///
    /// - Parameter id: Session ID, or `nil` to request the most recent session.
    /// - Returns: The matching session. Currently always `nil` when `id` is `nil`,
    ///   because most-recent-session tracking is not implemented yet.
    private func getSession(id: String?) async -> UISession? {
        guard let sessionId = id else {
            // Get most recent session
            // For now, return nil - in a real implementation we'd track the most recent session
            return nil
        }

        return await UISessionManager.shared.getSession(id: sessionId)
    }
}
|
||||
@ -0,0 +1,371 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for interacting with system dialogs and alerts.
///
/// Dispatches on the required `action` argument (`list`, `click`, `input`, `file`,
/// `dismiss`) to the dialog service exposed by `PeekabooServices.shared.dialogs`.
public struct DialogTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "DialogTool")

    public let name = "dialog"

    public var description: String {
        """
        Interact with system dialogs and alerts.

        Actions:
        - click: Click buttons in dialogs
        - input: Input text into dialog fields
        - file: Select files in file dialogs
        - dismiss: Dismiss dialogs
        - list: List open dialogs

        Handles save/open dialogs, alerts, and other system prompts.

        Examples:
        - Click OK button: { "action": "click", "button": "OK" }
        - Input text: { "action": "input", "text": "Hello", "field": "Name" }
        - Select file: { "action": "file", "path": "/Users/user/document.txt" }
        - Dismiss dialog: { "action": "dismiss", "force": true }
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "action": SchemaBuilder.string(
                    description: "Action to perform: 'list' to discover dialogs, 'click' to interact with buttons, 'input' for text entry, 'file' for file selection, 'dismiss' to close dialogs",
                    enum: ["list", "click", "input", "file", "dismiss"]
                ),
                "button": SchemaBuilder.string(
                    description: "Button text to click (for click action)"
                ),
                "text": SchemaBuilder.string(
                    description: "Text to input (for input action)"
                ),
                "field": SchemaBuilder.string(
                    description: "Field name/index to target (for input action)"
                ),
                "clear": SchemaBuilder.boolean(
                    description: "Clear field before input (default: false)",
                    default: false
                ),
                "path": SchemaBuilder.string(
                    description: "File path to select (for file action)"
                ),
                "select": SchemaBuilder.string(
                    description: "Multiple file paths to select (for file action)"
                ),
                "window": SchemaBuilder.string(
                    description: "Window title or index to target"
                ),
                "name": SchemaBuilder.string(
                    description: "Dialog name to target"
                ),
                "force": SchemaBuilder.boolean(
                    description: "Force dismiss (for dismiss action)",
                    default: false
                ),
                "index": SchemaBuilder.number(
                    description: "Dialog index when multiple dialogs are open"
                )
            ],
            required: ["action"]
        )
    }

    public init() {}

    /// Validates the arguments and runs the requested dialog action.
    ///
    /// - Parameter arguments: Tool arguments; `action` is required, `button` is required
    ///   for `click`, and `text` is required for `input`.
    /// - Returns: The action handler's response, or an error response when a required
    ///   argument is missing, the action is unknown, or the dialog service throws.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let action = arguments.getString("action") else {
            return ToolResponse.error("Missing required parameter: action")
        }

        // NOTE(review): the schema also advertises `name` and `index`, but no handler
        // consumes them yet, so they are not read here.
        let button = arguments.getString("button")
        let text = arguments.getString("text")
        let field = arguments.getString("field")
        let clear = arguments.getBool("clear") ?? false
        let path = arguments.getString("path")
        let select = arguments.getString("select")
        let window = arguments.getString("window")
        let force = arguments.getBool("force") ?? false

        let dialogService = PeekabooServices.shared.dialogs

        do {
            let startTime = Date()

            switch action {
            case "list":
                return try await handleList(
                    service: dialogService,
                    window: window,
                    startTime: startTime
                )

            case "click":
                guard let button = button else {
                    return ToolResponse.error("Click action requires 'button' parameter")
                }
                return try await handleClick(
                    service: dialogService,
                    button: button,
                    window: window,
                    startTime: startTime
                )

            case "input":
                guard let text = text else {
                    return ToolResponse.error("Input action requires 'text' parameter")
                }
                return try await handleInput(
                    service: dialogService,
                    text: text,
                    field: field,
                    clear: clear,
                    window: window,
                    startTime: startTime
                )

            case "file":
                return try await handleFile(
                    service: dialogService,
                    path: path,
                    select: select,
                    window: window,
                    startTime: startTime
                )

            case "dismiss":
                return try await handleDismiss(
                    service: dialogService,
                    force: force,
                    window: window,
                    startTime: startTime
                )

            default:
                return ToolResponse.error("Unknown action: \(action). Supported actions: list, click, input, file, dismiss")
            }

        } catch {
            logger.error("Dialog operation execution failed: \(error)")
            return ToolResponse.error("Failed to \(action) dialog: \(error.localizedDescription)")
        }
    }

    // MARK: - Action Handlers

    /// Lists the elements of the targeted dialog as a human-readable report plus summary counts.
    private func handleList(
        service: DialogServiceProtocol,
        window: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        let elements = try await service.listDialogElements(windowTitle: window)
        let executionTime = Date().timeIntervalSince(startTime)
        let dialogInfo = elements.dialogInfo
        let bounds = dialogInfo.bounds

        var content = "✅ Dialog Elements Found in \(String(format: "%.2f", executionTime))s:\n\n"

        // Dialog info
        content += "📋 **Dialog**: \(dialogInfo.title)\n"
        content += " Role: \(dialogInfo.role)\n"
        if let subrole = dialogInfo.subrole {
            content += " Subrole: \(subrole)\n"
        }
        content += " File Dialog: \(dialogInfo.isFileDialog ? "Yes" : "No")\n"
        content += " Bounds: \(Int(bounds.origin.x)), \(Int(bounds.origin.y)), \(Int(bounds.size.width)) × \(Int(bounds.size.height))\n\n"

        // Buttons
        if !elements.buttons.isEmpty {
            content += "🔘 **Buttons** (\(elements.buttons.count)):\n"
            for button in elements.buttons {
                let status = button.isEnabled ? "enabled" : "disabled"
                let defaultMark = button.isDefault ? " (default)" : ""
                content += " • \(button.title) (\(status))\(defaultMark)\n"
            }
            content += "\n"
        }

        // Text fields
        if !elements.textFields.isEmpty {
            content += "📝 **Text Fields** (\(elements.textFields.count)):\n"
            for textField in elements.textFields {
                let title = textField.title ?? "Field \(textField.index)"
                let value = textField.value ?? ""
                let placeholder = textField.placeholder.map { " (placeholder: \($0))" } ?? ""
                let status = textField.isEnabled ? "enabled" : "disabled"
                content += " • \(title): '\(value)' (\(status))\(placeholder)\n"
            }
            content += "\n"
        }

        // Static texts
        if !elements.staticTexts.isEmpty {
            content += "📄 **Static Text** (\(elements.staticTexts.count)):\n"
            for staticText in elements.staticTexts {
                content += " • \(staticText)\n"
            }
            content += "\n"
        }

        // Other elements
        if !elements.otherElements.isEmpty {
            content += "🔧 **Other Elements** (\(elements.otherElements.count)):\n"
            for element in elements.otherElements {
                let title = element.title ?? "Untitled"
                let value = element.value.map { " = '\($0)'" } ?? ""
                content += " • \(element.role): \(title)\(value)\n"
            }
        }

        return ToolResponse(
            content: [.text(content)],
            meta: .object([
                "dialog_title": .string(dialogInfo.title),
                "dialog_role": .string(dialogInfo.role),
                "is_file_dialog": .bool(dialogInfo.isFileDialog),
                "button_count": .double(Double(elements.buttons.count)),
                "text_field_count": .double(Double(elements.textFields.count)),
                "static_text_count": .double(Double(elements.staticTexts.count)),
                "other_element_count": .double(Double(elements.otherElements.count)),
                "execution_time": .double(executionTime)
            ])
        )
    }

    /// Clicks the dialog button whose text matches `button`.
    private func handleClick(
        service: DialogServiceProtocol,
        button: String,
        window: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        let result = try await service.clickButton(buttonText: button, windowTitle: window)
        let executionTime = Date().timeIntervalSince(startTime)

        guard result.success else {
            return ToolResponse.error("Failed to click button '\(button)': \(result.details["error"] ?? "Unknown error")")
        }

        return ToolResponse(
            content: [.text("✅ Clicked button '\(button)' in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "button_text": .string(button),
                "action": .string(result.action.rawValue),
                "success": .bool(result.success),
                "execution_time": .double(executionTime),
                "details": .object(result.details.mapValues { .string($0) })
            ])
        )
    }

    /// Enters `text` into a dialog field, optionally clearing the field first.
    private func handleInput(
        service: DialogServiceProtocol,
        text: String,
        field: String?,
        clear: Bool,
        window: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        let result = try await service.enterText(
            text: text,
            fieldIdentifier: field,
            clearExisting: clear,
            windowTitle: window
        )
        let executionTime = Date().timeIntervalSince(startTime)

        guard result.success else {
            return ToolResponse.error("Failed to enter text: \(result.details["error"] ?? "Unknown error")")
        }

        let fieldDesc = field ?? "field"
        let clearDesc = clear ? " (cleared first)" : ""
        return ToolResponse(
            content: [.text("✅ Entered text '\(text)' into \(fieldDesc)\(clearDesc) in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "text": .string(text),
                "field": .string(field ?? ""),
                "clear": .bool(clear),
                "action": .string(result.action.rawValue),
                "success": .bool(result.success),
                "execution_time": .double(executionTime),
                "details": .object(result.details.mapValues { .string($0) })
            ])
        )
    }

    /// Drives a file dialog towards the given path.
    ///
    /// The first non-empty value of `path` and `select` is used as the target. A leading
    /// `~` is expanded to the user's home directory before the path is split into a
    /// directory and a filename, so `~/Documents/x.txt` is not resolved relative to the
    /// process's working directory.
    ///
    /// NOTE(review): `window` is accepted but not forwarded to the service, `select` is
    /// treated as a single path, and the action button is always "Save".
    private func handleFile(
        service: DialogServiceProtocol,
        path: String?,
        select: String?,
        window: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        // Prefer `path`, fall back to `select`; an empty string counts as "not provided".
        let candidates = [path, select].compactMap { $0 }
        guard let targetPath = candidates.first(where: { !$0.isEmpty }) else {
            return ToolResponse.error("File action requires either 'path' or 'select' parameter")
        }

        // Extract filename from path for save dialogs
        let expandedPath = (targetPath as NSString).expandingTildeInPath
        let url = URL(fileURLWithPath: expandedPath)
        let filename = url.lastPathComponent
        let directoryPath = url.deletingLastPathComponent().path

        let result = try await service.handleFileDialog(
            path: directoryPath,
            filename: filename,
            actionButton: "Save" // Default action button
        )
        let executionTime = Date().timeIntervalSince(startTime)

        guard result.success else {
            return ToolResponse.error("Failed to select file: \(result.details["error"] ?? "Unknown error")")
        }

        return ToolResponse(
            content: [.text("✅ Selected file '\(targetPath)' in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "path": .string(targetPath),
                "filename": .string(filename),
                "directory": .string(directoryPath),
                "action": .string(result.action.rawValue),
                "success": .bool(result.success),
                "execution_time": .double(executionTime),
                "details": .object(result.details.mapValues { .string($0) })
            ])
        )
    }

    /// Dismisses the targeted dialog, optionally using the forced method.
    private func handleDismiss(
        service: DialogServiceProtocol,
        force: Bool,
        window: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        let result = try await service.dismissDialog(force: force, windowTitle: window)
        let executionTime = Date().timeIntervalSince(startTime)

        guard result.success else {
            return ToolResponse.error("Failed to dismiss dialog: \(result.details["error"] ?? "Unknown error")")
        }

        let method = force ? "force (Escape key)" : "normal"
        return ToolResponse(
            content: [.text("✅ Dismissed dialog using \(method) in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "force": .bool(force),
                "action": .string(result.action.rawValue),
                "success": .bool(result.success),
                "execution_time": .double(executionTime),
                "details": .object(result.details.mapValues { .string($0) })
            ])
        )
    }
}
|
||||
240
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/DockTool.swift
Normal file
240
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/DockTool.swift
Normal file
@ -0,0 +1,240 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for interacting with the macOS Dock.
///
/// Dispatches on the required `action` argument (`launch`, `right-click`, `hide`,
/// `show`, `list`) to the dock service exposed by `PeekabooServices.shared.dock`.
public struct DockTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "DockTool")

    public let name = "dock"

    public var description: String {
        """
        Interact with the macOS Dock - launch apps, show context menus, hide/show dock.
        Actions: launch, right-click (with menu selection), hide, show, list
        Can list all dock items including persistent and running applications.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "action": SchemaBuilder.string(
                    description: "Action to perform on the dock",
                    enum: ["launch", "right-click", "hide", "show", "list"]
                ),
                "app": SchemaBuilder.string(
                    description: "Application name for launch/right-click actions"
                ),
                "select": SchemaBuilder.string(
                    description: "Menu item to select after right-clicking"
                ),
                "include_all": SchemaBuilder.boolean(
                    description: "Include all items when listing (default: false)",
                    default: false
                )
            ],
            required: ["action"]
        )
    }

    public init() {}

    /// Validates the arguments and runs the requested dock action.
    ///
    /// - Parameter arguments: Tool arguments; `action` is required, and `app` is required
    ///   by the `launch` and `right-click` handlers.
    /// - Returns: The action handler's response, or an error response when a required
    ///   argument is missing, the action is unknown, or the dock service throws.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let action = arguments.getString("action") else {
            return ToolResponse.error("Missing required parameter: action")
        }

        let app = arguments.getString("app")
        let select = arguments.getString("select")
        let includeAll = arguments.getBool("include_all") ?? false

        let dockService = PeekabooServices.shared.dock

        do {
            let startTime = Date()

            switch action {
            case "launch":
                return try await handleLaunch(
                    service: dockService,
                    app: app,
                    startTime: startTime
                )

            case "right-click":
                return try await handleRightClick(
                    service: dockService,
                    app: app,
                    menuItem: select,
                    startTime: startTime
                )

            case "hide":
                return try await handleHide(
                    service: dockService,
                    startTime: startTime
                )

            case "show":
                return try await handleShow(
                    service: dockService,
                    startTime: startTime
                )

            case "list":
                return try await handleList(
                    service: dockService,
                    includeAll: includeAll,
                    startTime: startTime
                )

            default:
                return ToolResponse.error("Unknown action: \(action). Supported actions: launch, right-click, hide, show, list")
            }

        } catch {
            logger.error("Dock operation execution failed: \(error)")
            return ToolResponse.error("Failed to \(action) dock: \(error.localizedDescription)")
        }
    }

    // MARK: - Action Handlers

    /// Launches `app` from the Dock; returns an error response when `app` is missing.
    private func handleLaunch(
        service: DockServiceProtocol,
        app: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        guard let app = app else {
            return ToolResponse.error("Must specify 'app' for launch action")
        }

        try await service.launchFromDock(appName: app)

        let executionTime = Date().timeIntervalSince(startTime)

        return ToolResponse(
            content: [.text("✅ Launched \(app) from dock in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "app_name": .string(app),
                "execution_time": .double(executionTime)
            ])
        )
    }

    /// Right-clicks `app` in the Dock and, when `menuItem` is given, selects that menu entry.
    private func handleRightClick(
        service: DockServiceProtocol,
        app: String?,
        menuItem: String?,
        startTime: Date
    ) async throws -> ToolResponse {
        guard let app = app else {
            return ToolResponse.error("Must specify 'app' for right-click action")
        }

        try await service.rightClickDockItem(appName: app, menuItem: menuItem)

        let executionTime = Date().timeIntervalSince(startTime)

        var message = "✅ Right-clicked \(app) in dock"
        if let menuItem = menuItem {
            message += " and selected '\(menuItem)'"
        }
        message += " in \(String(format: "%.2f", executionTime))s"

        // Absent menu item is reported as an explicit null rather than omitted.
        let menuItemMeta: Value = menuItem.map { Value.string($0) } ?? .null

        return ToolResponse(
            content: [.text(message)],
            meta: .object([
                "app_name": .string(app),
                "menu_item": menuItemMeta,
                "execution_time": .double(executionTime)
            ])
        )
    }

    /// Hides the Dock via the dock service and reports auto-hide as enabled.
    private func handleHide(
        service: DockServiceProtocol,
        startTime: Date
    ) async throws -> ToolResponse {
        try await service.hideDock()

        let executionTime = Date().timeIntervalSince(startTime)

        return ToolResponse(
            content: [.text("✅ Hidden dock (enabled auto-hide) in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "auto_hide_enabled": .bool(true),
                "execution_time": .double(executionTime)
            ])
        )
    }

    /// Shows the Dock via the dock service and reports auto-hide as disabled.
    private func handleShow(
        service: DockServiceProtocol,
        startTime: Date
    ) async throws -> ToolResponse {
        try await service.showDock()

        let executionTime = Date().timeIntervalSince(startTime)

        return ToolResponse(
            content: [.text("✅ Shown dock (disabled auto-hide) in \(String(format: "%.2f", executionTime))s")],
            meta: .object([
                "auto_hide_enabled": .bool(false),
                "execution_time": .double(executionTime)
            ])
        )
    }

    /// Lists Dock items as a numbered text report plus structured per-item metadata.
    ///
    /// The text report numbers items by their position in the returned list, while the
    /// metadata reports each item's own `index` property.
    private func handleList(
        service: DockServiceProtocol,
        includeAll: Bool,
        startTime: Date
    ) async throws -> ToolResponse {
        let dockItems = try await service.listDockItems(includeAll: includeAll)
        let executionTime = Date().timeIntervalSince(startTime)

        let itemList = dockItems.enumerated().map { index, item -> String in
            var info = "[\(index)] \(item.title) (\(item.itemType.rawValue))"
            if let isRunning = item.isRunning {
                info += isRunning ? " [RUNNING]" : " [NOT RUNNING]"
            }
            if let bundleId = item.bundleIdentifier {
                info += " [\(bundleId)]"
            }
            return info
        }.joined(separator: "\n")

        let filterText = includeAll ? "(including separators/spacers)" : "(applications and folders only)"
        let message = "🚢 Dock Items \(filterText) (\(dockItems.count) total):\n\(itemList)\n\nCompleted in \(String(format: "%.2f", executionTime))s"

        // Optional fields become explicit nulls. Building each value in a typed local
        // avoids force-unwrapping and keeps the dictionary literal cheap to type-check.
        let dockItemsMeta: [Value] = dockItems.map { item -> Value in
            let isRunningMeta: Value = item.isRunning.map { Value.bool($0) } ?? .null
            let bundleIdMeta: Value = item.bundleIdentifier.map { Value.string($0) } ?? .null
            let positionMeta: Value = item.position.map { position in
                Value.object([
                    "x": .double(Double(position.x)),
                    "y": .double(Double(position.y))
                ])
            } ?? .null
            let sizeMeta: Value = item.size.map { size in
                Value.object([
                    "width": .double(Double(size.width)),
                    "height": .double(Double(size.height))
                ])
            } ?? .null

            return Value.object([
                "index": .double(Double(item.index)),
                "title": .string(item.title),
                "item_type": .string(item.itemType.rawValue),
                "is_running": isRunningMeta,
                "bundle_identifier": bundleIdMeta,
                "position": positionMeta,
                "size": sizeMeta
            ])
        }

        return ToolResponse(
            content: [.text(message)],
            meta: .object([
                "dock_item_count": .double(Double(dockItems.count)),
                "include_all": .bool(includeAll),
                "dock_items": .array(dockItemsMeta),
                "execution_time": .double(executionTime)
            ])
        )
    }
}
|
||||
310
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/DragTool.swift
Normal file
310
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/DragTool.swift
Normal file
@ -0,0 +1,310 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for performing drag and drop operations between UI elements or coordinates
|
||||
public struct DragTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "DragTool")
|
||||
|
||||
public let name = "drag"
|
||||
|
||||
public var description: String {
|
||||
"""
|
||||
Perform drag and drop operations between UI elements or coordinates.
|
||||
Supports element queries, specific IDs, or raw coordinates for both start and end points.
|
||||
Includes focus options for handling windows in different spaces.
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
public var inputSchema: Value {
|
||||
SchemaBuilder.object(
|
||||
properties: [
|
||||
"from": SchemaBuilder.string(
|
||||
description: "Optional. Start element ID or query"
|
||||
),
|
||||
"from_coords": SchemaBuilder.string(
|
||||
description: "Optional. Start coordinates in format 'x,y' (e.g., '100,200')"
|
||||
),
|
||||
"to": SchemaBuilder.string(
|
||||
description: "Optional. End element ID or query"
|
||||
),
|
||||
"to_coords": SchemaBuilder.string(
|
||||
description: "Optional. End coordinates in format 'x,y' (e.g., '300,400')"
|
||||
),
|
||||
"to_app": SchemaBuilder.string(
|
||||
description: "Optional. Target application name when dragging between apps"
|
||||
),
|
||||
"session": SchemaBuilder.string(
|
||||
description: "Optional. Session ID from see command. Uses latest session if not specified"
|
||||
),
|
||||
"duration": SchemaBuilder.number(
|
||||
description: "Optional. Duration in milliseconds (default: 500)",
|
||||
default: 500
|
||||
),
|
||||
"steps": SchemaBuilder.number(
|
||||
description: "Optional. Number of intermediate steps (default: 10)",
|
||||
default: 10
|
||||
),
|
||||
"modifiers": SchemaBuilder.string(
|
||||
description: "Optional. Comma-separated modifiers (cmd, shift, alt, ctrl)"
|
||||
),
|
||||
"auto_focus": SchemaBuilder.boolean(
|
||||
description: "Optional. Auto-focus target window (default: true)",
|
||||
default: true
|
||||
),
|
||||
"bring_to_current_space": SchemaBuilder.boolean(
|
||||
description: "Optional. Bring window to current space",
|
||||
default: false
|
||||
),
|
||||
"space_switch": SchemaBuilder.boolean(
|
||||
description: "Optional. Allow switching spaces",
|
||||
default: false
|
||||
)
|
||||
],
|
||||
required: []
|
||||
)
|
||||
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Performs a drag-and-drop gesture described by the tool arguments.
///
/// The start point comes from `from` (element query) or `from_coords` ("x,y"),
/// and the end point from `to` or `to_coords`. At least one of each pair must
/// be supplied. Validation failures and runtime failures are reported as
/// `ToolResponse.error` values rather than thrown.
///
/// - Parameter arguments: Raw MCP tool arguments.
/// - Returns: A text response summarising the drag, with the resolved points,
///   timing and distance in `meta`; or an error response.
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    // Validate that at least one 'from' and one 'to' parameter is specified
    let fromElement = arguments.getString("from")
    let fromCoords = arguments.getString("from_coords")
    let toElement = arguments.getString("to")
    let toCoords = arguments.getString("to_coords")

    guard fromElement != nil || fromCoords != nil else {
        return ToolResponse.error("Must specify either 'from' or 'from_coords' for the start point")
    }

    guard toElement != nil || toCoords != nil else {
        return ToolResponse.error("Must specify either 'to' or 'to_coords' for the end point")
    }

    // Parse optional parameters
    let sessionId = arguments.getString("session")
    let toApp = arguments.getString("to_app")
    let rawDuration = arguments.getNumber("duration") ?? 500
    let rawSteps = arguments.getNumber("steps") ?? 10
    let modifiers = arguments.getString("modifiers")
    let autoFocus = arguments.getBool("auto_focus") ?? true
    let bringToCurrentSpace = arguments.getBool("bring_to_current_space") ?? false
    let spaceSwitch = arguments.getBool("space_switch") ?? false

    // Validate duration and steps on the raw numbers BEFORE converting to Int.
    // NOTE(review): getNumber presumably returns a floating-point value; Int(_:)
    // traps on NaN, infinity and out-of-range doubles, so the range checks must
    // come first. The bounds (>= 1, < limit + 1) accept exactly the values whose
    // truncated integer lies in 1...limit, matching the previous post-conversion checks.
    guard rawDuration >= 1 else {
        return ToolResponse.error("Duration must be greater than 0")
    }

    guard rawDuration < 30001 else {
        return ToolResponse.error("Duration must be 30 seconds or less to prevent excessive delays")
    }

    guard rawSteps >= 1 else {
        return ToolResponse.error("Steps must be greater than 0")
    }

    guard rawSteps < 101 else {
        return ToolResponse.error("Steps must be 100 or less to prevent excessive processing")
    }

    let duration = Int(rawDuration)
    let steps = Int(rawSteps)

    do {
        let startTime = Date()

        // Determine start location
        let (fromPoint, fromDescription) = try await resolveLocation(
            elementQuery: fromElement,
            coordinateString: fromCoords,
            sessionId: sessionId,
            parameterName: "from"
        )

        // Determine end location
        let (toPoint, toDescription) = try await resolveLocation(
            elementQuery: toElement,
            coordinateString: toCoords,
            sessionId: sessionId,
            parameterName: "to"
        )

        // Validate that from and to are different
        guard fromPoint != toPoint else {
            return ToolResponse.error("Start and end points must be different")
        }

        // Handle app focus if specified
        if let toApp = toApp, autoFocus {
            do {
                let windowService = PeekabooServices.shared.windows
                try await windowService.focusWindow(target: .application(toApp))
                // Small delay to allow app to come to front
                try await Task.sleep(nanoseconds: 100_000_000) // 100ms
            } catch {
                logger.warning("Failed to focus target app '\(toApp)': \(error)")
                // Continue with drag operation even if focus fails
            }
        }

        // Handle space management if needed
        if bringToCurrentSpace || spaceSwitch {
            // For now, log the intention - space management would need additional implementation
            logger.info("Space management requested (bring_to_current_space: \(bringToCurrentSpace), space_switch: \(spaceSwitch))")
        }

        // Perform the drag operation
        let automation = PeekabooServices.shared.automation
        try await automation.drag(
            from: fromPoint,
            to: toPoint,
            duration: duration,
            steps: steps,
            modifiers: modifiers
        )

        let executionTime = Date().timeIntervalSince(startTime)

        // Calculate distance for the response
        let deltaX = toPoint.x - fromPoint.x
        let deltaY = toPoint.y - fromPoint.y
        let distance = sqrt(deltaX * deltaX + deltaY * deltaY)

        // Build response message
        var message = "✅ Performed drag and drop from \(fromDescription) to \(toDescription)"
        if let modifiers = modifiers, !modifiers.isEmpty {
            message += " with modifiers (\(modifiers))"
        }
        message += " over \(duration)ms with \(steps) steps"
        message += " (distance: \(String(format: "%.1f", distance))px)"
        message += " in \(String(format: "%.2f", executionTime))s"

        var metaData: [String: Value] = [
            "from": .object([
                "x": .double(Double(fromPoint.x)),
                "y": .double(Double(fromPoint.y)),
                "description": .string(fromDescription)
            ]),
            "to": .object([
                "x": .double(Double(toPoint.x)),
                "y": .double(Double(toPoint.y)),
                "description": .string(toDescription)
            ]),
            "duration": .double(Double(duration)),
            "steps": .double(Double(steps)),
            "distance": .double(distance),
            "execution_time": .double(executionTime)
        ]

        if let modifiers = modifiers {
            metaData["modifiers"] = .string(modifiers)
        }

        if let toApp = toApp {
            metaData["target_app"] = .string(toApp)
        }

        return ToolResponse(
            content: [.text(message)],
            meta: .object(metaData)
        )

    } catch let coordinateError as CoordinateParseError {
        // Location-resolution failures carry a ready-made user-facing message.
        return ToolResponse.error(coordinateError.message)
    } catch {
        logger.error("Drag execution failed: \(error)")
        return ToolResponse.error("Failed to perform drag operation: \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Private Helpers
|
||||
|
||||
/// Error thrown while resolving a drag endpoint.
///
/// `execute` catches this type and returns `message` to the client unchanged.
private struct CoordinateParseError: Swift.Error {
    /// User-facing explanation of what was wrong with the location input.
    let message: String
}
|
||||
|
||||
/// Resolves a drag endpoint to a point plus a human-readable description.
///
/// A coordinate string takes precedence over an element query when both are
/// supplied. Element queries are tried first as an element ID and then as a
/// case-insensitive substring of each element's title, label or value.
///
/// - Parameters:
///   - elementQuery: Element ID or search text, if any.
///   - coordinateString: "x,y" coordinates, if any.
///   - sessionId: Session to look elements up in.
///   - parameterName: Name used in error messages ("from" or "to").
/// - Returns: The resolved point and its description.
/// - Throws: `CoordinateParseError` when nothing can be resolved.
private func resolveLocation(
    elementQuery: String?,
    coordinateString: String?,
    sessionId: String?,
    parameterName: String
) async throws -> (CGPoint, String) {
    if let coords = coordinateString {
        // Parse coordinates
        let point = try parseCoordinates(coords, parameterName: parameterName)
        return (point, "(\(Int(point.x)), \(Int(point.y)))")
    }

    guard let query = elementQuery else {
        throw CoordinateParseError(message: "No location specified for \(parameterName)")
    }

    guard let session = await getSession(id: sessionId) else {
        throw CoordinateParseError(message: "No active session. Run 'see' command first to capture UI state.")
    }

    // Check if it's an element ID (like B1, T2, etc.)
    if let element = await session.getElement(byId: query) {
        let point = CGPoint(x: element.frame.midX, y: element.frame.midY)
        let description = "element \(query) (\(element.role): \(element.title ?? element.label ?? "untitled"))"
        return (point, description)
    }

    // Search by text; the lowercased query is computed once, not per element.
    let searchText = query.lowercased()
    let elements = await session.uiElements
    let matches = elements.filter { element in
        (element.title?.lowercased().contains(searchText) ?? false)
            || (element.label?.lowercased().contains(searchText) ?? false)
            || (element.value?.lowercased().contains(searchText) ?? false)
    }

    guard let firstMatch = matches.first else {
        throw CoordinateParseError(message: "No elements found matching '\(query)' for \(parameterName)")
    }

    // Use first actionable match, or first match if none are actionable
    let element = matches.first(where: { $0.isActionable }) ?? firstMatch
    let point = CGPoint(x: element.frame.midX, y: element.frame.midY)
    let description = "\(element.role): \(element.title ?? element.label ?? "untitled")"
    return (point, description)
}
|
||||
|
||||
/// Parses an "x,y" coordinate string into a point.
///
/// Whitespace around each component is ignored. Exactly two comma-separated
/// components are required; empty components (as in "100,,200", ",100,200"
/// or "100,200,") are rejected rather than silently dropped.
///
/// - Parameters:
///   - coordString: Text of the form "x,y", e.g. "100,200".
///   - parameterName: Name used in error messages ("from" or "to").
/// - Returns: The parsed point.
/// - Throws: `CoordinateParseError` if the format is wrong, a component is
///   not a number, or a value lies outside 0...20000.
private func parseCoordinates(_ coordString: String, parameterName: String) throws -> CGPoint {
    // Keep empty fields so stray commas are detected instead of being skipped.
    let parts = coordString
        .split(separator: ",", omittingEmptySubsequences: false)
        .map { $0.trimmingCharacters(in: .whitespaces) }

    guard parts.count == 2 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates format. Use 'x,y' (e.g., '100,200')")
    }

    guard let x = Double(parts[0]), let y = Double(parts[1]) else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be valid numbers")
    }

    // Validate coordinates are reasonable (not negative, not extremely large).
    // These comparisons also reject NaN, which Double("nan") would produce.
    // NOTE(review): displays placed left of or above the main display may have
    // negative coordinates — confirm whether rejecting them is intended.
    guard x >= 0 && y >= 0 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be non-negative")
    }

    guard x <= 20000 && y <= 20000 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be 20000 or less")
    }

    return CGPoint(x: x, y: y)
}
|
||||
|
||||
/// Looks up the UI session with the given identifier.
///
/// - Parameter id: Session identifier, or `nil` for "most recent".
/// - Returns: The session from `UISessionManager`, or `nil` when no
///   identifier is given.
private func getSession(id: String?) async -> UISession? {
    guard let sessionId = id else {
        // Get most recent session
        // For now, return nil - in a real implementation we'd track the most recent session
        return nil
    }
    return await UISessionManager.shared.getSession(id: sessionId)
}
|
||||
}
|
||||
@ -0,0 +1,94 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for pressing keyboard shortcuts and key combinations.
///
/// Takes a comma-separated `keys` list and an optional `hold_duration`
/// (milliseconds, 0...10000) and forwards them to the automation service.
public struct HotkeyTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "HotkeyTool")

    public let name = "hotkey"

    public var description: String {
        """
        Presses keyboard shortcuts and key combinations.
        Simulates pressing multiple keys simultaneously like Cmd+C or Ctrl+Shift+T.
        Keys are pressed in order and released in reverse order.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "keys": SchemaBuilder.string(
                    description: "Comma-separated list of keys to press (e.g., 'cmd,c' for copy, 'cmd,shift,t' for reopen tab). Supported keys: cmd, shift, alt/option, ctrl, fn, a-z, 0-9, space, return, tab, escape, delete, arrow_up, arrow_down, arrow_left, arrow_right, f1-f12."
                ),
                "hold_duration": SchemaBuilder.number(
                    description: "Optional. Delay between key press and release in milliseconds. Default: 50.",
                    minimum: 0,
                    default: 50
                )
            ],
            required: ["keys"]
        )
    }

    /// Creates the tool. The initializer takes no arguments and performs no work.
    public init() {}

    /// Presses the requested key combination.
    ///
    /// - Parameter arguments: Raw MCP tool arguments (`keys`, `hold_duration`).
    /// - Returns: A text response describing the key press, with the keys,
    ///   hold duration and execution time in `meta`; or an error response
    ///   when validation or the key press fails.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        // Extract required keys parameter
        guard let keys = arguments.getString("keys") else {
            return ToolResponse.error("Missing required parameter: keys")
        }

        // Validate keys is not empty
        guard !keys.trimmingCharacters(in: .whitespaces).isEmpty else {
            return ToolResponse.error("Keys parameter cannot be empty")
        }

        // Extract optional hold_duration parameter
        let holdDuration = arguments.getNumber("hold_duration") ?? 50

        // Validate hold_duration. This comparison is also false for NaN.
        guard holdDuration >= 0 else {
            return ToolResponse.error("hold_duration must be non-negative")
        }

        // Enforce the upper bound BEFORE converting to Int.
        // NOTE(review): getNumber presumably returns a floating-point value;
        // Int(_:) traps on infinity and out-of-range doubles. `< 10001` accepts
        // exactly the values whose truncated integer is at most 10000.
        guard holdDuration < 10001 else { // Max 10 seconds
            return ToolResponse.error("hold_duration cannot exceed 10000ms (10 seconds)")
        }

        // Convert to integer milliseconds
        let holdDurationMs = Int(holdDuration)

        do {
            let startTime = Date()

            // Execute hotkey using PeekabooServices
            let hotkeyService = PeekabooServices.shared.automation
            try await hotkeyService.hotkey(keys: keys, holdDuration: holdDurationMs)

            let executionTime = Date().timeIntervalSince(startTime)

            // Format keys for display
            let keyArray = keys.split(separator: ",").map { $0.trimmingCharacters(in: .whitespaces) }
            let formattedKeys = keyArray.joined(separator: "+")

            let message = "✅ Pressed \(formattedKeys) (held for \(holdDurationMs)ms) in \(String(format: "%.2f", executionTime))s"

            return ToolResponse(
                content: [.text(message)],
                meta: .object([
                    "keys": .string(keys),
                    "hold_duration": .double(Double(holdDurationMs)),
                    "execution_time": .double(executionTime),
                    "formatted_keys": .string(formattedKeys)
                ])
            )

        } catch {
            logger.error("Hotkey execution failed: \(error)")
            return ToolResponse.error("Failed to press hotkey combination '\(keys)': \(error.localizedDescription)")
        }
    }
}
|
||||
403
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ImageTool.swift
Normal file
403
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ImageTool.swift
Normal file
@ -0,0 +1,403 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import AppKit
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
/// MCP tool for capturing screenshots.
///
/// Captures a screen, the frontmost window, the menu bar, or one/all windows
/// of an application, optionally saving the images to disk and optionally
/// passing the first image to `analyzeImage` together with a question.
public struct ImageTool: MCPTool {
    public let name = "image"

    public var description: String {
        """
        Captures macOS screen content and optionally analyzes it. Targets can be entire screen, specific app window, or all windows of an app (via app_target). Supports foreground/background capture. Output via file path or inline Base64 data (format: "data"). If a question is provided, image is analyzed by an AI model (auto-selected from PEEKABOO_AI_PROVIDERS). Window shadows/frames excluded. Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "path": SchemaBuilder.string(
                    description: "Optional. Base absolute path for saving the image."
                ),
                "format": SchemaBuilder.string(
                    description: "Optional. Output format.",
                    enum: ["png", "jpg", "data"]
                ),
                "app_target": SchemaBuilder.string(
                    description: "Optional. Specifies the capture target."
                ),
                "question": SchemaBuilder.string(
                    description: "Optional. If provided, the captured image will be analyzed."
                ),
                "capture_focus": SchemaBuilder.string(
                    description: "Optional. Focus behavior.",
                    enum: ["background", "auto", "foreground"],
                    default: "auto"
                )
            ],
            // Every property is documented as optional and decoded as optional
            // by `ImageInput`, so nothing is required.
            required: []
        )
    }

    /// Creates the tool. The initializer takes no arguments and performs no work.
    public init() {}

    /// Captures the requested target and returns the result.
    ///
    /// - Parameter arguments: Raw MCP tool arguments, decoded as `ImageInput`.
    /// - Returns: The analysis text when `question` is set; inline image data
    ///   when the format is `data` and exactly one image was captured;
    ///   otherwise a text summary. Saved file paths are listed in `meta`.
    /// - Throws: Decoding, target-parsing, capture and file-writing errors.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        let input = try arguments.decode(ImageInput.self)

        // Parse capture target
        let target = try parseCaptureTarget(input.appTarget)

        // Determine capture focus
        let captureFocus = input.captureFocus ?? .auto

        // Normalize format
        let format = normalizeFormat(input.format ?? .png)

        // Perform capture based on target
        let captureResults: [CaptureResult]

        switch target {
        case .screen(let index):
            let result = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: index)
            captureResults = [result]

        case .frontmost:
            let result = try await PeekabooServices.shared.screenCapture.captureFrontmost()
            captureResults = [result]

        case .application(let identifier, let windowIndex):
            // Handle focus if needed
            if captureFocus == .foreground {
                try await PeekabooServices.shared.applications.activateApplication(identifier: identifier)
                try await Task.sleep(nanoseconds: 50_000_000) // 0.05 seconds
            }

            if let windowIndex = windowIndex {
                let result = try await PeekabooServices.shared.screenCapture.captureWindow(
                    appIdentifier: identifier,
                    windowIndex: windowIndex
                )
                captureResults = [result]
            } else {
                // Capture all windows
                let windows = try await PeekabooServices.shared.windows.listWindows(target: .application(identifier))
                var results: [CaptureResult] = []

                for (index, _) in windows.enumerated() {
                    let result = try await PeekabooServices.shared.screenCapture.captureWindow(
                        appIdentifier: identifier,
                        windowIndex: index
                    )
                    results.append(result)
                }

                captureResults = results
            }

        case .menubar:
            // Special case for menu bar
            let result = try await captureMenuBar()
            captureResults = [result]
        }

        // Save images if path provided
        var savedFiles: [MCPSavedFile] = []

        if let basePath = input.path {
            for (index, result) in captureResults.enumerated() {
                let fileName: String
                if captureResults.count > 1 {
                    fileName = generateFileName(
                        basePath: basePath,
                        index: index,
                        metadata: result.metadata,
                        format: format
                    )
                } else {
                    fileName = ensureExtension(basePath, format: format)
                }

                try saveImageData(result.imageData, to: fileName, format: format)

                savedFiles.append(MCPSavedFile(
                    path: fileName,
                    item_label: describeCapture(result.metadata),
                    window_title: result.metadata.windowInfo?.title,
                    window_id: nil,
                    window_index: index,
                    mime_type: format.mimeType
                ))
            }
        }

        let savedPaths = Value.array(savedFiles.map { Value.string($0.path) })

        // Handle analysis if requested
        if let question = input.question {
            let imagePath: String
            let isTemporary: Bool
            if let saved = savedFiles.first {
                imagePath = saved.path
                isTemporary = false
            } else if let firstCapture = captureResults.first {
                imagePath = try saveTemporaryImage(firstCapture.imageData)
                isTemporary = true
            } else {
                // An application with no windows yields no captures at all.
                return ToolResponse.error("No image was captured to analyze")
            }
            // The temporary copy exists only for the analysis call; remove it
            // whether the analysis succeeds or throws.
            defer {
                if isTemporary {
                    try? FileManager.default.removeItem(atPath: imagePath)
                }
            }

            let analysis = try await analyzeImage(at: imagePath, question: question)

            return ToolResponse.text(
                analysis.text,
                meta: .object([
                    "model": .string(analysis.modelUsed),
                    "savedFiles": savedPaths
                ])
            )
        }

        // Return capture result
        if format == .data, captureResults.count == 1, let onlyCapture = captureResults.first {
            return ToolResponse.image(
                data: onlyCapture.imageData,
                mimeType: "image/png",
                meta: .object(["savedFiles": savedPaths])
            )
        }

        return ToolResponse.text(
            buildImageSummary(savedFiles: savedFiles, captureCount: captureResults.count),
            meta: .object(["savedFiles": savedPaths])
        )
    }
}
|
||||
|
||||
// MARK: - Supporting Types
|
||||
|
||||
/// Output formats accepted by the image tool.
///
/// Extends the on-disk formats with `data`, which asks for the image to be
/// returned as base64 data.
enum ImageFormatOption: String, Codable {
    case png, jpg
    /// Return as base64 data
    case data
}
|
||||
|
||||
/// Decoded arguments of the image tool. Every field is optional.
struct ImageInput: Codable {
    /// Base path for saving the image(s).
    let path: String?
    /// Requested output format.
    let format: ImageFormatOption?
    /// Capture target string, decoded from `app_target`.
    let appTarget: String?
    /// Question to ask about the captured image.
    let question: String?
    /// Focus behaviour, decoded from `capture_focus`.
    let captureFocus: CaptureFocus?

    /// Maps the camelCase properties onto the snake_case wire names.
    enum CodingKeys: String, CodingKey {
        case path
        case format
        case appTarget = "app_target"
        case question
        case captureFocus = "capture_focus"
    }
}
|
||||
|
||||
/// What the image tool should capture, as produced by `parseCaptureTarget`.
enum ImageCaptureTarget {
    /// A display; `index` is `nil` when no specific display was requested.
    case screen(index: Int?)
    /// The frontmost window.
    case frontmost
    /// An application; `windowIndex` is `nil` to capture all of its windows.
    case application(identifier: String, windowIndex: Int?)
    /// The menu bar.
    case menubar
}
|
||||
|
||||
// MARK: - Helper Functions
|
||||
|
||||
/// Parses the `app_target` argument into a capture target.
///
/// Recognised forms:
/// - `nil`, `""`, `"screen"`: the screen with no specific index
/// - `"screen:N"`: display `N`
/// - `"frontmost"`, `"menubar"` (case-insensitive)
/// - `"App"` or `"App:N"`: an application, optionally with a window index;
///   a non-numeric suffix is ignored and all windows are captured.
///
/// - Parameter appTarget: Raw target string, if any.
/// - Returns: The parsed target.
/// - Throws: `PeekabooError.invalidInput` for a non-numeric screen index or
///   an empty application identifier (for example `":"` or `":1"`).
private func parseCaptureTarget(_ appTarget: String?) throws -> ImageCaptureTarget {
    guard let target = appTarget else {
        return .screen(index: nil)
    }

    // Parse screen:N format
    let screenPrefix = "screen:"
    if target.hasPrefix(screenPrefix) {
        let indexStr = String(target.dropFirst(screenPrefix.count))
        guard let index = Int(indexStr) else {
            throw PeekabooError.invalidInput("Invalid screen index: \(indexStr)")
        }
        return .screen(index: index)
    }

    // Special values
    switch target.lowercased() {
    case "", "screen":
        return .screen(index: nil)
    case "frontmost":
        return .frontmost
    case "menubar":
        return .menubar
    default:
        // Parse app[:window] format. Empty fields are kept so the first
        // component always exists; dropping them left `parts` empty for a
        // target made only of colons and crashed on `parts[0]`.
        let parts = target.split(separator: ":", maxSplits: 1, omittingEmptySubsequences: false)
        guard let appPart = parts.first, !appPart.isEmpty else {
            throw PeekabooError.invalidInput("Invalid app target: \(target)")
        }

        // A missing or non-numeric suffix means "all windows".
        let windowIndex = parts.count > 1 ? Int(String(parts[1])) : nil

        return .application(identifier: String(appPart), windowIndex: windowIndex)
    }
}
|
||||
|
||||
/// Returns the given format, or `.png` when none was supplied.
///
/// NOTE(review): the original comment said a "jpeg" alias is handled by
/// ImageFormat's Codable implementation; no alias handling is visible here,
/// so confirm where that happens.
private func normalizeFormat(_ format: ImageFormatOption?) -> ImageFormatOption {
    format ?? .png
}
|
||||
|
||||
/// Captures a 24-point-high strip across the top edge of the main screen's frame.
///
/// NOTE(review): the 24-point height is hard-coded and the rectangle is built
/// from `NSScreen.frame` using `maxY - 24`; confirm the coordinate space that
/// `captureArea` expects and whether the height holds on all displays.
///
/// - Throws: `OperationError.captureFailed` when there is no main screen, or
///   whatever the capture service throws.
private func captureMenuBar() async throws -> CaptureResult {
    // Menu bar is 24px high
    let menuBarHeight: CGFloat = 24

    // Get main screen bounds
    guard let screenBounds = NSScreen.main?.frame else {
        throw OperationError.captureFailed(reason: "No main screen available")
    }

    let menuBarRect = CGRect(
        x: screenBounds.minX,
        y: screenBounds.maxY - menuBarHeight,
        width: screenBounds.width,
        height: menuBarHeight
    )

    return try await PeekabooServices.shared.screenCapture.captureArea(menuBarRect)
}
|
||||
|
||||
/// Writes image data to `path`, creating the parent directory if necessary.
///
/// When the format maps to JPEG the data is re-encoded with a compression
/// factor of 0.9; otherwise the bytes are written unchanged.
///
/// - Parameters:
///   - data: Encoded image bytes.
///   - path: Destination path; a leading `~` is expanded.
///   - format: Requested output format.
/// - Throws: `OperationError.captureFailed` if JPEG conversion fails, or any
///   file-system error.
private func saveImageData(_ data: Data, to path: String, format: ImageFormatOption) throws {
    let destination = URL(fileURLWithPath: path.expandingTildeInPath)
    let fileManager = FileManager.default

    // Create parent directory if needed
    let folder = destination.deletingLastPathComponent()
    if !fileManager.fileExists(atPath: folder.path) {
        try fileManager.createDirectory(at: folder, withIntermediateDirectories: true)
    }

    // Anything that is not JPEG is written exactly as received.
    guard format.imageFormat == .jpg else {
        try data.write(to: destination)
        return
    }

    // Convert PNG to JPEG
    guard let image = NSImage(data: data),
          let tiff = image.tiffRepresentation,
          let bitmap = NSBitmapImageRep(data: tiff),
          let jpeg = bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.9])
    else {
        throw OperationError.captureFailed(reason: "Failed to convert image to JPEG")
    }

    try jpeg.write(to: destination)
}
|
||||
|
||||
/// Writes the data to a uniquely named `.png` file in the temporary directory.
///
/// - Parameter data: Image bytes to write.
/// - Returns: Path of the file that was written.
/// - Throws: Any error from writing the file.
private func saveTemporaryImage(_ data: Data) throws -> String {
    let destination = FileManager.default.temporaryDirectory
        .appendingPathComponent("peekaboo-\(UUID().uuidString).png")
    try data.write(to: destination)
    return destination.path
}
|
||||
|
||||
/// Returns `path` with a file extension matching `format`.
///
/// A leading `~` is expanded and the result is always taken from the file
/// URL, so both branches return the same kind of path. Previously the
/// matching-extension branch returned the raw input (possibly still
/// containing `~`) while the other branch returned an expanded path.
///
/// - Parameters:
///   - path: Requested output path.
///   - format: Output format whose `fileExtension` is enforced.
/// - Returns: The expanded path, with its extension replaced if it did not
///   already match (compared case-insensitively).
private func ensureExtension(_ path: String, format: ImageFormatOption) -> String {
    let expectedExt = format.fileExtension
    let url = URL(fileURLWithPath: path.expandingTildeInPath)

    guard url.pathExtension.lowercased() == expectedExt else {
        return url.deletingPathExtension().appendingPathExtension(expectedExt).path
    }

    return url.path
}
|
||||
|
||||
/// Builds a distinct output path for one capture out of several.
///
/// The file name is `<base>[-<app>][-<window title>]-<index>.<ext>` in the
/// directory of `basePath`. The app name and window title both have `/` and
/// `:` replaced by `_`, so neither can introduce extra path components;
/// previously only the window title was sanitised this way.
///
/// - Parameters:
///   - basePath: Requested output path; a leading `~` is expanded.
///   - index: Position of the capture, appended to keep names unique.
///   - metadata: Capture metadata supplying the app and window names.
///   - format: Output format supplying the file extension.
/// - Returns: The generated path.
private func generateFileName(basePath: String, index: Int, metadata: CaptureMetadata, format: ImageFormatOption) -> String {
    let url = URL(fileURLWithPath: basePath.expandingTildeInPath)
    let directory = url.deletingLastPathComponent()

    // Replaces characters that would be treated as path separators.
    func replacingPathSeparators(_ text: String) -> String {
        text
            .replacingOccurrences(of: "/", with: "_")
            .replacingOccurrences(of: ":", with: "_")
    }

    var filename = url.deletingPathExtension().lastPathComponent
    if let appInfo = metadata.applicationInfo {
        let appName = replacingPathSeparators(appInfo.name)
            .replacingOccurrences(of: " ", with: "_")
        filename += "-\(appName)"
    }
    if let windowInfo = metadata.windowInfo {
        // Long titles are cut to 50 characters to keep names manageable.
        let sanitizedTitle = replacingPathSeparators(windowInfo.title).prefix(50)
        filename += "-\(sanitizedTitle)"
    }
    filename += "-\(index)"

    return directory
        .appendingPathComponent(filename)
        .appendingPathExtension(format.fileExtension)
        .path
}
|
||||
|
||||
/// Produces a short label for a capture.
///
/// Uses "<app> - <window title>" or "<app>" when application info is present,
/// otherwise "Screen <index>" when display info is present, otherwise
/// "Screenshot".
private func describeCapture(_ metadata: CaptureMetadata) -> String {
    guard let appInfo = metadata.applicationInfo else {
        guard let displayInfo = metadata.displayInfo else {
            return "Screenshot"
        }
        return "Screen \(displayInfo.index)"
    }

    guard let windowInfo = metadata.windowInfo else {
        return appInfo.name
    }
    return "\(appInfo.name) - \(windowInfo.title)"
}
|
||||
|
||||
/// Builds the text summary returned for a capture.
///
/// With no saved files this is a single line giving the capture count;
/// otherwise a header line followed by one bullet per saved file.
private func buildImageSummary(savedFiles: [MCPSavedFile], captureCount: Int) -> String {
    guard !savedFiles.isEmpty else {
        return "Captured \(captureCount) image(s)"
    }

    let header = "📸 Captured \(captureCount) screenshot(s)"
    let entries = savedFiles.map { file in
        " • \(file.item_label): \(file.path)"
    }

    return ([header] + entries).joined(separator: "\n")
}
|
||||
|
||||
/// Placeholder for AI image analysis; currently always throws.
///
/// - Parameters:
///   - path: Path of the image to analyze (unused for now).
///   - question: Question to ask about the image (unused for now).
/// - Throws: `PeekabooError.operationError` on every call.
private func analyzeImage(at path: String, question: String) async throws -> (text: String, modelUsed: String) {
    // TODO: Implement AI analysis once AI service is migrated
    // For now, return a placeholder response
    let notImplemented = PeekabooError.operationError(message: "AI analysis not yet implemented in MCP server")
    throw notImplemented
}
|
||||
|
||||
// MARK: - Supporting Types
|
||||
|
||||
/// Record of one image written to disk by the image tool.
struct MCPSavedFile {
    /// Path the file was saved to, and the label shown in the summary.
    let path, item_label: String
    /// Title and identifier of the captured window, when known.
    let window_title, window_id: String?
    /// Position of the capture within the batch.
    let window_index: Int?
    /// MIME type of the saved file.
    let mime_type: String
}
|
||||
|
||||
extension String {
    /// The string with a leading `~` expanded, as done by
    /// `NSString.expandingTildeInPath`.
    var expandingTildeInPath: String {
        let bridged = self as NSString
        return bridged.expandingTildeInPath
    }
}
|
||||
|
||||
extension ImageFormatOption {
    /// MIME type of the encoded image; `data` is treated as PNG.
    var mimeType: String {
        switch self {
        case .jpg:
            return "image/jpeg"
        case .png, .data:
            return "image/png"
        }
    }

    /// File extension used when saving; `data` is treated as PNG.
    var fileExtension: String {
        switch self {
        case .jpg:
            return "jpg"
        case .png, .data:
            return "png"
        }
    }

    /// Convert to ImageFormat for actual image saving; `data` maps to PNG.
    var imageFormat: ImageFormat {
        switch self {
        case .jpg:
            return .jpg
        case .png, .data:
            return .png
        }
    }
}
|
||||
248
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ListTool.swift
Normal file
248
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/ListTool.swift
Normal file
@ -0,0 +1,248 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import AppKit
|
||||
|
||||
/// MCP tool for listing various system items
|
||||
public struct ListTool: MCPTool {
|
||||
public let name = "list"
|
||||
public let description = """
|
||||
Lists various system items on macOS, providing situational awareness.
|
||||
|
||||
Capabilities:
|
||||
- Running Applications: Get a list of all currently running applications (names and bundle IDs).
|
||||
- Application Windows: For a specific application (identified by name or bundle ID), list its open windows.
|
||||
- Details: Optionally include window IDs, bounds (position and size), and whether a window is off-screen.
|
||||
- Multi-window apps: Clearly lists each window of the target app.
|
||||
- Server Status: Provides information about the Peekaboo MCP server itself (version, configured AI providers).
|
||||
|
||||
Use Cases:
|
||||
- Agent needs to know if 'Photoshop' is running before attempting to automate it.
|
||||
{ "item_type": "running_applications" } // Agent checks if 'Photoshop' is in the list.
|
||||
- Agent wants to find a specific 'Notes' window to capture.
|
||||
{ "item_type": "application_windows", "app": "Notes", "include_window_details": ["ids", "bounds"] }
|
||||
The agent can then use the window title or ID with the 'image' tool.
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
|
||||
/// JSON schema for the tool's arguments: `item_type`, `app` and
/// `include_window_details`, none of which is required.
public var inputSchema: Value {
    let itemTypeSchema = SchemaBuilder.string(
        description: "Specifies the type of items to list. If omitted or empty, it defaults to 'application_windows' if 'app' is provided, otherwise 'running_applications'. Valid options are:\n- `running_applications`: Lists all currently running applications.\n- `application_windows`: Lists open windows for a specific application. Requires the `app` parameter.\n- `server_status`: Returns information about the Peekaboo MCP server.",
        enum: ["running_applications", "application_windows", "server_status"]
    )

    let appSchema = SchemaBuilder.string(
        description: "Required when `item_type` is `application_windows`. Specifies the target application by its name (e.g., \"Safari\", \"TextEdit\"), bundle ID, or process ID (e.g., \"PID:663\"). Fuzzy matching is used for names, so partial names may work."
    )

    let windowDetailsSchema = SchemaBuilder.array(
        items: SchemaBuilder.string(
            enum: ["off_screen", "bounds", "ids"]
        ),
        description: "Optional, only applicable when `item_type` is `application_windows`. Specifies additional details to include for each window. Provide an array of strings. Example: [\"bounds\", \"ids\"].\n- `ids`: Include window ID.\n- `bounds`: Include window position and size (x, y, width, height).\n- `off_screen`: Indicate if the window is currently off-screen."
    )

    return SchemaBuilder.object(
        properties: [
            "item_type": itemTypeSchema,
            "app": appSchema,
            "include_window_details": windowDetailsSchema
        ],
        required: []
    )
}
|
||||
|
||||
/// Creates the tool. The initializer takes no arguments and performs no work.
public init() {}
|
||||
|
||||
/// Lists running applications, an application's windows, or server status.
///
/// `item_type` selects the listing. When it is missing or not one of the
/// known values, the listing defaults to application windows if `app` is
/// given and to running applications otherwise.
///
/// - Parameter arguments: Raw MCP tool arguments.
/// - Returns: The listing as text, or an error response when application
///   windows are requested without `app`.
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    let requestedType = arguments.getString("item_type")
    let app = arguments.getString("app")
    let includeWindowDetails = arguments.getStringArray("include_window_details")

    // Used when item_type is absent or unrecognised.
    let fallbackType: ItemType = (app == nil) ? .runningApplications : .applicationWindows

    // Determine effective item type
    let effectiveItemType: ItemType
    switch requestedType ?? "" {
    case "running_applications":
        effectiveItemType = .runningApplications
    case "application_windows":
        effectiveItemType = .applicationWindows
    case "server_status":
        effectiveItemType = .serverStatus
    default:
        effectiveItemType = fallbackType
    }

    // Execute based on type
    switch effectiveItemType {
    case .runningApplications:
        return try await listRunningApplications()
    case .applicationWindows:
        // Window listings need a target application.
        guard let app = app else {
            return ToolResponse.error("For 'application_windows', 'app' identifier is required.")
        }
        return try await listApplicationWindows(app: app, includeDetails: includeWindowDetails)
    case .serverStatus:
        return await getServerStatus()
    }
}
|
||||
|
||||
/// Builds a numbered text list of running applications.
///
/// Each line shows the name, the bundle identifier when non-empty, the PID,
/// an `[ACTIVE]` marker for the active app, and the window count. Service
/// failures are returned as an error response rather than thrown.
private func listRunningApplications() async throws -> ToolResponse {
    do {
        let apps = try await PeekabooServices.shared.applications.listApplications().data.applications

        let plural = apps.count != 1 ? "s" : ""
        let header = "Found \(apps.count) running application\(plural):\n\n"

        let lines = apps.enumerated().map { entry -> String in
            let app = entry.element
            var line = "\(entry.offset + 1). \(app.name)"
            if let bundleID = app.bundleIdentifier, !bundleID.isEmpty {
                line += " (\(bundleID))"
            }
            line += " - PID: \(app.processIdentifier)"
            if app.isActive {
                line += " [ACTIVE]"
            }
            return line + " - Windows: \(app.windowCount)\n"
        }

        return ToolResponse.text(header + lines.joined())
    } catch {
        return ToolResponse.error("Failed to list applications: \(error.localizedDescription)")
    }
}
|
||||
|
||||
/// Builds a numbered text list of an application's windows.
///
/// - Parameters:
///   - app: Application identifier passed to the applications service.
///   - includeDetails: Optional extras per window: "ids", "off_screen",
///     "bounds".
/// - Returns: The listing as text; service failures are returned as an error
///   response rather than thrown.
private func listApplicationWindows(app: String, includeDetails: [String]?) async throws -> ToolResponse {
    do {
        // Get windows for the app (the service handles identifier resolution)
        let output = try await PeekabooServices.shared.applications.listWindows(for: app)
        let windows = output.data.windows

        let plural = windows.count != 1 ? "s" : ""
        var summary = "Found \(windows.count) window\(plural) for application: "

        if let appInfo = output.data.targetApplication {
            summary += "\(appInfo.name)"
            if let bundleID = appInfo.bundleIdentifier, !bundleID.isEmpty {
                summary += " (\(bundleID))"
            }
            summary += " - PID: \(appInfo.processIdentifier)\n\n"
        } else {
            // No resolved application info; echo the identifier as given.
            summary += "\(app)\n\n"
        }

        if !windows.isEmpty {
            summary += "Windows:\n"

            // No requested details behaves the same as an empty list.
            let requested = includeDetails ?? []
            let wantsIDs = requested.contains("ids")
            let wantsOffScreen = requested.contains("off_screen")
            let wantsBounds = requested.contains("bounds")

            for (offset, window) in windows.enumerated() {
                var line = "\(offset + 1). \"\(window.title)\""

                // Add optional details
                if wantsIDs && window.windowID != 0 {
                    line += " [ID: \(window.windowID)]"
                }
                if wantsOffScreen {
                    line += window.isOffScreen ? " [OFF-SCREEN]" : " [ON-SCREEN]"
                }
                if wantsBounds {
                    let bounds = window.bounds
                    line += " [\(Int(bounds.origin.x)),\(Int(bounds.origin.y)) \(Int(bounds.width))×\(Int(bounds.height))]"
                }

                summary += line + "\n"
            }
        }

        return ToolResponse.text(summary)
    } catch {
        return ToolResponse.error("Failed to list windows: \(error.localizedDescription)")
    }
}
|
||||
|
||||
/// Assembles a Markdown-style status report for the MCP server.
///
/// The report has five sections: server version, system permissions
/// (Screen Recording and Accessibility), AI provider configuration read from
/// the `PEEKABOO_AI_PROVIDERS` environment variable, detected configuration
/// issues, and host system information.
///
/// - Returns: A text response containing the report, one entry per line.
private func getServerStatus() async -> ToolResponse {
    var sections: [String] = []

    // 1. Server version
    sections.append("# Peekaboo MCP Server Status")
    sections.append("")
    sections.append("Version: 3.0.0-beta.2")
    sections.append("Platform: macOS")
    sections.append("")

    // 2. System Permissions
    sections.append("## System Permissions")

    let screenRecording = await PeekabooServices.shared.screenCapture.hasScreenRecordingPermission()
    let accessibility = await PeekabooServices.shared.automation.hasAccessibilityPermission()

    sections.append("- Screen Recording: \(screenRecording ? "✅ Granted" : "❌ Not granted")")
    sections.append("- Accessibility: \(accessibility ? "✅ Granted" : "❌ Not granted")")
    sections.append("")

    // 3. AI Provider Status
    sections.append("## AI Provider Status")

    // Read the variable once so this section and the issues section agree.
    // An unset, empty, or whitespace-only value names no provider, so it is
    // reported as "not configured" instead of printing an empty provider list.
    let trimmedProviders = ProcessInfo.processInfo.environment["PEEKABOO_AI_PROVIDERS"]?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    let configuredProviders: String? = trimmedProviders.isEmpty ? nil : trimmedProviders

    if let providersString = configuredProviders {
        sections.append("Configured providers: \(providersString)")
    } else {
        sections.append("❌ No AI providers configured")
        sections.append("Configure PEEKABOO_AI_PROVIDERS environment variable to enable image analysis")
    }
    sections.append("")

    // 4. Configuration Issues
    sections.append("## Configuration Issues")

    var issues: [String] = []

    if !screenRecording {
        issues.append("❌ Screen Recording permission not granted")
    }

    if configuredProviders == nil {
        issues.append("⚠️ No AI providers configured (analysis features will be limited)")
    }

    if issues.isEmpty {
        sections.append("✅ No configuration issues detected")
    } else {
        sections.append(contentsOf: issues)
    }
    sections.append("")

    // 5. System Information
    sections.append("## System Information")
    sections.append("- Platform: \(ProcessInfo.processInfo.operatingSystemVersionString)")
    sections.append("- Architecture: \(ProcessInfo.processInfo.processorArchitecture)")

    return ToolResponse.text(sections.joined(separator: "\n"))
}
|
||||
}
|
||||
|
||||
/// Helper enum for item types.
///
/// NOTE(review): the cases appear to correspond to the three listings built
/// earlier in this file (running applications, application windows, server
/// status) — confirm against the code that maps input to these cases.
private enum ItemType {
    case runningApplications, applicationWindows, serverStatus
}
|
||||
|
||||
/// Adds a processor architecture label to `ProcessInfo`.
private extension ProcessInfo {
    /// Architecture the binary was compiled for, chosen at compile time:
    /// `"arm64"`, `"x86_64"`, or `"unknown"` for any other target.
    var processorArchitecture: String {
        let architecture: String
        #if arch(arm64)
        architecture = "arm64"
        #elseif arch(x86_64)
        architecture = "x86_64"
        #else
        architecture = "unknown"
        #endif
        return architecture
    }
}
|
||||
244
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/MenuTool.swift
Normal file
244
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/MenuTool.swift
Normal file
@ -0,0 +1,244 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// MCP tool for application menu bars: lists menus and menu items, clicks
/// menu items by path or name, and clicks system menu extras.
public struct MenuTool: MCPTool {
    /// Tool identifier.
    public let name = "menu"

    /// Usage text describing the supported actions, targeting rules and examples.
    public var description: String {
        let usage = """
        Interact with application menu bars - list available menus and menu items for an application, or click on a specific menu item using path notation.

        Actions:
        - list: Discover all available menus and menu items for an application
        - list-all: List all menus across all applications (for debugging)
        - click: Click on a specific menu item using path notation
        - click-extra: Click on a system menu extra (menu bar items)

        Target applications by name (e.g., "Safari"), bundle ID (e.g., "com.apple.Safari"),
        or process ID (e.g., "PID:663"). Fuzzy matching is supported for application names.

        Examples:
        - List Chrome menus: { "action": "list", "app": "Google Chrome" }
        - Save document: { "action": "click", "app": "TextEdit", "path": "File > Save" }
        - Copy selection: { "action": "click", "app": "Safari", "path": "Edit > Copy" }
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
        return usage
    }

    /// JSON schema of the accepted arguments; only `action` is required.
    public var inputSchema: Value {
        let schema = SchemaBuilder.object(
            properties: [
                "action": SchemaBuilder.string(
                    description: "Action to perform: 'list' to discover menus, 'click' to interact with menu items, 'click-extra' for system menu extras, 'list-all' for all menus",
                    enum: ["list", "click", "click-extra", "list-all"]
                ),
                "app": SchemaBuilder.string(
                    description: "Target application name, bundle ID, or process ID (required for list and click actions)"
                ),
                "path": SchemaBuilder.string(
                    description: "Menu path for nested items (e.g., 'File > Save As...' or 'Edit > Copy')"
                ),
                "item": SchemaBuilder.string(
                    description: "Simple menu item to click (for non-nested items)"
                ),
                "title": SchemaBuilder.string(
                    description: "Title of system menu extra (for click-extra action)"
                )
            ],
            required: ["action"]
        )
        return schema
    }

    public init() {}

    /// Dispatches to the handler matching the `action` argument.
    ///
    /// - Parameter arguments: Tool arguments; must contain a string `action`.
    /// - Returns: The handler's response, or an error response when `action`
    ///   is missing or not one of the four supported values.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let requestedAction = arguments.getString("action") else {
            return ToolResponse.error("Missing required parameter: action")
        }

        switch requestedAction {
        case "list":
            return try await handleListAction(arguments: arguments)
        case "click":
            return try await handleClickAction(arguments: arguments)
        case "click-extra":
            return try await handleClickExtraAction(arguments: arguments)
        case "list-all":
            return try await handleListAllAction()
        default:
            return ToolResponse.error("Invalid action: \(requestedAction). Must be one of: list, click, click-extra, list-all")
        }
    }

    // MARK: - Action Handlers

    /// Lists the menu structure of the application named by the `app` argument.
    private func handleListAction(arguments: ToolArguments) async throws -> ToolResponse {
        guard let app = arguments.getString("app") else {
            return ToolResponse.error("Missing required parameter: app (required for list action)")
        }

        do {
            let structure = try await PeekabooServices.shared.menu.listMenus(for: app)
            return ToolResponse.text(
                formatMenuStructure(structure),
                meta: .object([
                    "app": .string(structure.application.name),
                    "total_menus": .int(structure.menus.count),
                    "total_items": .int(structure.totalItems)
                ])
            )
        } catch {
            return ToolResponse.error("Failed to list menus for app '\(app)': \(error.localizedDescription)")
        }
    }

    /// Debugging aid: reports menu and item counts for every running
    /// application whose menus can be listed.
    private func handleListAllAction() async throws -> ToolResponse {
        do {
            let running = try await PeekabooServices.shared.applications.listApplications()
            var summaries: [(app: String, menuCount: Int, itemCount: Int)] = []

            for candidate in running.data.applications {
                // Applications whose menus cannot be listed are skipped, not reported.
                guard let structure = try? await PeekabooServices.shared.menu.listMenus(for: candidate.name) else {
                    continue
                }
                summaries.append((
                    app: candidate.name,
                    menuCount: structure.menus.count,
                    itemCount: structure.totalItems
                ))
            }

            guard !summaries.isEmpty else {
                return ToolResponse.text("No applications with accessible menus found.")
            }

            // The text is sorted by app name; the "apps" metadata keeps discovery order.
            let lines = summaries
                .sorted { $0.app < $1.app }
                .map { "• \($0.app): \($0.menuCount) menus, \($0.itemCount) items\n" }
            let output = "📋 All Application Menus\n\n" + lines.joined()

            return ToolResponse.text(
                output,
                meta: .object([
                    "total_apps": .int(summaries.count),
                    "apps": .array(summaries.map { .string($0.app) })
                ])
            )
        } catch {
            return ToolResponse.error("Failed to list all menus: \(error.localizedDescription)")
        }
    }

    /// Clicks a menu item in the target application. `path` takes precedence
    /// over `item` when both are supplied.
    private func handleClickAction(arguments: ToolArguments) async throws -> ToolResponse {
        guard let app = arguments.getString("app") else {
            return ToolResponse.error("Missing required parameter: app (required for click action)")
        }

        // Path-based click wins when a path is present.
        if let path = arguments.getString("path") {
            do {
                try await PeekabooServices.shared.menu.clickMenuItem(app: app, itemPath: path)
                return ToolResponse.text("✅ Successfully clicked menu item: \(path)")
            } catch {
                return ToolResponse.error("Failed to click menu item '\(path)' in app '\(app)': \(error.localizedDescription)")
            }
        }

        guard let item = arguments.getString("item") else {
            return ToolResponse.error("Missing required parameter: either 'path' or 'item' must be provided for click action")
        }

        do {
            try await PeekabooServices.shared.menu.clickMenuItemByName(app: app, itemName: item)
            return ToolResponse.text("✅ Successfully clicked menu item: \(item)")
        } catch {
            return ToolResponse.error("Failed to click menu item '\(item)' in app '\(app)': \(error.localizedDescription)")
        }
    }

    /// Clicks the system menu extra identified by the `title` argument.
    private func handleClickExtraAction(arguments: ToolArguments) async throws -> ToolResponse {
        guard let title = arguments.getString("title") else {
            return ToolResponse.error("Missing required parameter: title (required for click-extra action)")
        }

        do {
            try await PeekabooServices.shared.menu.clickMenuExtra(title: title)
        } catch {
            return ToolResponse.error("Failed to click system menu extra '\(title)': \(error.localizedDescription)")
        }
        return ToolResponse.text("✅ Successfully clicked system menu extra: \(title)")
    }

    // MARK: - Formatting Helpers

    /// Renders the whole menu structure: a title line, every menu, then a summary line.
    private func formatMenuStructure(_ structure: MenuStructure) -> String {
        let title = "📋 Menu Structure for \(structure.application.name)\n\n"
        let body = structure.menus.map { formatMenu($0, indent: 0) }.joined()
        let summary = "\n📊 Summary: \(structure.menus.count) menus, \(structure.totalItems) total items"
        return title + body + summary
    }

    /// Renders one top-level menu followed by its items, indented one level deeper.
    private func formatMenu(_ menu: Menu, indent: Int) -> String {
        let padding = String(repeating: " ", count: indent)
        let disabledSuffix = menu.isEnabled ? "" : " (disabled)"
        let heading = "\(padding)📁 \(menu.title)\(disabledSuffix)\n"
        let items = menu.items.map { formatMenuItem($0, indent: indent + 1) }.joined()
        return heading + items
    }

    /// Renders one menu item and, recursively, its submenu items.
    ///
    /// Separators render as a dotted rule. Other items show a bullet (or a
    /// folder icon when they have a submenu), the title, the keyboard shortcut
    /// if any, and `disabled` / `checked` state markers.
    private func formatMenuItem(_ item: MenuItem, indent: Int) -> String {
        let padding = String(repeating: " ", count: indent)

        guard !item.isSeparator else {
            return "\(padding)┈┈┈┈┈┈┈┈┈┈\n"
        }

        let marker = item.submenu.isEmpty ? "•" : "📂"
        var line = "\(padding)\(marker) \(item.title)"

        if let shortcut = item.keyboardShortcut {
            line += " (\(shortcut.displayString))"
        }

        var states: [String] = []
        if !item.isEnabled {
            states.append("disabled")
        }
        if item.isChecked {
            states.append("checked")
        }
        if !states.isEmpty {
            line += " [\(states.joined(separator: ", "))]"
        }

        let children = item.submenu.map { formatMenuItem($0, indent: indent + 1) }.joined()
        return line + "\n" + children
    }
}
|
||||
239
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/MoveTool.swift
Normal file
239
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/MoveTool.swift
Normal file
@ -0,0 +1,239 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
#if canImport(AppKit)
|
||||
import AppKit
|
||||
#endif
|
||||
|
||||
/// MCP tool that moves the mouse cursor to coordinates, to the centre of the
/// main screen, or to the centre of a UI element from a session.
public struct MoveTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "MoveTool")

    /// Tool identifier.
    public let name = "move"

    /// Usage text for the tool.
    public var description: String {
        let usage = """
        Move the mouse cursor to a specific position or UI element.
        Supports absolute coordinates, UI element targeting, or centering on screen.
        Can animate movement smoothly over a specified duration.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
        return usage
    }

    /// JSON schema of the accepted arguments; none are required by the schema,
    /// but `execute` rejects calls that name no target.
    public var inputSchema: Value {
        let schema = SchemaBuilder.object(
            properties: [
                "to": SchemaBuilder.string(
                    description: "Optional. Coordinates in format 'x,y' (e.g., '100,200') or 'center' to center on screen."
                ),
                "coordinates": SchemaBuilder.string(
                    description: "Optional. Alias for 'to' - coordinates in format 'x,y' (e.g., '100,200')."
                ),
                "id": SchemaBuilder.string(
                    description: "Optional. Element ID to move to (from see command output)."
                ),
                "session": SchemaBuilder.string(
                    description: "Optional. Session ID from see command. Uses latest session if not specified."
                ),
                "center": SchemaBuilder.boolean(
                    description: "Optional. Move to center of screen.",
                    default: false
                ),
                "smooth": SchemaBuilder.boolean(
                    description: "Optional. Use smooth animated movement.",
                    default: false
                ),
                "duration": SchemaBuilder.number(
                    description: "Optional. Duration in milliseconds for smooth movement. Default: 500.",
                    default: 500
                ),
                "steps": SchemaBuilder.number(
                    description: "Optional. Number of steps for smooth movement. Default: 10.",
                    default: 10
                )
            ],
            required: []
        )
        return schema
    }

    public init() {}

    /// Resolves the requested target and moves the cursor there.
    ///
    /// Target precedence: `center` flag, then `to` (falling back to
    /// `coordinates`), then element `id`. `duration` and `steps` are validated
    /// only when `smooth` is true; a non-smooth move is issued with duration 0
    /// and a single step.
    ///
    /// - Parameter arguments: Tool arguments as described by `inputSchema`.
    /// - Returns: A text response with movement metadata, or an error response
    ///   for invalid input, an unresolved session/element, or a failed move.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        let toValue = arguments.getString("to")
        let coordinatesValue = arguments.getString("coordinates")
        let elementId = arguments.getString("id")
        let wantsCenter = arguments.getBool("center") ?? false

        // At least one way of naming the destination is mandatory.
        let hasTarget = toValue != nil || coordinatesValue != nil || elementId != nil || wantsCenter
        if !hasTarget {
            return ToolResponse.error("Must specify either 'to', 'coordinates', 'id', or 'center'")
        }

        let sessionId = arguments.getString("session")
        let useSmooth = arguments.getBool("smooth") ?? false
        let duration = Int(arguments.getNumber("duration") ?? 500)
        let steps = Int(arguments.getNumber("steps") ?? 10)

        // Animation parameters only matter (and are only checked) for smooth moves.
        if useSmooth {
            if duration <= 0 {
                return ToolResponse.error("Duration must be greater than 0")
            }
            if duration > 30000 {
                return ToolResponse.error("Duration must be 30 seconds or less to prevent excessive delays")
            }
            if steps <= 0 {
                return ToolResponse.error("Steps must be greater than 0")
            }
            if steps > 100 {
                return ToolResponse.error("Steps must be 100 or less to prevent excessive processing")
            }
        }

        do {
            let startedAt = Date()

            let targetLocation: CGPoint
            let targetDescription: String

            if wantsCenter {
                targetLocation = try getCenterOfScreen()
                targetDescription = "center of screen"
            } else if let rawTarget = toValue ?? coordinatesValue {
                // The literal "center" (any casing) is accepted in place of 'x,y'.
                if rawTarget.lowercased() == "center" {
                    targetLocation = try getCenterOfScreen()
                    targetDescription = "center of screen"
                } else {
                    targetLocation = try parseCoordinates(rawTarget, parameterName: "coordinates")
                    targetDescription = "coordinates (\(Int(targetLocation.x)), \(Int(targetLocation.y)))"
                }
            } else if let targetId = elementId {
                guard let session = await getSession(id: sessionId) else {
                    return ToolResponse.error("No active session. Run 'see' command first to capture UI state.")
                }
                guard let element = await session.getElement(byId: targetId) else {
                    return ToolResponse.error("Element '\(targetId)' not found in current session. Run 'see' command to update UI state.")
                }

                // Aim at the middle of the element's frame.
                targetLocation = CGPoint(x: element.frame.midX, y: element.frame.midY)
                targetDescription = "element \(targetId) (\(element.role): \(element.title ?? element.label ?? "untitled"))"
            } else {
                return ToolResponse.error("No target specified")
            }

            // Instant movement is expressed as duration 0 with a single step.
            let (moveDuration, moveSteps) = useSmooth ? (duration, steps) : (0, 1)
            try await PeekabooServices.shared.automation.moveMouse(
                to: targetLocation,
                duration: moveDuration,
                steps: moveSteps
            )

            let executionTime = Date().timeIntervalSince(startedAt)

            let animationNote = useSmooth ? " with smooth animation (\(duration)ms, \(steps) steps)" : ""
            let message = "✅ Moved mouse cursor to \(targetDescription)" + animationNote
                + " in \(String(format: "%.2f", executionTime))s"

            return ToolResponse(
                content: [.text(message)],
                meta: .object([
                    "target_location": .object([
                        "x": .double(Double(targetLocation.x)),
                        "y": .double(Double(targetLocation.y))
                    ]),
                    "target_description": .string(targetDescription),
                    "smooth": .bool(useSmooth),
                    "duration": .double(Double(duration)),
                    "steps": .double(Double(steps)),
                    "execution_time": .double(executionTime)
                ])
            )
        } catch let parseFailure as CoordinateParseError {
            return ToolResponse.error(parseFailure.message)
        } catch {
            logger.error("Mouse movement execution failed: \(error)")
            return ToolResponse.error("Failed to move mouse: \(error.localizedDescription)")
        }
    }

    // MARK: - Private Helpers

    /// Error carrying a ready-to-display message for bad coordinate input or
    /// an unavailable screen.
    private struct CoordinateParseError: Swift.Error {
        let message: String
    }

    /// Parses an `"x,y"` string into a point.
    ///
    /// - Parameters:
    ///   - coordString: Text of the form `x,y`; whitespace around each part is ignored.
    ///   - parameterName: Name used in error messages.
    /// - Returns: The parsed point.
    /// - Throws: `CoordinateParseError` when there are not exactly two parts,
    ///   a part is not a number, or a value lies outside 0...20000.
    private func parseCoordinates(_ coordString: String, parameterName: String) throws -> CGPoint {
        let components = coordString
            .split(separator: ",")
            .map { $0.trimmingCharacters(in: .whitespaces) }

        if components.count != 2 {
            throw CoordinateParseError(message: "Invalid \(parameterName) format. Use 'x,y' (e.g., '100,200') or 'center'")
        }

        guard let x = Double(components[0]), let y = Double(components[1]) else {
            throw CoordinateParseError(message: "Invalid \(parameterName). Both x and y must be valid numbers")
        }

        // Sanity bounds: no negative values, nothing absurdly large.
        guard x >= 0, y >= 0 else {
            throw CoordinateParseError(message: "Invalid \(parameterName). Both x and y must be non-negative")
        }
        guard x <= 20000, y <= 20000 else {
            throw CoordinateParseError(message: "Invalid \(parameterName). Both x and y must be 20000 or less")
        }

        return CGPoint(x: x, y: y)
    }

    /// Returns the midpoint of `NSScreen.main`'s frame.
    ///
    /// - Throws: `CoordinateParseError` when there is no main screen or AppKit
    ///   is unavailable.
    private func getCenterOfScreen() throws -> CGPoint {
        #if canImport(AppKit)
        guard let frame = NSScreen.main?.frame else {
            throw CoordinateParseError(message: "Unable to determine main screen dimensions")
        }
        return CGPoint(x: frame.midX, y: frame.midY)
        #else
        // Fallback for non-AppKit environments
        throw CoordinateParseError(message: "Screen center calculation not supported in this environment")
        #endif
    }

    /// Looks up a session by explicit ID.
    ///
    /// NOTE(review): the schema advertises "uses latest session if not
    /// specified", but no latest-session tracking exists here yet — a nil `id`
    /// always yields nil.
    private func getSession(id: String?) async -> UISession? {
        guard let sessionId = id else {
            // Most-recent-session lookup is not implemented.
            return nil
        }
        return await UISessionManager.shared.getSession(id: sessionId)
    }
}
|
||||
@ -0,0 +1,57 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// MCP tool that reports the state of the Screen Recording and Accessibility
/// permissions.
public struct PermissionsTool: MCPTool {
    /// Tool identifier.
    public let name = "permissions"

    /// Usage text for the tool.
    public let description = """
    Check macOS system permissions required for automation.
    Verifies both Screen Recording and Accessibility permissions.
    Returns the current permission status for each required permission.
    Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
    """

    /// The tool takes no arguments.
    public var inputSchema: Value {
        let schema = SchemaBuilder.object(
            properties: [:],
            required: []
        )
        return schema
    }

    public init() {}

    /// Queries both permissions and renders a status report.
    ///
    /// - Parameter arguments: Ignored; the tool has no inputs.
    /// - Returns: An error response carrying the report when Screen Recording
    ///   is not granted, otherwise a text response. A missing Accessibility
    ///   permission alone does not produce an error response.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        let screenRecording = await PeekabooServices.shared.screenCapture.hasScreenRecordingPermission()
        let accessibility = await PeekabooServices.shared.automation.hasAccessibilityPermission()

        let screenRecordingStatus = screenRecording ? "✅ Granted" : "❌ Not Granted"
        let accessibilityStatus = accessibility ? "✅ Granted" : "⚠️ Not Granted (Optional)"

        var report = [
            "macOS Permissions Status:",
            "",
            "Screen Recording: \(screenRecordingStatus)",
            "Accessibility: \(accessibilityStatus)"
        ]

        // Guidance blocks are appended only for the permissions that are missing.
        if !screenRecording {
            report += [
                "",
                "⚠️ Screen Recording permission is REQUIRED for capturing screenshots.",
                "Grant via: System Settings > Privacy & Security > Screen Recording"
            ]
        }

        if !accessibility {
            report += [
                "",
                "ℹ️ Accessibility permission is optional but needed for UI automation.",
                "Grant via: System Settings > Privacy & Security > Accessibility"
            ]
        }

        let responseText = report.joined(separator: "\n")

        // Only the required permission turns the result into an error.
        guard screenRecording else {
            return ToolResponse.error(responseText)
        }
        return ToolResponse.text(responseText)
    }
}
|
||||
@ -0,0 +1,159 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool that scrolls either on a UI element from a session or at the
/// current mouse position.
public struct ScrollTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "ScrollTool")

    /// Tool identifier.
    public let name = "scroll"

    /// Usage text for the tool.
    public var description: String {
        let usage = """
        Scrolls the mouse wheel in any direction.
        Can target specific elements or scroll at current mouse position.
        Supports smooth scrolling and configurable speed.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
        return usage
    }

    /// JSON schema of the accepted arguments; only `direction` is required.
    public var inputSchema: Value {
        let schema = SchemaBuilder.object(
            properties: [
                "direction": SchemaBuilder.string(
                    description: "Scroll direction: up (content moves up), down (content moves down), left, or right.",
                    enum: ["up", "down", "left", "right"]
                ),
                "on": SchemaBuilder.string(
                    description: "Optional. Element ID to scroll on (from see command). If not specified, scrolls at current mouse position."
                ),
                "session": SchemaBuilder.string(
                    description: "Optional. Session ID from see command. Uses latest session if not specified."
                ),
                "amount": SchemaBuilder.number(
                    description: "Optional. Number of scroll ticks/lines. Default: 3.",
                    default: 3
                ),
                "delay": SchemaBuilder.number(
                    description: "Optional. Delay between scroll ticks in milliseconds. Default: 2.",
                    default: 2
                ),
                "smooth": SchemaBuilder.boolean(
                    description: "Optional. Use smooth scrolling with smaller increments.",
                    default: false
                )
            ],
            required: ["direction"]
        )
        return schema
    }

    public init() {}

    /// Validates the arguments and performs the scroll.
    ///
    /// `amount` must lie in 1...50. When `on` is supplied, the element is
    /// looked up in the session first and the call fails if either the session
    /// or the element cannot be found.
    ///
    /// - Parameter arguments: Tool arguments as described by `inputSchema`.
    /// - Returns: A text response describing the scroll, or an error response
    ///   for invalid input, an unresolved session/element, or a failed scroll.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let rawDirection = arguments.getString("direction") else {
            return ToolResponse.error("Direction is required")
        }
        guard let direction = parseScrollDirection(rawDirection) else {
            return ToolResponse.error("Invalid direction. Must be one of: up, down, left, right")
        }

        let elementId = arguments.getString("on")
        let sessionId = arguments.getString("session")
        let amount = Int(arguments.getNumber("amount") ?? 3)
        let delay = Int(arguments.getNumber("delay") ?? 2)
        let smooth = arguments.getBool("smooth") ?? false

        if amount <= 0 {
            return ToolResponse.error("Amount must be greater than 0")
        }
        if amount > 50 {
            return ToolResponse.error("Amount must be 50 or less to prevent excessive scrolling")
        }

        do {
            let startedAt = Date()

            // Without an element the scroll happens wherever the cursor is.
            var targetDescription = "at current mouse position"

            if let targetId = elementId {
                guard let session = await getSession(id: sessionId) else {
                    return ToolResponse.error("No active session. Run 'see' command first to capture UI state.")
                }
                guard let element = await session.getElement(byId: targetId) else {
                    return ToolResponse.error("Element '\(targetId)' not found in current session. Run 'see' command to update UI state.")
                }
                targetDescription = "on \(element.role): \(element.title ?? element.label ?? "untitled")"
            }

            // `elementId` is nil exactly when scrolling at the mouse position,
            // so one call covers both cases.
            try await PeekabooServices.shared.automation.scroll(
                direction: direction,
                amount: amount,
                target: elementId,
                smooth: smooth,
                delay: delay,
                sessionId: sessionId
            )

            let executionTime = Date().timeIntervalSince(startedAt)
            let scrollKind = smooth ? "smooth scroll" : "scroll"
            let elapsed = String(format: "%.2f", executionTime)

            return ToolResponse.text(
                "✅ Performed \(scrollKind) \(direction) (\(amount) ticks) \(targetDescription) in \(elapsed)s"
            )
        } catch {
            logger.error("Scroll execution failed: \(error)")
            return ToolResponse.error("Failed to perform scroll: \(error.localizedDescription)")
        }
    }

    // MARK: - Private Helpers

    /// Maps a case-insensitive direction name to `ScrollDirection`, or nil
    /// when the name is not one of up/down/left/right.
    private func parseScrollDirection(_ direction: String) -> ScrollDirection? {
        let directionsByName: [String: ScrollDirection] = [
            "up": .up,
            "down": .down,
            "left": .left,
            "right": .right
        ]
        return directionsByName[direction.lowercased()]
    }

    /// Looks up a session by explicit ID.
    ///
    /// NOTE(review): the schema advertises "uses latest session if not
    /// specified", but no latest-session tracking exists here yet — a nil `id`
    /// always yields nil.
    private func getSession(id: String?) async -> UISession? {
        guard let sessionId = id else {
            // Most-recent-session lookup is not implemented.
            return nil
        }
        return await UISessionManager.shared.getSession(id: sessionId)
    }
}
|
||||
386
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SeeTool.swift
Normal file
386
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SeeTool.swift
Normal file
@ -0,0 +1,386 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for capturing UI state and element detection
|
||||
public struct SeeTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "SeeTool")
|
||||
|
||||
public let name = "see"
|
||||
|
||||
/// Usage text for the tool.
public var description: String {
    let usage = """
    Captures a screenshot and analyzes UI elements for automation.
    Returns UI element map with Peekaboo IDs (B1 for buttons, T1 for text fields, etc.)
    that can be used with interaction commands.
    Creates or updates a session for tracking UI state across multiple commands.
    Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
    """
    return usage
}
|
||||
|
||||
/// JSON schema of the accepted arguments; every argument is optional.
public var inputSchema: Value {
    let schema = SchemaBuilder.object(
        properties: [
            "app_target": SchemaBuilder.string(
                description: """
                Optional. Specifies the capture target (same as image tool).
                For example:
                Omit or use an empty string (e.g., '') for all screens.
                Use 'screen:INDEX' (e.g., 'screen:0') for a specific display.
                Use 'frontmost' for all windows of the current foreground application.
                Use 'AppName' (e.g., 'Safari') for all windows of that application.
                Use 'PID:PROCESS_ID' (e.g., 'PID:663') to target a specific process by its PID.
                """
            ),
            "path": SchemaBuilder.string(
                description: "Optional. Path to save the screenshot. If not provided, uses a temporary file."
            ),
            "session": SchemaBuilder.string(
                description: "Optional. Session ID for UI automation state tracking. Creates new session if not provided."
            ),
            "annotate": SchemaBuilder.boolean(
                description: "Optional. If true, generates an annotated screenshot with interaction markers and IDs.",
                default: false
            )
        ],
        required: []
    )
    return schema
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Runs the `see` tool: captures the requested target, records the result in a
/// UI session and reports a textual summary.
///
/// Reads the optional `app_target`, `path`, `session` and `annotate` arguments
/// (`annotate` defaults to `false`).
///
/// - Returns: A response whose first content item is the text summary. When
///   `annotate` is true the annotated image is appended as base64 PNG data. The
///   metadata carries `session_id`, `element_count` and `actionable_count`.
///   Any error thrown along the way is logged and turned into an error response
///   instead of being rethrown.
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    let requestedTarget = arguments.getString("app_target")
    let requestedPath = arguments.getString("path")
    let requestedSessionId = arguments.getString("session")
    let wantsAnnotation = arguments.getBool("annotate") ?? false

    do {
        let session = try await getOrCreateSession(sessionId: requestedSessionId)
        let target = try parseCaptureTarget(requestedTarget)

        let screenshotPath = try await captureScreenshot(
            target: target,
            path: requestedPath,
            session: session
        )
        let elements = try await detectUIElements(target: target, session: session)

        // Stays nil unless annotation was explicitly requested.
        var annotatedPath: String?
        if wantsAnnotation {
            annotatedPath = try await generateAnnotatedScreenshot(
                originalPath: screenshotPath,
                elements: elements,
                session: session
            )
        }

        let summary = await buildSummary(
            session: session,
            elements: elements,
            screenshotPath: annotatedPath ?? screenshotPath,
            target: target
        )

        var content: [MCP.Tool.Content] = [.text(summary)]

        // Only an annotated capture is returned inline as image data.
        if let imagePath = annotatedPath {
            let imageData = try Data(contentsOf: URL(fileURLWithPath: imagePath))
            content.append(.image(data: imageData.base64EncodedString(), mimeType: "image/png", metadata: nil))
        }

        let actionableCount = elements.filter { $0.isActionable }.count
        return ToolResponse(
            content: content,
            meta: .object([
                "session_id": .string(session.id),
                "element_count": .double(Double(elements.count)),
                "actionable_count": .double(Double(actionableCount))
            ])
        )
    } catch {
        logger.error("See tool execution failed: \(error)")
        return ToolResponse.error("Failed to capture UI state: \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Private Helpers
|
||||
|
||||
/// Resolves the session to use for this invocation.
///
/// - Parameter sessionId: Identifier of a previously created session, if any.
/// - Returns: The registered session with that identifier, or a freshly created
///   session when no identifier was given or the identifier is not registered
///   with `UISessionManager.shared`.
private func getOrCreateSession(sessionId: String?) async throws -> UISession {
    guard let sessionId = sessionId,
          let knownSession = await UISessionManager.shared.getSession(id: sessionId)
    else {
        // No ID supplied, or the ID is unknown: start a new session.
        return await UISessionManager.shared.createSession()
    }
    return knownSession
}
|
||||
|
||||
/// Translates the `app_target` argument into a `CaptureTarget`.
///
/// Recognised forms (prefixes and keywords are matched case-insensitively):
/// - `nil`, `""` or `"screen"`: all screens (`.screen(index: nil)`)
/// - `"screen:N"`: the display with index `N`
/// - `"frontmost"`: the frontmost application
/// - `"PID:N"`: the process with that PID, normalised to `.window(app: "PID:N", index: nil)`
/// - anything else: treated as an application name
///
/// - Throws: `PeekabooError.invalidInput` when the part after `screen:` or
///   `PID:` is not a valid integer.
private func parseCaptureTarget(_ appTarget: String?) throws -> CaptureTarget {
    guard let target = appTarget else {
        return .screen(index: nil)
    }

    // Keywords were already compared in lowercase; match the prefixes the same
    // way so that "Screen:0" or "pid:663" is not mistaken for an app name.
    let normalized = target.lowercased()

    let screenPrefix = "screen:"
    if normalized.hasPrefix(screenPrefix) {
        let indexStr = String(target.dropFirst(screenPrefix.count))
        guard let index = Int(indexStr) else {
            throw PeekabooError.invalidInput("Invalid screen index: \(indexStr)")
        }
        return .screen(index: index)
    }

    let pidPrefix = "pid:"
    if normalized.hasPrefix(pidPrefix) {
        let pidStr = String(target.dropFirst(pidPrefix.count))
        guard let pid = Int32(pidStr) else {
            throw PeekabooError.invalidInput("Invalid PID: \(pidStr)")
        }
        // Always emit the canonical upper-case "PID:" form.
        return .window(app: "PID:\(pid)", index: nil)
    }

    switch normalized {
    case "", "screen":
        return .screen(index: nil)
    case "frontmost":
        return .frontmost
    default:
        // Anything else is an application name; keep the caller's spelling.
        return .window(app: target, index: nil)
    }
}
|
||||
|
||||
/// Captures `target`, writes the image to disk and records it in `session`.
///
/// - Parameters:
///   - target: What to capture. `.area` targets are rejected.
///   - path: Destination file. When `nil`, a timestamped PNG path inside the
///     temporary directory is used.
///   - session: Session that receives the screenshot path and capture metadata.
/// - Returns: The path the image was written to.
/// - Throws: `PeekabooError.windowNotFound` when the application has no windows,
///   `PeekabooError.invalidInput` for area targets, and any error raised by the
///   capture services or by writing the file.
private func captureScreenshot(target: CaptureTarget, path: String?, session: UISession) async throws -> String {
    let destination: String
    if let explicitPath = path {
        destination = explicitPath
    } else {
        destination = FileManager.default.temporaryDirectory
            .appendingPathComponent("peekaboo-see-\(Date().timeIntervalSince1970).png")
            .path
    }

    let capture: CaptureResult
    switch target {
    case .screen(let displayIndex):
        capture = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: displayIndex)

    case .frontmost:
        capture = try await PeekabooServices.shared.screenCapture.captureFrontmost()

    case .window(let appIdentifier, _):
        // Confirm the app has at least one window, then capture its first one.
        let appWindows = try await PeekabooServices.shared.windows.listWindows(target: .application(appIdentifier))
        if appWindows.isEmpty {
            throw PeekabooError.windowNotFound(criteria: "No windows found for application: \(appIdentifier)")
        }
        capture = try await PeekabooServices.shared.screenCapture.captureWindow(
            appIdentifier: appIdentifier,
            windowIndex: 0
        )

    case .area:
        throw PeekabooError.invalidInput("Area capture not supported for see tool")
    }

    // Persist the image first, then remember it in the session.
    try capture.imageData.write(to: URL(fileURLWithPath: destination))
    await session.setScreenshot(path: destination, metadata: capture.metadata)

    return destination
}
|
||||
|
||||
/// Resolves the application behind `target` and records the detected UI
/// elements in `session`.
///
/// Element detection itself is not wired up yet, so the result is currently
/// always empty. The application lookup is still performed because it can throw.
///
/// - Parameters:
///   - target: Capture target; only `.frontmost` and `.window` map to an application.
///   - session: Session whose element list is replaced with the result.
/// - Returns: The detected elements (currently always `[]`).
/// - Throws: Any error raised by the application service lookups.
private func detectUIElements(target: CaptureTarget, session: UISession) async throws -> [UIElement] {
    let appInfo: ServiceApplicationInfo?
    switch target {
    case .frontmost:
        appInfo = try await PeekabooServices.shared.applications.getFrontmostApplication()
    case .window(let appIdentifier, _):
        let apps = try await PeekabooServices.shared.applications.listApplications()
        appInfo = apps.data.applications.first { app in
            app.name == appIdentifier ||
                app.bundleIdentifier == appIdentifier ||
                (appIdentifier.hasPrefix("PID:") && "PID:\(app.processIdentifier)" == appIdentifier)
        }
    default:
        appInfo = nil
    }

    // TODO: Call actual detectElements on UIAutomationService with captured image data
    // Until then no elements are produced, whether or not an application was resolved.
    _ = appInfo
    let elements: [UIElement] = []

    // Always overwrite the session's element list, so a reused session never
    // pairs a new screenshot with elements left over from an earlier capture.
    await session.setUIElements(elements)

    return elements
}
|
||||
|
||||
// Removed getRolePrefix - no longer needed after refactoring to use main UIElement struct
|
||||
|
||||
/// Placeholder for screenshot annotation.
///
/// Annotation is not implemented yet: the call logs that fact and hands back
/// `originalPath` unchanged. `elements` and `session` are currently unused.
///
/// - Returns: The path of the image to present as the annotated screenshot.
private func generateAnnotatedScreenshot(
    originalPath: String,
    elements: [UIElement],
    session: UISession
) async throws -> String {
    // TODO: Implement actual annotation with element markers
    logger.info("Annotation not yet implemented, returning original screenshot")
    return originalPath
}
|
||||
|
||||
/// Renders the human-readable report returned by the `see` tool.
///
/// The report lists the session ID, the application and window from the
/// session's screenshot metadata (when present), the screenshot path, the
/// element count, and the elements grouped by role in ascending role order.
/// `target` is accepted but not used.
@MainActor
private func buildSummary(
    session: UISession,
    elements: [UIElement],
    screenshotPath: String,
    target: CaptureTarget
) async -> String {
    var report: [String] = [
        "📸 UI State Captured",
        "Session ID: \(session.id)"
    ]

    // Application / window lines only appear when the capture recorded them.
    if let metadata = await session.screenshotMetadata {
        if let appInfo = metadata.applicationInfo {
            report.append("Application: \(appInfo.name)")
        }
        if let windowInfo = metadata.windowInfo {
            report.append("Window: \(windowInfo.title)")
        }
    }

    report.append("Screenshot: \(screenshotPath)")
    report.append("Elements found: \(elements.count)")
    report.append("\nUI Elements:")

    let groups = Dictionary(grouping: elements, by: { $0.role })
        .sorted(by: { $0.key < $1.key })

    for (role, members) in groups {
        let actionable = members.filter { $0.isActionable }.count
        report.append("\n\(role) (\(members.count) found, \(actionable) actionable):")

        for element in members {
            var fields = [" \(element.id)"]

            // Show the first available descriptor: title, then label, then value.
            if let title = element.title {
                fields.append("\"\(title)\"")
            } else if let label = element.label {
                fields.append("\"\(label)\"")
            } else if let value = element.value {
                fields.append("value: \"\(value)\"")
            }

            let origin = element.frame.origin
            fields.append("at (\(Int(origin.x)), \(Int(origin.y)))")

            if !element.isActionable {
                fields.append("[not actionable]")
            }

            report.append(fields.joined(separator: " - "))
        }
    }

    report.append("\nUse element IDs (B1, T1, etc.) with click, type, and other interaction commands.")

    return report.joined(separator: "\n")
}
|
||||
}
|
||||
|
||||
// MARK: - Supporting Types
|
||||
|
||||
// Using CaptureTarget from PeekabooServices - no need to redefine
|
||||
// Note: menubar case is not available in the main CaptureTarget enum
|
||||
|
||||
// Using the main UIElement from Session.swift - no need to redefine
|
||||
|
||||
// MARK: - UI Session Management
|
||||
|
||||
/// Actor holding the state captured for one UI automation session: the latest
/// screenshot, its metadata and the detected UI elements.
actor UISession {
    /// Unique identifier (a UUID string) assigned at creation.
    let id: String
    private(set) var screenshotPath: String?
    private(set) var screenshotMetadata: CaptureMetadata?
    private(set) var uiElements: [UIElement] = []
    private(set) var createdAt: Date
    /// Time of the most recent mutation or element lookup. Read by
    /// `UISessionManager.cleanupOldSessions` to decide which sessions expire.
    private(set) var lastAccessedAt: Date

    init() {
        let now = Date()
        self.id = UUID().uuidString
        self.createdAt = now
        self.lastAccessedAt = now
    }

    /// Records the latest screenshot and its capture metadata.
    func setScreenshot(path: String, metadata: CaptureMetadata) {
        self.screenshotPath = path
        self.screenshotMetadata = metadata
        self.lastAccessedAt = Date()
    }

    /// Replaces the stored element list.
    func setUIElements(_ elements: [UIElement]) {
        self.uiElements = elements
        self.lastAccessedAt = Date()
    }

    /// Returns the stored element with the given ID, or `nil` if there is none.
    ///
    /// A lookup counts as an access: without refreshing the timestamp here, a
    /// session that is only being read from would still look idle and could be
    /// expired while in use.
    func getElement(byId id: String) -> UIElement? {
        self.lastAccessedAt = Date()
        return uiElements.first { $0.id == id }
    }
}
|
||||
|
||||
/// Process-wide registry of `UISession` instances, keyed by session ID.
actor UISessionManager {
    static let shared = UISessionManager()

    private var sessions: [String: UISession] = [:]

    private init() {}

    /// Creates a new session, registers it and returns it.
    func createSession() -> UISession {
        let session = UISession()
        sessions[session.id] = session
        return session
    }

    /// Returns the registered session with the given ID, or `nil` if unknown.
    func getSession(id: String) -> UISession? {
        return sessions[id]
    }

    /// Unregisters the session with the given ID, if present.
    func removeSession(id: String) {
        sessions.removeValue(forKey: id)
    }

    /// Unregisters every session whose `lastAccessedAt` is not later than
    /// `timeInterval` seconds ago (default: one hour).
    func cleanupOldSessions(olderThan timeInterval: TimeInterval = 3600) async {
        let cutoffDate = Date().addingTimeInterval(-timeInterval)

        // Each `await` below suspends this actor, during which other calls
        // (e.g. `createSession`) may run and mutate `sessions`. So only collect
        // the expired entries here and remove them individually afterwards;
        // replacing the whole dictionary would drop sessions registered in the
        // meantime.
        let snapshot = sessions
        var expired: [(id: String, session: UISession)] = []
        for (id, session) in snapshot {
            if await session.lastAccessedAt <= cutoffDate {
                expired.append((id: id, session: session))
            }
        }

        for entry in expired where sessions[entry.id] === entry.session {
            sessions.removeValue(forKey: entry.id)
        }
    }
}
|
||||
@ -0,0 +1,53 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
/// MCP tool for pausing execution
public struct SleepTool: MCPTool {
    public let name = "sleep"
    public let description = """
    Pauses execution for a specified duration.
    Useful for waiting between UI actions or allowing animations to complete.
    Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
    """

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "duration": SchemaBuilder.number(
                    description: "Sleep duration in milliseconds."
                )
            ],
            required: ["duration"]
        )
    }

    public init() {}

    /// Sleeps for the requested number of milliseconds.
    ///
    /// - Returns: A text response reporting the requested and the measured
    ///   duration, or an error response when `duration` is missing, not
    ///   positive, or longer than 10 minutes.
    /// - Throws: Whatever `Task.sleep` throws (e.g. on task cancellation).
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let duration = arguments.getNumber("duration") else {
            return ToolResponse.error("Missing required parameter: duration")
        }

        guard duration > 0 else {
            return ToolResponse.error("Duration must be positive")
        }

        // Enforce the 10-minute cap on the raw number *before* converting it:
        // `Int(_:)` traps on values outside Int's range (e.g. 1e300), which
        // would crash the server on a hostile or mistaken argument. Comparing
        // against 600_001 accepts exactly the values that truncate to <= 600000.
        guard duration < 600_001 else {
            return ToolResponse.error("Duration cannot exceed 600000ms (10 minutes)")
        }

        // Safe: 0 < duration < 600_001. Fractional milliseconds are truncated.
        let milliseconds = Int(duration)

        let startTime = Date()

        try await Task.sleep(nanoseconds: UInt64(milliseconds) * 1_000_000)

        let actualDuration = Date().timeIntervalSince(startTime) * 1000 // Convert to ms
        let seconds = Double(milliseconds) / 1000.0

        return ToolResponse.text("✅ Paused for \(seconds)s (requested: \(milliseconds)ms, actual: \(Int(actualDuration))ms)")
    }
}
|
||||
330
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SpaceTool.swift
Normal file
330
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SpaceTool.swift
Normal file
@ -0,0 +1,330 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for managing macOS Spaces (virtual desktops)
|
||||
public struct SpaceTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "SpaceTool")
|
||||
|
||||
public let name = "space"
|
||||
|
||||
public var description: String {
|
||||
"""
|
||||
Manage macOS Spaces (virtual desktops).
|
||||
|
||||
Actions:
|
||||
- list: List spaces with detailed information
|
||||
- switch: Switch to a specific space
|
||||
- move-window: Move windows between spaces
|
||||
|
||||
Supports moving windows with optional follow behavior to switch along with the window.
|
||||
|
||||
Examples:
|
||||
- List spaces: { "action": "list" }
|
||||
- List with details: { "action": "list", "detailed": true }
|
||||
- Switch to space 2: { "action": "switch", "to": 2 }
|
||||
- Move window to space 3: { "action": "move-window", "app": "Safari", "to": 3 }
|
||||
- Move window to current space: { "action": "move-window", "app": "TextEdit", "to_current": true }
|
||||
- Move and follow: { "action": "move-window", "app": "Terminal", "to": 2, "follow": true }
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
public var inputSchema: Value {
|
||||
SchemaBuilder.object(
|
||||
properties: [
|
||||
"action": SchemaBuilder.string(
|
||||
description: "The action to perform",
|
||||
enum: ["list", "switch", "move-window"]
|
||||
),
|
||||
"to": SchemaBuilder.number(
|
||||
description: "Space number to switch to (for switch action)"
|
||||
),
|
||||
"app": SchemaBuilder.string(
|
||||
description: "Application name for move-window action"
|
||||
),
|
||||
"window_title": SchemaBuilder.string(
|
||||
description: "Window title to move"
|
||||
),
|
||||
"window_index": SchemaBuilder.number(
|
||||
description: "Window index for multi-window apps"
|
||||
),
|
||||
"to_current": SchemaBuilder.boolean(
|
||||
description: "Move window to current space (for move-window action)",
|
||||
default: false
|
||||
),
|
||||
"follow": SchemaBuilder.boolean(
|
||||
description: "Follow the window to the new space (for move-window action)",
|
||||
default: false
|
||||
),
|
||||
"detailed": SchemaBuilder.boolean(
|
||||
description: "Show detailed space information (for list action)",
|
||||
default: false
|
||||
)
|
||||
],
|
||||
required: ["action"]
|
||||
)
|
||||
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Dispatches the `space` tool to the handler for the requested action.
///
/// Reads `action` (required) plus the optional `to`, `app`, `window_title`,
/// `window_index`, `to_current`, `follow` and `detailed` arguments, validates
/// the combination required by the action, and forwards to `handleList`,
/// `handleSwitch` or `handleMoveWindow`.
///
/// - Returns: The handler's response, or an error response for a missing or
///   unknown action, invalid parameter combinations, or a handler failure
///   (which is logged rather than rethrown).
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    guard let action = arguments.getString("action") else {
        return ToolResponse.error("Missing required parameter: action")
    }

    let to = arguments.getNumber("to")
    let appName = arguments.getString("app")
    let windowTitle = arguments.getString("window_title")
    let windowIndex = arguments.getInt("window_index")
    let toCurrent = arguments.getBool("to_current") ?? false
    let follow = arguments.getBool("follow") ?? false
    let detailed = arguments.getBool("detailed") ?? false

    // Convert the space number without trapping: `Int(_:)` crashes on values
    // outside Int's range, whereas `Int(exactly:)` yields nil for those (and
    // for fractional numbers), which is reported as a normal error below.
    let targetSpaceNumber: Int? = to.flatMap { Int(exactly: $0) }
    let toIsNotWholeNumber = to != nil && targetSpaceNumber == nil
    let invalidToMessage = "Invalid 'to' parameter: space number must be a whole number"

    let spaceService = SpaceManagementService()

    do {
        let startTime = Date()

        switch action {
        case "list":
            return try await handleList(
                service: spaceService,
                detailed: detailed,
                startTime: startTime
            )

        case "switch":
            guard to != nil else {
                return ToolResponse.error("Switch action requires 'to' parameter (space number)")
            }
            guard let spaceNumber = targetSpaceNumber else {
                return ToolResponse.error(invalidToMessage)
            }
            return try await handleSwitch(
                service: spaceService,
                spaceNumber: spaceNumber,
                startTime: startTime
            )

        case "move-window":
            guard let appName = appName else {
                return ToolResponse.error("Move-window action requires 'app' parameter")
            }

            if toCurrent && to != nil {
                return ToolResponse.error("Cannot specify both 'to_current' and 'to' parameters")
            }

            if !toCurrent && to == nil {
                return ToolResponse.error("Move-window action requires either 'to' (space number) or 'to_current' parameter")
            }

            if toIsNotWholeNumber {
                return ToolResponse.error(invalidToMessage)
            }

            return try await handleMoveWindow(
                service: spaceService,
                appName: appName,
                windowTitle: windowTitle,
                windowIndex: windowIndex,
                targetSpaceNumber: targetSpaceNumber,
                toCurrent: toCurrent,
                follow: follow,
                startTime: startTime
            )

        default:
            return ToolResponse.error("Unknown action: \(action). Supported actions: list, switch, move-window")
        }
    } catch {
        logger.error("Space operation execution failed: \(error)")
        return ToolResponse.error("Failed to \(action): \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Action Handlers
|
||||
|
||||
/// Handles the `list` action: reports every Space returned by the service.
///
/// Spaces are numbered from 1 in the order the service returns them. With
/// `detailed` set, the ID, type, display, name and owner PIDs are listed where
/// available; otherwise only the type is shown.
///
/// - Returns: A text listing (or "No Spaces found") with `count` and
///   `execution_time` metadata.
@MainActor
private func handleList(
    service: SpaceManagementService,
    detailed: Bool,
    startTime: Date
) async throws -> ToolResponse {
    let spaces = service.getAllSpaces()
    let executionTime = Date().timeIntervalSince(startTime)

    guard !spaces.isEmpty else {
        return ToolResponse(
            content: [.text("No Spaces found")],
            meta: .object([
                "count": .double(0),
                "execution_time": .double(executionTime)
            ])
        )
    }

    var listing = "Found \(spaces.count) Space(s):\n\n"

    for (offset, space) in spaces.enumerated() {
        let marker = space.isActive ? " (Active)" : ""
        var entry = ["Space \(offset + 1)\(marker):"]

        if detailed {
            entry.append(" • ID: \(space.id)")
            entry.append(" • Type: \(space.type.rawValue)")
            if let displayID = space.displayID {
                entry.append(" • Display: \(displayID)")
            }
            if let name = space.name, !name.isEmpty {
                entry.append(" • Name: \(name)")
            }
            if !space.ownerPIDs.isEmpty {
                entry.append(" • Owner PIDs: \(space.ownerPIDs.map(String.init).joined(separator: ", "))")
            }
        } else {
            entry.append(" • Type: \(space.type.rawValue)")
        }

        // Every line ends with a newline, and a blank line separates entries.
        listing += entry.joined(separator: "\n") + "\n\n"
    }

    return ToolResponse(
        content: [.text(listing.trimmingCharacters(in: .whitespacesAndNewlines))],
        meta: .object([
            "count": .double(Double(spaces.count)),
            "execution_time": .double(executionTime)
        ])
    )
}
|
||||
|
||||
/// Handles the `switch` action: activates the Space with the given 1-based number.
///
/// - Returns: An error response when `spaceNumber` is outside `1...spaces.count`;
///   an "Already on Space" response (with `was_already_active`) when the target
///   is already active; otherwise a success response. Both success shapes carry
///   `space_number`, `space_id` and `execution_time` metadata.
/// - Throws: Any error raised by `switchToSpace`.
@MainActor
private func handleSwitch(
    service: SpaceManagementService,
    spaceNumber: Int,
    startTime: Date
) async throws -> ToolResponse {
    let spaces = service.getAllSpaces()

    if spaceNumber <= 0 || spaceNumber > spaces.count {
        return ToolResponse.error("Invalid space number. Available spaces: 1-\(spaces.count)")
    }

    // Space numbers are 1-based; the array is 0-based.
    let destination = spaces[spaceNumber - 1]

    guard !destination.isActive else {
        // Nothing to do: report success without calling the service.
        let elapsed = Date().timeIntervalSince(startTime)
        return ToolResponse(
            content: [.text("Already on Space \(spaceNumber)")],
            meta: .object([
                "space_number": .double(Double(spaceNumber)),
                "space_id": .double(Double(destination.id)),
                "was_already_active": .bool(true),
                "execution_time": .double(elapsed)
            ])
        )
    }

    try await service.switchToSpace(destination.id)

    let elapsed = Date().timeIntervalSince(startTime)
    let formattedElapsed = String(format: "%.2f", elapsed)

    return ToolResponse(
        content: [.text("✅ Switched to Space \(spaceNumber) in \(formattedElapsed)s")],
        meta: .object([
            "space_number": .double(Double(spaceNumber)),
            "space_id": .double(Double(destination.id)),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
/// Handles the `move-window` action: moves the first window matching the
/// app/title/index criteria to the current Space or to a numbered Space.
///
/// - Parameters:
///   - targetSpaceNumber: 1-based destination Space; required unless `toCurrent` is true.
///   - toCurrent: Move the window to the currently active Space instead.
///   - follow: After moving to a numbered Space, also switch to it.
/// - Returns: A success response describing the move, or an error response when
///   no window matches or the space number is out of range.
/// - Throws: Any error raised by the window or space services.
@MainActor
private func handleMoveWindow(
    service: SpaceManagementService,
    appName: String,
    windowTitle: String?,
    windowIndex: Int?,
    targetSpaceNumber: Int?,
    toCurrent: Bool,
    follow: Bool,
    startTime: Date
) async throws -> ToolResponse {
    let windowService = PeekabooServices.shared.windows

    // Locate the window to move; the first match wins.
    let lookup = try createWindowTarget(app: appName, title: windowTitle, index: windowIndex)
    let candidates = try await windowService.listWindows(target: lookup)

    guard let windowInfo = candidates.first else {
        return ToolResponse.error("No matching window found for app '\(appName)'")
    }

    let windowID = UInt32(windowInfo.windowID)

    guard !toCurrent else {
        try service.moveWindowToCurrentSpace(windowID: windowID)

        let elapsed = Date().timeIntervalSince(startTime)

        return ToolResponse(
            content: [.text("✅ Moved window '\(windowInfo.title)' to current Space in \(String(format: "%.2f", elapsed))s")],
            meta: .object([
                "window_title": .string(windowInfo.title),
                "window_id": .double(Double(windowInfo.windowID)),
                "moved_to_current": .bool(true),
                "execution_time": .double(elapsed)
            ])
        )
    }

    // From here on the destination is a numbered Space.
    guard let targetSpaceNumber = targetSpaceNumber else {
        return ToolResponse.error("Internal error: targetSpaceNumber is nil")
    }

    let spaces = service.getAllSpaces()

    if targetSpaceNumber <= 0 || targetSpaceNumber > spaces.count {
        return ToolResponse.error("Invalid space number. Available spaces: 1-\(spaces.count)")
    }

    let destination = spaces[targetSpaceNumber - 1]

    try service.moveWindowToSpace(windowID: windowID, spaceID: destination.id)

    if follow {
        try await service.switchToSpace(destination.id)
    }

    let elapsed = Date().timeIntervalSince(startTime)
    let followText = follow ? " and switched to Space \(targetSpaceNumber)" : ""

    return ToolResponse(
        content: [.text("✅ Moved window '\(windowInfo.title)' to Space \(targetSpaceNumber)\(followText) in \(String(format: "%.2f", elapsed))s")],
        meta: .object([
            "window_title": .string(windowInfo.title),
            "window_id": .double(Double(windowInfo.windowID)),
            "target_space_number": .double(Double(targetSpaceNumber)),
            "target_space_id": .double(Double(destination.id)),
            "followed": .bool(follow),
            "execution_time": .double(elapsed)
        ])
    )
}
|
||||
|
||||
// MARK: - Helper Methods
|
||||
|
||||
/// Builds the most specific `WindowTarget` the arguments allow.
///
/// A title takes precedence over an index; with neither, the whole application
/// is targeted.
private func createWindowTarget(app: String, title: String?, index: Int?) throws -> WindowTarget {
    switch (title, index) {
    case let (windowTitle?, _):
        return .applicationAndTitle(app: app, title: windowTitle)
    case let (nil, windowIndex?):
        return .index(app: app, index: windowIndex)
    case (nil, nil):
        return .application(app)
    }
}
|
||||
}
|
||||
@ -0,0 +1,47 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
// Migration ledger for the former temporary stub tool implementations.
// Every tool listed below has either been migrated to its own file or been removed.
|
||||
|
||||
// AnalyzeTool has been migrated to its own file
|
||||
|
||||
// ListTool has been migrated to its own file
|
||||
|
||||
// PermissionsTool has been migrated to its own file
|
||||
|
||||
// SeeTool has been migrated to its own file
|
||||
|
||||
// ClickTool has been migrated to its own file
|
||||
|
||||
// TypeTool has been migrated to its own file
|
||||
|
||||
// ScrollTool has been migrated to its own file
|
||||
|
||||
// HotkeyTool has been migrated to its own file
|
||||
|
||||
// SwipeTool has been migrated to its own file
|
||||
|
||||
// DragTool has been migrated to its own file
|
||||
|
||||
// MoveTool has been migrated to its own file
|
||||
|
||||
// AppTool has been migrated to its own file
|
||||
|
||||
// WindowTool has been migrated to its own file
|
||||
|
||||
// MenuTool has been migrated to its own file
|
||||
|
||||
// RunTool removed - security risk, allows arbitrary script execution
|
||||
|
||||
// SleepTool has been migrated to its own file
|
||||
|
||||
// CleanTool removed - internal maintenance tool, not for external use
|
||||
|
||||
// AgentTool has been migrated to its own file
|
||||
|
||||
// DockTool has been migrated to its own file
|
||||
|
||||
// DialogTool has been migrated to its own file
|
||||
|
||||
// SpaceTool has been migrated to its own file
|
||||
165
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SwipeTool.swift
Normal file
165
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/SwipeTool.swift
Normal file
@ -0,0 +1,165 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for performing swipe/drag gestures
|
||||
public struct SwipeTool: MCPTool {
|
||||
private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "SwipeTool")
|
||||
|
||||
public let name = "swipe"
|
||||
|
||||
public var description: String {
|
||||
"""
|
||||
Performs a swipe/drag gesture from one point to another.
|
||||
Useful for dragging elements, swiping through content, or gesture-based interactions.
|
||||
Creates smooth movement with configurable duration.
|
||||
Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
|
||||
"""
|
||||
}
|
||||
|
||||
public var inputSchema: Value {
|
||||
SchemaBuilder.object(
|
||||
properties: [
|
||||
"from": SchemaBuilder.string(
|
||||
description: "Starting coordinates in format 'x,y' (e.g., '100,200')."
|
||||
),
|
||||
"to": SchemaBuilder.string(
|
||||
description: "Ending coordinates in format 'x,y' (e.g., '300,400')."
|
||||
),
|
||||
"duration": SchemaBuilder.number(
|
||||
description: "Optional. Duration of the swipe in milliseconds. Default: 500.",
|
||||
default: 500
|
||||
),
|
||||
"steps": SchemaBuilder.number(
|
||||
description: "Optional. Number of intermediate steps for smooth movement. Default: 10.",
|
||||
default: 10
|
||||
)
|
||||
],
|
||||
required: ["from", "to"]
|
||||
)
|
||||
}
|
||||
|
||||
public init() {}
|
||||
|
||||
/// Performs a drag from the `from` point to the `to` point.
///
/// Reads the required `from` / `to` coordinate strings and the optional
/// `duration` (milliseconds, default 500, at most 30000) and `steps`
/// (default 10, at most 100).
///
/// - Returns: A success response describing the gesture, with `from`, `to`,
///   `duration`, `steps`, `distance` and `execution_time` metadata; or an error
///   response when a parameter is missing or out of range, the coordinates are
///   malformed or identical, or the automation service fails.
@MainActor
public func execute(arguments: ToolArguments) async throws -> ToolResponse {
    guard let fromString = arguments.getString("from") else {
        return ToolResponse.error("'from' parameter is required")
    }

    guard let toString = arguments.getString("to") else {
        return ToolResponse.error("'to' parameter is required")
    }

    let rawDuration = arguments.getNumber("duration") ?? 500
    let rawSteps = arguments.getNumber("steps") ?? 10

    // Range-check the raw numbers *before* converting: `Int(_:)` traps on NaN
    // and on values outside Int's range. The bounds below accept exactly the
    // values whose truncated integer lies in 1...30000 (resp. 1...100), so the
    // accepted inputs and error messages match the integer checks they replace.
    guard rawDuration >= 1 else {
        return ToolResponse.error("Duration must be greater than 0")
    }

    guard rawDuration < 30001 else {
        return ToolResponse.error("Duration must be 30 seconds or less to prevent excessive delays")
    }

    guard rawSteps >= 1 else {
        return ToolResponse.error("Steps must be greater than 0")
    }

    guard rawSteps < 101 else {
        return ToolResponse.error("Steps must be 100 or less to prevent excessive processing")
    }

    // Safe after the guards above; fractional parts are truncated.
    let duration = Int(rawDuration)
    let steps = Int(rawSteps)

    do {
        let startTime = Date()

        let fromPoint = try parseCoordinates(fromString, parameterName: "from")
        let toPoint = try parseCoordinates(toString, parameterName: "to")

        // A zero-length swipe is rejected rather than sent to the service.
        guard fromPoint != toPoint else {
            return ToolResponse.error("'from' and 'to' coordinates must be different")
        }

        let automation = PeekabooServices.shared.automation
        try await automation.drag(
            from: fromPoint,
            to: toPoint,
            duration: duration,
            steps: steps,
            modifiers: nil
        )

        let executionTime = Date().timeIntervalSince(startTime)

        // Straight-line distance, reported for information only.
        let deltaX = toPoint.x - fromPoint.x
        let deltaY = toPoint.y - fromPoint.y
        let distance = sqrt(deltaX * deltaX + deltaY * deltaY)

        let message = "✅ Performed swipe from (\(Int(fromPoint.x)), \(Int(fromPoint.y))) to (\(Int(toPoint.x)), \(Int(toPoint.y))) over \(duration)ms with \(steps) steps (distance: \(String(format: "%.1f", distance))px) in \(String(format: "%.2f", executionTime))s"

        return ToolResponse(
            content: [.text(message)],
            meta: .object([
                "from": .object([
                    "x": .double(Double(fromPoint.x)),
                    "y": .double(Double(fromPoint.y))
                ]),
                "to": .object([
                    "x": .double(Double(toPoint.x)),
                    "y": .double(Double(toPoint.y))
                ]),
                "duration": .double(Double(duration)),
                "steps": .double(Double(steps)),
                "distance": .double(distance),
                "execution_time": .double(executionTime)
            ])
        )
    } catch let coordinateError as CoordinateParseError {
        // Parse failures already carry a user-facing message.
        return ToolResponse.error(coordinateError.message)
    } catch {
        logger.error("Swipe execution failed: \(error)")
        return ToolResponse.error("Failed to perform swipe: \(error.localizedDescription)")
    }
}
|
||||
|
||||
// MARK: - Private Helpers
|
||||
|
||||
/// Error describing why a coordinate string was rejected; `message` is user-facing.
private struct CoordinateParseError: Swift.Error {
    let message: String
}

/// Parses an `"x,y"` string into a point.
///
/// Whitespace around each component is ignored. Both components must be
/// numbers in the range `0...10000`.
///
/// - Parameters:
///   - coordString: The text to parse, e.g. `"100,200"`.
///   - parameterName: Name of the argument being parsed, used in error messages.
/// - Throws: `CoordinateParseError` when the string does not contain exactly two
///   comma-separated fields, a field is not a number, or a value is out of range.
private func parseCoordinates(_ coordString: String, parameterName: String) throws -> CGPoint {
    // Keep empty fields so that inputs such as "100,,200" or "100,200," are
    // rejected instead of being silently read as "100,200".
    let parts = coordString
        .split(separator: ",", omittingEmptySubsequences: false)
        .map { $0.trimmingCharacters(in: .whitespaces) }

    guard parts.count == 2 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates format. Use 'x,y' (e.g., '100,200')")
    }

    guard let x = Double(parts[0]), let y = Double(parts[1]) else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be valid numbers")
    }

    // These comparisons are false for NaN, so "nan" is rejected here as well.
    guard x >= 0 && y >= 0 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be non-negative")
    }

    guard x <= 10000 && y <= 10000 else {
        throw CoordinateParseError(message: "Invalid \(parameterName) coordinates. Both x and y must be 10000 or less")
    }

    return CGPoint(x: x, y: y)
}
|
||||
}
|
||||
207
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/TypeTool.swift
Normal file
207
Core/PeekabooCore/Sources/PeekabooCore/MCP/Tools/TypeTool.swift
Normal file
@ -0,0 +1,207 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for typing text.
///
/// Optionally clicks a previously detected element to focus it, optionally
/// clears the field, types the supplied text and then presses the requested
/// special keys (tab, escape, delete, return) in that order.
public struct TypeTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "TypeTool")

    public let name = "type"

    public var description: String {
        """
        Types text into UI elements or at current focus.
        Supports special keys ({return}, {tab}, etc.) and configurable typing speed.
        Can target specific elements or type at current keyboard focus.
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "text": SchemaBuilder.string(
                    description: "The text to type. If not specified, can use special key flags instead."
                ),
                "on": SchemaBuilder.string(
                    description: "Optional. Element ID to type into (from see command). If not specified, types at current focus."
                ),
                "session": SchemaBuilder.string(
                    description: "Optional. Session ID from see command. Uses latest session if not specified."
                ),
                "delay": SchemaBuilder.number(
                    description: "Optional. Delay between keystrokes in milliseconds. Default: 5.",
                    default: 5
                ),
                "clear": SchemaBuilder.boolean(
                    description: "Optional. Clear the field before typing (Cmd+A, Delete).",
                    default: false
                ),
                "press_return": SchemaBuilder.boolean(
                    description: "Optional. Press return/enter after typing.",
                    default: false
                ),
                "tab": SchemaBuilder.number(
                    description: "Optional. Press tab N times."
                ),
                "escape": SchemaBuilder.boolean(
                    description: "Optional. Press escape key.",
                    default: false
                ),
                "delete": SchemaBuilder.boolean(
                    description: "Optional. Press delete/backspace key.",
                    default: false
                )
            ],
            required: []
        )
    }

    public init() {}

    /// Runs the typing request described by `arguments`.
    ///
    /// - Parameter arguments: Tool arguments matching `inputSchema`.
    /// - Returns: A success response summarising the performed actions, or an
    ///   error response when the arguments are invalid or an automation call
    ///   fails. Failures inside the automation calls are caught, logged and
    ///   reported as an error response instead of being rethrown.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        let text = arguments.getString("text")
        let elementId = arguments.getString("on")
        let sessionId = arguments.getString("session")
        let clear = arguments.getBool("clear") ?? false
        let pressReturn = arguments.getBool("press_return") ?? false
        let escape = arguments.getBool("escape") ?? false
        let delete = arguments.getBool("delete") ?? false

        // Numeric arguments come from the client; converting a NaN, infinite,
        // or out-of-range Double with `Int(_:)` would trap, and a negative
        // value would later trap in `0..<tabCount` / `UInt64(delay)`.
        guard let delay = Self.nonNegativeInt(arguments.getNumber("delay") ?? 5) else {
            return ToolResponse.error("Invalid 'delay': must be a non-negative number of milliseconds")
        }

        let tabCount: Int?
        if let rawTabCount = arguments.getNumber("tab") {
            guard let parsedTabCount = Self.nonNegativeInt(rawTabCount) else {
                return ToolResponse.error("Invalid 'tab': must be a non-negative number")
            }
            tabCount = parsedTabCount
        } else {
            tabCount = nil
        }

        // Validate that something will be typed
        guard text != nil || tabCount != nil || escape || delete || pressReturn else {
            return ToolResponse.error("Must specify text to type or special key actions")
        }

        // Pause between repeated tab presses, saturating instead of
        // overflowing for absurdly large delays.
        let tabPause = UInt64(delay).multipliedReportingOverflow(by: 1_000_000)
        let tabPauseNanoseconds = tabPause.overflow ? UInt64.max : tabPause.partialValue

        do {
            let startTime = Date()
            let automation = PeekabooServices.shared.automation

            // Focus on element if specified
            if let elementId = elementId {
                guard let session = await getSession(id: sessionId) else {
                    return ToolResponse.error("No active session. Run 'see' command first to capture UI state.")
                }

                guard let element = await session.getElement(byId: elementId) else {
                    return ToolResponse.error("Element '\(elementId)' not found in current session. Run 'see' command to update UI state.")
                }

                // Click the centre of the element to give it keyboard focus
                let clickLocation = CGPoint(
                    x: element.frame.midX,
                    y: element.frame.midY
                )
                try await automation.click(
                    target: .coordinates(clickLocation),
                    clickType: .single,
                    sessionId: sessionId
                )

                // Small delay after clicking
                try await Task.sleep(nanoseconds: 100_000_000) // 0.1 seconds
            }

            // Clear field if requested
            if clear {
                // Select all (Cmd+A)
                try await automation.hotkey(keys: "cmd,a", holdDuration: 50)
                try await Task.sleep(nanoseconds: 50_000_000) // 0.05 seconds

                // Delete
                try await automation.hotkey(keys: "delete", holdDuration: 50)
                try await Task.sleep(nanoseconds: 50_000_000) // 0.05 seconds
            }

            // Type the text
            if let text = text {
                try await automation.type(text: text, target: nil, clearExisting: false, typingDelay: delay, sessionId: sessionId)
            }

            // Press tab if requested
            if let tabCount = tabCount {
                for _ in 0..<tabCount {
                    try await automation.hotkey(keys: "tab", holdDuration: 50)
                    if tabCount > 1 {
                        try await Task.sleep(nanoseconds: tabPauseNanoseconds)
                    }
                }
            }

            // Press escape if requested
            if escape {
                try await automation.hotkey(keys: "escape", holdDuration: 50)
            }

            // Press delete if requested
            if delete {
                try await automation.hotkey(keys: "delete", holdDuration: 50)
            }

            // Press return if requested
            if pressReturn {
                try await automation.hotkey(keys: "return", holdDuration: 50)
            }

            let executionTime = Date().timeIntervalSince(startTime)

            // Build response message
            var actions: [String] = []

            if clear {
                actions.append("Cleared field")
            }

            if let text = text {
                // Keep the summary short for long inputs
                let displayText = text.count > 50 ? String(text.prefix(50)) + "..." : text
                actions.append("Typed: \"\(displayText)\"")
            }

            if let tabCount = tabCount {
                actions.append("Pressed Tab \(tabCount) time\(tabCount != 1 ? "s" : "")")
            }

            if escape {
                actions.append("Pressed Escape")
            }

            if delete {
                actions.append("Pressed Delete")
            }

            if pressReturn {
                actions.append("Pressed Return")
            }

            let message = "✅ " + actions.joined(separator: ", ") + " in \(String(format: "%.2f", executionTime))s"
            let charactersTyped: Value = text.map { .double(Double($0.count)) } ?? .null

            return ToolResponse(
                content: [.text(message)],
                meta: .object([
                    "execution_time": .double(executionTime),
                    "characters_typed": charactersTyped
                ])
            )

        } catch {
            logger.error("Type execution failed: \(error)")
            return ToolResponse.error("Failed to type text: \(error.localizedDescription)")
        }
    }

    // MARK: - Private Helpers

    /// Converts a client-supplied number to a non-negative `Int`, truncating
    /// any fractional part toward zero.
    ///
    /// - Returns: `nil` when the value is NaN, infinite, negative after
    ///   truncation, or too large to be represented as an `Int`.
    private static func nonNegativeInt(_ value: Double) -> Int? {
        guard let whole = Int(exactly: value.rounded(.towardZero)), whole >= 0 else {
            return nil
        }
        return whole
    }

    /// Looks up the UI session for the given identifier.
    ///
    /// - Parameter id: Session identifier, or `nil` when the caller did not
    ///   supply one.
    /// - Returns: The session registered under `id`, or `nil` when `id` is
    ///   `nil`.
    private func getSession(id: String?) async -> UISession? {
        if let sessionId = id {
            return await UISessionManager.shared.getSession(id: sessionId)
        }

        // Get most recent session
        // For now, return nil - in a real implementation we'd track the most recent session
        // NOTE(review): the schema advertises "Uses latest session if not specified",
        // which this stub does not implement yet.
        return nil
    }
}
|
||||
@ -0,0 +1,425 @@
|
||||
import Foundation
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
/// MCP tool for manipulating application windows.
///
/// Dispatches on the `action` argument to one of the private handlers, each
/// of which resolves a window target, performs a single window-service call
/// and reports the affected window.
public struct WindowTool: MCPTool {
    private let logger = os.Logger(subsystem: "boo.peekaboo.mcp", category: "WindowTool")

    public let name = "window"

    public var description: String {
        """
        Manipulate application windows - close, minimize, maximize, move, resize, and focus.

        Actions:
        - close: Close a window
        - minimize: Minimize a window
        - maximize: Maximize a window
        - move: Move a window to specific coordinates (requires x, y)
        - resize: Resize a window to specific dimensions (requires width, height)
        - set-bounds: Set both position and size (requires x, y, width, height)
        - focus: Bring a window to the foreground

        Target windows by application name and optionally by window title or index.
        Supports partial title matching for convenience.

        Examples:
        - Close Safari window: { "action": "close", "app": "Safari" }
        - Move window: { "action": "move", "app": "TextEdit", "x": 100, "y": 100 }
        - Resize window: { "action": "resize", "app": "Terminal", "width": 800, "height": 600 }
        Peekaboo MCP 3.0.0-beta.2 using anthropic/claude-opus-4-20250514, ollama/llava:latest
        """
    }

    public var inputSchema: Value {
        SchemaBuilder.object(
            properties: [
                "action": SchemaBuilder.string(
                    description: "The action to perform on the window",
                    enum: ["close", "minimize", "maximize", "move", "resize", "set-bounds", "focus"]
                ),
                "app": SchemaBuilder.string(
                    description: "Target application name, bundle ID, or process ID"
                ),
                "title": SchemaBuilder.string(
                    description: "Window title to target (partial matching supported)"
                ),
                "index": SchemaBuilder.number(
                    description: "Window index (0-based) for multi-window applications"
                ),
                "x": SchemaBuilder.number(
                    description: "X coordinate for move or set-bounds action"
                ),
                "y": SchemaBuilder.number(
                    description: "Y coordinate for move or set-bounds action"
                ),
                "width": SchemaBuilder.number(
                    description: "Width for resize or set-bounds action"
                ),
                "height": SchemaBuilder.number(
                    description: "Height for resize or set-bounds action"
                )
            ],
            required: ["action"]
        )
    }

    public init() {}

    /// Executes the window action described by `arguments`.
    ///
    /// - Parameter arguments: Tool arguments matching `inputSchema`.
    /// - Returns: A success response naming the affected window, or an error
    ///   response when arguments are missing/invalid, no window matches, or
    ///   the window service fails. Service failures are caught, logged and
    ///   reported as an error response instead of being rethrown.
    @MainActor
    public func execute(arguments: ToolArguments) async throws -> ToolResponse {
        guard let action = arguments.getString("action") else {
            return ToolResponse.error("Missing required parameter: action")
        }

        let app = arguments.getString("app")
        let title = arguments.getString("title")
        let index = arguments.getInt("index")
        let x = arguments.getNumber("x")
        let y = arguments.getNumber("y")
        let width = arguments.getNumber("width")
        let height = arguments.getNumber("height")

        let windowService = PeekabooServices.shared.windows

        do {
            let startTime = Date()

            switch action {
            case "close":
                return try await handleClose(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    startTime: startTime
                )

            case "minimize":
                return try await handleMinimize(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    startTime: startTime
                )

            case "maximize":
                return try await handleMaximize(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    startTime: startTime
                )

            case "move":
                guard let x = x, let y = y else {
                    return ToolResponse.error("Move action requires both 'x' and 'y' coordinates")
                }
                guard Self.areRepresentable([x, y]) else {
                    return ToolResponse.error("Move action requires finite 'x' and 'y' coordinates")
                }
                return try await handleMove(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    x: x,
                    y: y,
                    startTime: startTime
                )

            case "resize":
                guard let width = width, let height = height else {
                    return ToolResponse.error("Resize action requires both 'width' and 'height' dimensions")
                }
                guard Self.areRepresentable([width, height]) else {
                    return ToolResponse.error("Resize action requires finite 'width' and 'height' dimensions")
                }
                return try await handleResize(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    width: width,
                    height: height,
                    startTime: startTime
                )

            case "set-bounds":
                guard let x = x, let y = y, let width = width, let height = height else {
                    return ToolResponse.error("Set-bounds action requires 'x', 'y', 'width', and 'height' parameters")
                }
                guard Self.areRepresentable([x, y, width, height]) else {
                    return ToolResponse.error("Set-bounds action requires finite 'x', 'y', 'width', and 'height' parameters")
                }
                return try await handleSetBounds(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    x: x,
                    y: y,
                    width: width,
                    height: height,
                    startTime: startTime
                )

            case "focus":
                return try await handleFocus(
                    service: windowService,
                    app: app,
                    title: title,
                    index: index,
                    startTime: startTime
                )

            default:
                return ToolResponse.error("Unknown action: \(action). Supported actions: close, minimize, maximize, move, resize, set-bounds, focus")
            }

        } catch {
            logger.error("Window operation execution failed: \(error)")
            return ToolResponse.error("Failed to \(action) window: \(error.localizedDescription)")
        }
    }

    // MARK: - Action Handlers

    /// Closes the first window matching the target.
    private func handleClose(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        startTime: Date
    ) async throws -> ToolResponse {
        try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "close",
            describe: { "Closed window '\($0)'" },
            operation: { try await service.closeWindow(target: $0) }
        )
    }

    /// Minimizes the first window matching the target.
    private func handleMinimize(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        startTime: Date
    ) async throws -> ToolResponse {
        try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "minimize",
            describe: { "Minimized window '\($0)'" },
            operation: { try await service.minimizeWindow(target: $0) }
        )
    }

    /// Maximizes the first window matching the target.
    private func handleMaximize(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        startTime: Date
    ) async throws -> ToolResponse {
        try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "maximize",
            describe: { "Maximized window '\($0)'" },
            operation: { try await service.maximizeWindow(target: $0) }
        )
    }

    /// Moves the matching window so its origin is at (`x`, `y`).
    private func handleMove(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        x: Double,
        y: Double,
        startTime: Date
    ) async throws -> ToolResponse {
        let position = CGPoint(x: x, y: y)
        return try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "move",
            extraMeta: ["new_x": .double(x), "new_y": .double(y)],
            describe: { "Moved window '\($0)' to (\(Int(x)), \(Int(y)))" },
            operation: { try await service.moveWindow(target: $0, to: position) }
        )
    }

    /// Resizes the matching window to `width` × `height`.
    private func handleResize(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        width: Double,
        height: Double,
        startTime: Date
    ) async throws -> ToolResponse {
        let size = CGSize(width: width, height: height)
        return try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "resize",
            extraMeta: ["new_width": .double(width), "new_height": .double(height)],
            describe: { "Resized window '\($0)' to \(Int(width)) × \(Int(height))" },
            operation: { try await service.resizeWindow(target: $0, to: size) }
        )
    }

    /// Sets both the position and the size of the matching window.
    private func handleSetBounds(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        x: Double,
        y: Double,
        width: Double,
        height: Double,
        startTime: Date
    ) async throws -> ToolResponse {
        let bounds = CGRect(x: x, y: y, width: width, height: height)
        return try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "set bounds",
            extraMeta: [
                "new_x": .double(x),
                "new_y": .double(y),
                "new_width": .double(width),
                "new_height": .double(height)
            ],
            describe: { "Set bounds for window '\($0)' to (\(Int(x)), \(Int(y)), \(Int(width)) × \(Int(height)))" },
            operation: { try await service.setWindowBounds(target: $0, bounds: bounds) }
        )
    }

    /// Brings the first window matching the target to the foreground.
    private func handleFocus(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        startTime: Date
    ) async throws -> ToolResponse {
        try await performWindowAction(
            service: service, app: app, title: title, index: index, startTime: startTime,
            failureVerb: "focus",
            describe: { "Focused window '\($0)'" },
            operation: { try await service.focusWindow(target: $0) }
        )
    }

    // MARK: - Helper Methods

    /// Shared body of every action handler: resolve the target, look up the
    /// window for reporting, run the operation and build the response.
    ///
    /// - Parameters:
    ///   - service: Window service used for the lookup.
    ///   - app: Optional application identifier used to build the target.
    ///   - title: Optional window title used to build the target.
    ///   - index: Optional window index used to build the target.
    ///   - startTime: Moment the tool call started; used for `execution_time`.
    ///   - failureVerb: Verb inserted into "No matching window found to …".
    ///   - extraMeta: Action-specific metadata merged into the response.
    ///   - describe: Builds the success sentence from the window title.
    ///   - operation: Performs the actual window-service call on the target.
    /// - Returns: The success response, or an error response when no window
    ///   matches the target.
    /// - Throws: Whatever `createWindowTarget`, the window listing or
    ///   `operation` throws.
    private func performWindowAction(
        service: WindowManagementServiceProtocol,
        app: String?,
        title: String?,
        index: Int?,
        startTime: Date,
        failureVerb: String,
        extraMeta: [String: Value] = [:],
        describe: (String) -> String,
        operation: (WindowTarget) async throws -> Void
    ) async throws -> ToolResponse {
        let target = try createWindowTarget(app: app, title: title, index: index)

        // Look the window up before acting so it can still be named in the
        // response (e.g. after it has been closed).
        let windows = try await service.listWindows(target: target)
        guard let windowInfo = windows.first else {
            return ToolResponse.error("No matching window found to \(failureVerb)")
        }

        try await operation(target)

        let executionTime = Date().timeIntervalSince(startTime)

        var meta: [String: Value] = [
            "window_title": .string(windowInfo.title),
            "window_id": .double(Double(windowInfo.windowID)),
            "execution_time": .double(executionTime)
        ]
        meta.merge(extraMeta) { _, actionSpecific in actionSpecific }

        return ToolResponse(
            content: [.text("✅ \(describe(windowInfo.title)) in \(String(format: "%.2f", executionTime))s")],
            meta: .object(meta)
        )
    }

    /// Returns `true` when every value can be truncated to an `Int`.
    ///
    /// The handlers format geometry with `Int(_:)`, which traps on NaN,
    /// infinite or out-of-range values, so client input is checked first.
    private static func areRepresentable(_ values: [Double]) -> Bool {
        values.allSatisfy { Int(exactly: $0.rounded(.towardZero)) != nil }
    }

    /// Builds the window target from the optional selectors.
    ///
    /// Precedence: app + title, then app + index, then app alone, then title
    /// alone. `index` is only used together with `app` and when no `title`
    /// is given.
    ///
    /// - Throws: `PeekabooError.invalidInput` when neither `app` nor `title`
    ///   is provided.
    private func createWindowTarget(app: String?, title: String?, index: Int?) throws -> WindowTarget {
        if let app = app, let title = title {
            return .applicationAndTitle(app: app, title: title)
        }

        if let app = app, let index = index {
            return .index(app: app, index: index)
        }

        if let app = app {
            return .application(app)
        }

        if let title = title {
            return .title(title)
        }

        throw PeekabooError.invalidInput("Must specify at least 'app' or 'title' parameter to target a window")
    }
}
|
||||
@ -61,14 +61,14 @@ public protocol SessionManagerProtocol: Sendable {
|
||||
/// - sessionId: Session identifier
|
||||
/// - elementId: Element ID to retrieve
|
||||
/// - Returns: UI element if found
|
||||
func getElement(sessionId: String, elementId: String) async throws -> UIElement?
|
||||
func getElement(sessionId: String, elementId: String) async throws -> PeekabooCore.UIElement?
|
||||
|
||||
/// Find elements matching a query
|
||||
/// - Parameters:
|
||||
/// - sessionId: Session identifier
|
||||
/// - query: Search query
|
||||
/// - Returns: Array of matching elements
|
||||
func findElements(sessionId: String, matching query: String) async throws -> [UIElement]
|
||||
func findElements(sessionId: String, matching query: String) async throws -> [PeekabooCore.UIElement]
|
||||
|
||||
/// Get the full UI automation session data
|
||||
/// - Parameter sessionId: Session identifier
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
{
|
||||
"root": true,
|
||||
"parser": "@typescript-eslint/parser",
|
||||
"plugins": ["@typescript-eslint"],
|
||||
"extends": [
|
||||
"eslint:recommended",
|
||||
"plugin:@typescript-eslint/recommended"
|
||||
],
|
||||
"parserOptions": {
|
||||
"ecmaVersion": 2022,
|
||||
"sourceType": "module",
|
||||
"project": "./tsconfig.json"
|
||||
},
|
||||
"env": {
|
||||
"node": true,
|
||||
"es2022": true
|
||||
},
|
||||
"ignorePatterns": [
|
||||
"dist/",
|
||||
"node_modules/",
|
||||
"coverage/",
|
||||
"*.js",
|
||||
"scripts/prepare-release.js",
|
||||
"tests/**/*.ts"
|
||||
],
|
||||
"rules": {
|
||||
"@typescript-eslint/no-explicit-any": "warn",
|
||||
"@typescript-eslint/no-unused-vars": ["error", {
|
||||
"argsIgnorePattern": "^_",
|
||||
"varsIgnorePattern": "^_",
|
||||
"caughtErrorsIgnorePattern": "^_"
|
||||
}],
|
||||
"@typescript-eslint/explicit-module-boundary-types": "off",
|
||||
"@typescript-eslint/no-non-null-assertion": "warn",
|
||||
"no-console": "error",
|
||||
"prefer-const": "error",
|
||||
"no-var": "error",
|
||||
"eqeqeq": ["error", "always"],
|
||||
"curly": ["error", "all"],
|
||||
"brace-style": ["error", "1tbs"],
|
||||
"quotes": ["error", "double", { "avoidEscape": true }],
|
||||
"semi": ["error", "always"],
|
||||
"comma-dangle": ["error", "always-multiline"],
|
||||
"no-trailing-spaces": "error",
|
||||
"indent": ["error", 2, { "SwitchCase": 1 }],
|
||||
"max-len": ["warn", { "code": 120, "ignoreUrls": true, "ignoreStrings": true }]
|
||||
}
|
||||
}
|
||||
42
Server/.gitignore
vendored
42
Server/.gitignore
vendored
@ -1,42 +0,0 @@
|
||||
# Node.js
|
||||
node_modules/
|
||||
dist/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.npm
|
||||
*.tsbuildinfo
|
||||
|
||||
# Testing
|
||||
coverage/
|
||||
.nyc_output/
|
||||
*.lcov
|
||||
|
||||
# IDEs
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
.cache/
|
||||
|
||||
# Build artifacts
|
||||
*.tgz
|
||||
|
||||
# ESLint cache
|
||||
.eslintcache
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
@ -1,26 +0,0 @@
|
||||
{
|
||||
"semi": true,
|
||||
"trailingComma": "es5",
|
||||
"singleQuote": false,
|
||||
"printWidth": 120,
|
||||
"tabWidth": 2,
|
||||
"useTabs": false,
|
||||
"bracketSpacing": true,
|
||||
"arrowParens": "always",
|
||||
"endOfLine": "lf",
|
||||
"plugins": ["@prettier/plugin-oxc"],
|
||||
"overrides": [
|
||||
{
|
||||
"files": "*.js",
|
||||
"options": {
|
||||
"parser": "oxc"
|
||||
}
|
||||
},
|
||||
{
|
||||
"files": "*.ts",
|
||||
"options": {
|
||||
"parser": "oxc-ts"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,43 +0,0 @@
|
||||
# Peekaboo MCP Server
|
||||
|
||||
This directory contains the Model Context Protocol (MCP) server implementation for Peekaboo, enabling integration with Claude Desktop and Claude Code.
|
||||
|
||||
## What is MCP?
|
||||
|
||||
The Model Context Protocol allows AI assistants like Claude to interact with external tools and services. This MCP server exposes all of Peekaboo's macOS automation capabilities to Claude.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Build the server**:
|
||||
```bash
|
||||
npm install
|
||||
npm run build
|
||||
```
|
||||
|
||||
2. **Configure Claude Desktop or Claude Code**:
|
||||
- See [SETUP_INSTRUCTIONS.md](./SETUP_INSTRUCTIONS.md) for detailed configuration steps
|
||||
|
||||
## Available Tools
|
||||
|
||||
The MCP server exposes 20+ tools for macOS automation:
|
||||
- Screen capture and image analysis
|
||||
- UI element detection and interaction
|
||||
- Application and window management
|
||||
- Keyboard and mouse automation
|
||||
- System dialog interaction
|
||||
- And much more...
|
||||
|
||||
## Development
|
||||
|
||||
- `npm run dev` - Watch mode for TypeScript changes
|
||||
- `npm run inspector` - Test with MCP Inspector
|
||||
- `npm test` - Run tests
|
||||
|
||||
## Requirements
|
||||
|
||||
- macOS 14.0+ (Sonoma)
|
||||
- Node.js 18+
|
||||
- Peekaboo CLI binary (built from parent project)
|
||||
- Screen Recording and Accessibility permissions
|
||||
|
||||
See the main [Peekaboo README](../README.md) for more information about the project.
|
||||
@ -1,189 +0,0 @@
|
||||
# Peekaboo MCP Server Setup Instructions
|
||||
|
||||
The Peekaboo MCP server has been built and is ready to use with Claude Desktop and Claude Code.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- macOS 14.0 (Sonoma) or later
|
||||
- Node.js 18 or later
|
||||
- Screen Recording and Accessibility permissions granted to Terminal/Claude apps
|
||||
|
||||
## Installation Steps
|
||||
|
||||
### For Claude Desktop
|
||||
|
||||
1. **Open Claude Desktop Settings**
|
||||
- Click on Settings from the **menubar** (not the settings button within the app)
|
||||
|
||||
2. **Navigate to Developer Settings**
|
||||
- Click on "Developer" in the left sidebar
|
||||
- Click "Edit Config" button
|
||||
|
||||
3. **Edit the Configuration File**
|
||||
- This opens `claude_desktop_config.json` located at:
|
||||
- macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
||||
- Windows: `%APPDATA%\Claude\claude_desktop_config.json`
|
||||
|
||||
4. **Add the Peekaboo MCP Server Configuration** (replace `/Users/steipete/Projects/Peekaboo` with the path to your own Peekaboo checkout):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"peekaboo": {
|
||||
"command": "node",
|
||||
"args": ["/Users/steipete/Projects/Peekaboo/Server/dist/index.js"],
|
||||
"env": {
|
||||
"PEEKABOO_AI_PROVIDERS": "anthropic/claude-opus-4",
|
||||
"PEEKABOO_LOG_LEVEL": "info"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
5. **Save and Restart**
|
||||
- Save the configuration file
|
||||
- Completely quit Claude Desktop (Cmd+Q)
|
||||
- Restart Claude Desktop
|
||||
- Look for the MCP server indicator in the bottom-right corner of the conversation input box
|
||||
|
||||
### For Claude Code
|
||||
|
||||
Claude Code uses a CLI-based configuration system. You have two options:
|
||||
|
||||
#### Option 1: Using CLI Commands (Recommended for Simple Setup)
|
||||
|
||||
```bash
|
||||
# Add the Peekaboo MCP server
|
||||
claude mcp add peekaboo node /Users/steipete/Projects/Peekaboo/Server/dist/index.js
|
||||
|
||||
# Or with environment variables using JSON configuration
|
||||
claude mcp add-json peekaboo '{
|
||||
"type": "stdio",
|
||||
"command": "node",
|
||||
"args": ["/Users/steipete/Projects/Peekaboo/Server/dist/index.js"],
|
||||
"env": {
|
||||
"PEEKABOO_AI_PROVIDERS": "anthropic/claude-opus-4",
|
||||
"PEEKABOO_LOG_LEVEL": "info",
|
||||
"PEEKABOO_LOG_FILE": "/Users/steipete/Library/Logs/peekaboo-mcp.log"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
#### Option 2: Direct Configuration File Editing (More Control)
|
||||
|
||||
1. **Locate the Configuration File**
|
||||
- The configuration is stored in `.claude.json` in your home directory or project directory
|
||||
|
||||
2. **Edit the Configuration**:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"peekaboo": {
|
||||
"type": "stdio",
|
||||
"command": "node",
|
||||
"args": ["/Users/steipete/Projects/Peekaboo/Server/dist/index.js"],
|
||||
"env": {
|
||||
"PEEKABOO_AI_PROVIDERS": "anthropic/claude-opus-4",
|
||||
"PEEKABOO_LOG_LEVEL": "info",
|
||||
"PEEKABOO_LOG_FILE": "/Users/steipete/Library/Logs/peekaboo-mcp.log"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. **Restart Claude Code**
|
||||
- Restart Claude Code for changes to take effect
|
||||
|
||||
#### Verify Connection
|
||||
|
||||
In Claude Code, use the `/mcp` command to check server status:
|
||||
```
|
||||
> /mcp
|
||||
⎿ MCP Server Status ⎿
|
||||
⎿ • peekaboo: connected ⎿
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- `PEEKABOO_AI_PROVIDERS`: AI provider configuration (e.g., `anthropic/claude-opus-4`, `openai/gpt-4.1`)
|
||||
- `PEEKABOO_LOG_LEVEL`: Logging level (`debug`, `info`, `warn`, `error`)
|
||||
- `PEEKABOO_LOG_FILE`: Log file location (defaults to `~/Library/Logs/peekaboo-mcp.log`)
|
||||
- `PEEKABOO_USE_MODERN_CAPTURE`: Set to `false` if screen capture hangs
|
||||
|
||||
### API Keys
|
||||
|
||||
Set your API keys as environment variables or in `~/.peekaboo/credentials`:
|
||||
|
||||
```bash
|
||||
# For Anthropic
|
||||
export ANTHROPIC_API_KEY=sk-ant-...
|
||||
|
||||
# For OpenAI
|
||||
export OPENAI_API_KEY=sk-...
|
||||
|
||||
# For Grok/xAI
|
||||
export X_AI_API_KEY=xai-...
|
||||
```
|
||||
|
||||
Or use the Peekaboo CLI to set credentials:
|
||||
|
||||
```bash
|
||||
./peekaboo config set-credential ANTHROPIC_API_KEY sk-ant-...
|
||||
./peekaboo config set-credential OPENAI_API_KEY sk-...
|
||||
```
|
||||
|
||||
## Available Tools
|
||||
|
||||
Once configured, you'll have access to these Peekaboo tools in Claude:
|
||||
|
||||
- **image**: Capture screenshots of screen, windows, or apps
|
||||
- **analyze**: Analyze images with AI vision models
|
||||
- **list**: List running applications and windows
|
||||
- **see**: Capture and analyze UI elements for automation
|
||||
- **click**: Click on UI elements or coordinates
|
||||
- **type**: Type text into UI elements
|
||||
- **scroll**: Scroll content in any direction
|
||||
- **hotkey**: Press keyboard shortcuts
|
||||
- **app**: Control applications (launch, quit, focus)
|
||||
- **window**: Manage windows (move, resize, close)
|
||||
- **menu**: Interact with application menus
|
||||
- **agent**: Execute complex automation tasks with AI
|
||||
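- And more: **swipe**, **run**, **sleep**, **clean**, **permissions**, **move**, **drag**, **dock**, **dialog**, **space**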
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Logs
|
||||
|
||||
Check the MCP server logs at:
|
||||
- `~/Library/Logs/peekaboo-mcp.log`
|
||||
|
||||
### Permissions
|
||||
|
||||
If tools fail with permission errors:
|
||||
1. Open System Settings → Privacy & Security
|
||||
2. Grant Screen Recording permission to Terminal/Claude apps
|
||||
3. Grant Accessibility permission to Terminal/Claude apps
|
||||
|
||||
### Testing
|
||||
|
||||
Test the MCP server directly:
|
||||
```bash
|
||||
cd /Users/steipete/Projects/Peekaboo/Server
|
||||
npm run inspector
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
To make changes to the MCP server:
|
||||
|
||||
1. Edit TypeScript files in `Server/src/`
|
||||
2. Rebuild: `npm run build`
|
||||
3. Test: `npm run inspector`
|
||||
4. Restart Claude Desktop/Code to load changes
|
||||
|
||||
The Peekaboo CLI binary must be present at `Server/peekaboo` for the MCP server to work.
|
||||
@ -1,110 +0,0 @@
|
||||
{
|
||||
"$schema": "https://biomejs.dev/schemas/2.1.3/schema.json",
|
||||
"vcs": {
|
||||
"enabled": true,
|
||||
"clientKind": "git",
|
||||
"useIgnoreFile": true
|
||||
},
|
||||
"files": {
|
||||
"includes": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.js", "src/**/*.jsx"],
|
||||
"ignoreUnknown": false
|
||||
},
|
||||
"formatter": {
|
||||
"enabled": true,
|
||||
"formatWithErrors": false,
|
||||
"indentStyle": "space",
|
||||
"indentWidth": 2,
|
||||
"lineWidth": 120,
|
||||
"lineEnding": "lf"
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": true,
|
||||
"complexity": {
|
||||
"noUselessCatch": "error",
|
||||
"noUselessConstructor": "warn",
|
||||
"noUselessRename": "error",
|
||||
"noUselessSwitchCase": "error"
|
||||
},
|
||||
"correctness": {
|
||||
"noConstAssign": "error",
|
||||
"noConstantCondition": "error",
|
||||
"noEmptyCharacterClassInRegex": "error",
|
||||
"noEmptyPattern": "error",
|
||||
"noGlobalObjectCalls": "error",
|
||||
"noInvalidConstructorSuper": "error",
|
||||
"noNonoctalDecimalEscape": "error",
|
||||
"noPrecisionLoss": "error",
|
||||
"noSelfAssign": "error",
|
||||
"noSetterReturn": "error",
|
||||
"noSwitchDeclarations": "error",
|
||||
"noUndeclaredVariables": "error",
|
||||
"noUnreachable": "error",
|
||||
"noUnreachableSuper": "error",
|
||||
"noUnsafeFinally": "error",
|
||||
"noUnsafeOptionalChaining": "error",
|
||||
"noUnusedLabels": "error",
|
||||
"noUnusedVariables": "error",
|
||||
"useIsNan": "error",
|
||||
"useValidForDirection": "error",
|
||||
"useYield": "error"
|
||||
},
|
||||
"style": {
|
||||
"noNonNullAssertion": "warn",
|
||||
"noParameterAssign": "off",
|
||||
"useConst": "error",
|
||||
"useDefaultParameterLast": "error",
|
||||
"useExponentiationOperator": "error",
|
||||
"useNodejsImportProtocol": "off",
|
||||
"useNumberNamespace": "error",
|
||||
"useSingleVarDeclarator": "error"
|
||||
},
|
||||
"suspicious": {
|
||||
"noAsyncPromiseExecutor": "error",
|
||||
"noCatchAssign": "error",
|
||||
"noClassAssign": "error",
|
||||
"noCompareNegZero": "error",
|
||||
"noControlCharactersInRegex": "error",
|
||||
"noDebugger": "error",
|
||||
"noDuplicateCase": "error",
|
||||
"noDuplicateClassMembers": "error",
|
||||
"noDuplicateObjectKeys": "error",
|
||||
"noDuplicateParameters": "error",
|
||||
"noEmptyBlockStatements": "off",
|
||||
"noExplicitAny": "warn",
|
||||
"noExtraNonNullAssertion": "error",
|
||||
"noFallthroughSwitchClause": "error",
|
||||
"noFunctionAssign": "error",
|
||||
"noGlobalAssign": "error",
|
||||
"noImportAssign": "error",
|
||||
"noMisleadingCharacterClass": "error",
|
||||
"noPrototypeBuiltins": "error",
|
||||
"noRedeclare": "error",
|
||||
"noSelfCompare": "error",
|
||||
"noShadowRestrictedNames": "error",
|
||||
"noUnsafeNegation": "error",
|
||||
"useGetterReturn": "error"
|
||||
}
|
||||
}
|
||||
},
|
||||
"javascript": {
|
||||
"formatter": {
|
||||
"quoteStyle": "double",
|
||||
"trailingCommas": "es5",
|
||||
"semicolons": "always",
|
||||
"arrowParentheses": "always",
|
||||
"bracketSameLine": false,
|
||||
"bracketSpacing": true,
|
||||
"quoteProperties": "asNeeded"
|
||||
}
|
||||
},
|
||||
"assist": {
|
||||
"enabled": true,
|
||||
"actions": {
|
||||
"source": {
|
||||
"organizeImports": "on"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
5922
Server/package-lock.json
generated
5922
Server/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,97 +0,0 @@
|
||||
{
|
||||
"name": "@steipete/peekaboo-mcp",
|
||||
"version": "3.0.0-beta.2",
|
||||
"description": "A macOS utility exposed via Node.js MCP server for advanced screen captures, image analysis, and window management",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"bin": {
|
||||
"peekaboo-mcp": "dist/index.js"
|
||||
},
|
||||
"files": [
|
||||
"dist/",
|
||||
"peekaboo",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"build:swift": "../scripts/build-swift-arm.sh",
|
||||
"build:swift:all": "../scripts/build-swift-universal.sh",
|
||||
"build:all": "npm run build:swift:all && npm run build",
|
||||
"start": "node dist/index.js",
|
||||
"prepublishOnly": "npm run build:all",
|
||||
"dev": "tsc --watch",
|
||||
"clean": "rm -rf dist",
|
||||
"test": "PEEKABOO_TEST_MODE=safe vitest run",
|
||||
"test:safe": "PEEKABOO_TEST_MODE=safe vitest run",
|
||||
"test:full": "PEEKABOO_TEST_MODE=full vitest run",
|
||||
"test:watch": "PEEKABOO_TEST_MODE=safe vitest watch",
|
||||
"test:watch:full": "PEEKABOO_TEST_MODE=full vitest watch",
|
||||
"test:coverage": "PEEKABOO_TEST_MODE=safe vitest run --coverage",
|
||||
"test:coverage:full": "PEEKABOO_TEST_MODE=full vitest run --coverage",
|
||||
"test:unit": "PEEKABOO_TEST_MODE=safe vitest run tests/unit",
|
||||
"test:unit:full": "PEEKABOO_TEST_MODE=full vitest run tests/unit",
|
||||
"test:typescript": "SKIP_SWIFT_TESTS=true PEEKABOO_TEST_MODE=safe vitest run",
|
||||
"test:typescript:watch": "SKIP_SWIFT_TESTS=true PEEKABOO_TEST_MODE=safe vitest watch",
|
||||
"test:swift": "cd ../Apps/CLI && swift test --parallel --skip \"LocalIntegrationTests|ScreenshotValidationTests|ApplicationFinderTests|WindowManagerTests\"",
|
||||
"test:integration": "npm run build && npm run test:swift && PEEKABOO_TEST_MODE=safe vitest run",
|
||||
"test:integration:full": "npm run build && npm run test:swift && PEEKABOO_TEST_MODE=full vitest run",
|
||||
"test:all": "npm run test:integration:full",
|
||||
"lint": "biome check src",
|
||||
"lint:fix": "biome check src --write",
|
||||
"lint:biome": "biome check src",
|
||||
"format": "biome format src --write",
|
||||
"format:check": "biome format src",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"check": "npm run lint && npm run typecheck",
|
||||
"check:fix": "npm run lint:fix && npm run typecheck",
|
||||
"lint:swift": "cd ../Apps/CLI && swiftlint",
|
||||
"format:swift": "cd ../Apps/CLI && swiftformat .",
|
||||
"prepare-release": "node ../Scripts/prepare-release.js",
|
||||
"inspector": "npx @modelcontextprotocol/inspector node dist/index.js",
|
||||
"postinstall": "chmod +x dist/index.js 2>/dev/null || true"
|
||||
},
|
||||
"keywords": [
|
||||
"mcp",
|
||||
"screen-capture",
|
||||
"macos",
|
||||
"ai-analysis",
|
||||
"image-analysis",
|
||||
"window-management"
|
||||
],
|
||||
"author": "Peter Steinberger <steipete@gmail.com>",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.15.0",
|
||||
"openai": "^4.0.0",
|
||||
"pino": "^9.7.0",
|
||||
"pino-pretty": "^13.0.0",
|
||||
"zod": "^3.25.28"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^2.1.3",
|
||||
"@prettier/plugin-oxc": "^0.0.4",
|
||||
"@types/node": "^22.15.21",
|
||||
"@typescript-eslint/eslint-plugin": "^8.19.1",
|
||||
"@typescript-eslint/parser": "^8.19.1",
|
||||
"@vitest/coverage-v8": "^3.1.4",
|
||||
"@vitest/ui": "^3.1.4",
|
||||
"eslint": "^8.57.1",
|
||||
"typescript": "^5.3.0",
|
||||
"vitest": "^3.1.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/steipete/peekaboo.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/steipete/peekaboo/issues"
|
||||
},
|
||||
"homepage": "https://github.com/steipete/peekaboo#readme"
|
||||
}
|
||||
BIN
Server/peekaboo
BIN
Server/peekaboo
Binary file not shown.
@ -1,647 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
||||
import fs from "fs/promises";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import pino from "pino";
|
||||
import { fileURLToPath } from "url";
|
||||
import type { z } from "zod";
|
||||
import {
|
||||
agentToolHandler,
|
||||
agentToolSchema,
|
||||
analyzeToolHandler,
|
||||
analyzeToolSchema,
|
||||
appToolHandler,
|
||||
appToolSchema,
|
||||
cleanToolHandler,
|
||||
cleanToolSchema,
|
||||
clickToolHandler,
|
||||
clickToolSchema,
|
||||
dialogToolHandler,
|
||||
dialogToolSchema,
|
||||
dockToolHandler,
|
||||
dockToolSchema,
|
||||
dragToolHandler,
|
||||
dragToolSchema,
|
||||
hotkeyToolHandler,
|
||||
hotkeyToolSchema,
|
||||
imageToolHandler,
|
||||
imageToolSchema,
|
||||
listToolHandler,
|
||||
listToolSchema,
|
||||
menuToolHandler,
|
||||
menuToolSchema,
|
||||
moveToolHandler,
|
||||
moveToolSchema,
|
||||
permissionsToolHandler,
|
||||
permissionsToolSchema,
|
||||
runToolHandler,
|
||||
runToolSchema,
|
||||
scrollToolHandler,
|
||||
scrollToolSchema,
|
||||
seeToolHandler,
|
||||
seeToolSchema,
|
||||
sleepToolHandler,
|
||||
sleepToolSchema,
|
||||
spaceToolHandler,
|
||||
spaceToolSchema,
|
||||
swipeToolHandler,
|
||||
swipeToolSchema,
|
||||
typeToolHandler,
|
||||
typeToolSchema,
|
||||
windowToolHandler,
|
||||
windowToolSchema,
|
||||
} from "./tools/index.js";
|
||||
import type { ImageInput, ToolResponse } from "./types/index.js";
|
||||
import { getAIProvidersConfig, setupEnvironmentFromCredentials } from "./utils/config-loader.js";
|
||||
import { initializeSwiftCliPath } from "./utils/peekaboo-cli.js";
|
||||
import { generateServerStatusString } from "./utils/server-status.js";
|
||||
import { zodToJsonSchema } from "./utils/zod-to-json-schema.js";
|
||||
|
||||
// Locate the package on disk and read its version.
// This module runs from dist/, so the package root (the directory that holds
// package.json) is one level up.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const packageRootDir = path.resolve(__dirname, "..");
const packageJsonPath = path.join(packageRootDir, "package.json");
const packageJsonText = await fs.readFile(packageJsonPath, "utf-8");
const packageJson = JSON.parse(packageJsonText);
const { version: SERVER_VERSION } = packageJson;

// One-time setup of the Swift CLI location. In an npm install the peekaboo
// binary sits in the package root (Server/), so that directory is handed over.
initializeSwiftCliPath(packageRootDir);
|
||||
|
||||
// No longer need to track initial status display
|
||||
|
||||
// Logging configuration, overridable through the environment:
//   PEEKABOO_LOG_LEVEL - minimum level, lower-cased before use (default "info")
//   PEEKABOO_LOG_FILE  - log destination (default ~/Library/Logs/peekaboo-mcp.log)
const baseLogLevel = (process.env.PEEKABOO_LOG_LEVEL || "info").toLowerCase();
const fallbackLogPath = path.join(os.tmpdir(), "peekaboo-mcp.log");
const defaultLogPath = path.join(os.homedir(), "Library/Logs/peekaboo-mcp.log");
let logFile = process.env.PEEKABOO_LOG_FILE || defaultLogPath;

// Probe the target directory: create it if needed, then write and remove a
// throwaway file to prove the location is actually writable.
const logDir = path.dirname(logFile);
try {
  await fs.mkdir(logDir, { recursive: true });
  const probePath = path.join(logDir, ".peekaboo-test-" + Date.now());
  await fs.writeFile(probePath, "test");
  await fs.unlink(probePath);
} catch (_probeError) {
  // The configured/default location is unusable, so log to the temp directory
  // instead (a no-op when that is already the target). The failure cannot be
  // reported here because the logger has not been created yet.
  logFile = fallbackLogPath;
}
|
||||
|
||||
// Log destinations handed to pino. The file target is always present; a
// pretty-printed stderr target is added only when PEEKABOO_CONSOLE_LOGGING is
// exactly "true" (intended for development). Each target carries its own
// explicit level.
const transportTargets = [
  {
    level: baseLogLevel,
    target: "pino/file",
    options: {
      destination: logFile,
      mkdir: true, // create the log directory if it is missing
    },
  },
  ...(process.env.PEEKABOO_CONSOLE_LOGGING === "true"
    ? [
        {
          level: baseLogLevel,
          target: "pino-pretty",
          options: {
            destination: 2, // file descriptor 2 = stderr
            colorize: true,
            translateTime: "SYS:standard",
            ignore: "pid,hostname",
          },
        },
      ]
    : []),
];

// Shared logger; `level` here is the overall minimum across all targets.
const logger = pino(
  { name: "peekaboo-mcp", level: baseLogLevel },
  pino.transport({ targets: transportTargets })
);
|
||||
|
||||
// Context object passed to every tool handler; it currently carries only the logger.
const toolContext = { logger: logger };

// Identity this server reports, and the capabilities it advertises (tools only).
const peekabooServerInfo = {
  name: "peekaboo-mcp",
  version: SERVER_VERSION,
};
const peekabooServerOptions = {
  capabilities: {
    tools: {},
  },
};

// MCP server built on the SDK's low-level Server API; request handlers are
// registered on it separately.
const server = new Server(peekabooServerInfo, peekabooServerOptions);
|
||||
|
||||
// Set up request handlers
|
||||
// Handler for ListToolsRequestSchema requests.
// Returns the catalogue of tools this server exposes. Every entry has a name,
// a title, a human-readable description and an inputSchema obtained by passing
// the tool's schema through zodToJsonSchema. The server status string is
// regenerated on each request and embedded in every description.
// The `|` / `||||` lines below are diff-table residue from the page this text
// was extracted from; they are kept untouched because many of them fall inside
// whitespace-sensitive template literals.
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
||||
// Generate server status string to append to tool descriptions
|
||||
const serverStatus = generateServerStatusString(SERVER_VERSION);
|
||||
// statusSuffix = newline + status; this is what most descriptions end with.
const statusSuffix = `\n${serverStatus}`;
|
||||
|
||||
return {
|
||||
tools: [
|
||||
{
|
||||
name: "image",
|
||||
title: "Capture and Analyze Screen Content",
|
||||
// NOTE(review): unlike every other entry, this description interpolates
// serverStatus inline (after a space) instead of appending statusSuffix, and
// uses backslash line continuations so the text is a single line.
description: `Captures macOS screen content and optionally analyzes it. \
|
||||
Targets can be entire screen, specific app window, or all windows of an app (via app_target). \
|
||||
Supports foreground/background capture. Output via file path or inline Base64 data (format: "data"). \
|
||||
If a question is provided, image is analyzed by an AI model (auto-selected from PEEKABOO_AI_PROVIDERS). \
|
||||
Window shadows/frames excluded. ${serverStatus}`,
|
||||
inputSchema: zodToJsonSchema(imageToolSchema),
|
||||
},
|
||||
{
|
||||
name: "analyze",
|
||||
title: "Analyze Image with AI",
|
||||
description: `Analyzes a pre-existing image file from the local filesystem using a configured AI model.
|
||||
|
||||
This tool is useful when an image already exists (e.g., previously captured, downloaded, or generated) and you
|
||||
need to understand its content, extract text, or answer specific questions about it.
|
||||
|
||||
Capabilities:
|
||||
- Image Understanding: Provide any question about the image (e.g., "What objects are in this picture?",
|
||||
"Describe the scene.", "Is there a red car?").
|
||||
- Text Extraction (OCR): Ask the AI to extract text from the image (e.g., "What text is visible in this screenshot?").
|
||||
- Flexible AI Configuration: Can use server-default AI providers/models or specify a particular one per call
|
||||
via 'provider_config'.
|
||||
|
||||
Example:
|
||||
If you have an image '/tmp/chart.png' showing a bar chart, you could ask:
|
||||
{ "image_path": "/tmp/chart.png", "question": "Which category has the highest value in this bar chart?" }
|
||||
The AI will analyze the image and attempt to answer your question based on its visual content.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(analyzeToolSchema),
|
||||
},
|
||||
{
|
||||
name: "list",
|
||||
title: "List System Items",
|
||||
description: `Lists various system items on macOS, providing situational awareness.
|
||||
|
||||
Capabilities:
|
||||
- Running Applications: Get a list of all currently running applications (names and bundle IDs).
|
||||
- Application Windows: For a specific application (identified by name or bundle ID), list its open windows.
|
||||
- Details: Optionally include window IDs, bounds (position and size), and whether a window is off-screen.
|
||||
- Multi-window apps: Clearly lists each window of the target app.
|
||||
- Server Status: Provides information about the Peekaboo MCP server itself (version, configured AI providers).
|
||||
|
||||
Use Cases:
|
||||
- Agent needs to know if 'Photoshop' is running before attempting to automate it.
|
||||
{ "item_type": "running_applications" } // Agent checks if 'Photoshop' is in the list.
|
||||
- Agent wants to find a specific 'Notes' window to capture.
|
||||
{ "item_type": "application_windows", "app": "Notes", "include_window_details": ["ids", "bounds"] }
|
||||
The agent can then use the window title or ID with the 'image' tool.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(listToolSchema),
|
||||
},
|
||||
{
|
||||
name: "see",
|
||||
title: "See UI Elements",
|
||||
description: `Captures a screenshot and analyzes UI elements for automation.
|
||||
Returns UI element map with Peekaboo IDs (B1 for buttons, T1 for text fields, etc.)
|
||||
that can be used with interaction commands.
|
||||
Creates or updates a session for tracking UI state across multiple commands.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(seeToolSchema),
|
||||
},
|
||||
{
|
||||
name: "click",
|
||||
title: "Click UI Elements",
|
||||
description: `Clicks on UI elements or coordinates.
|
||||
Supports element queries, specific IDs from see command, or raw coordinates.
|
||||
Includes smart waiting for elements to become actionable.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(clickToolSchema),
|
||||
},
|
||||
{
|
||||
name: "type",
|
||||
title: "Type Text",
|
||||
description: `Types text into UI elements or at current focus.
|
||||
Supports special keys ({return}, {tab}, etc.) and configurable typing speed.
|
||||
Can target specific elements or type at current keyboard focus.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(typeToolSchema),
|
||||
},
|
||||
{
|
||||
name: "scroll",
|
||||
title: "Scroll Content",
|
||||
description: `Scrolls the mouse wheel in any direction.
|
||||
Can target specific elements or scroll at current mouse position.
|
||||
Supports smooth scrolling and configurable speed.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(scrollToolSchema),
|
||||
},
|
||||
{
|
||||
name: "hotkey",
|
||||
title: "Press Keyboard Shortcuts",
|
||||
description: `Presses keyboard shortcuts and key combinations.
|
||||
Simulates pressing multiple keys simultaneously like Cmd+C or Ctrl+Shift+T.
|
||||
Keys are pressed in order and released in reverse order.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(hotkeyToolSchema),
|
||||
},
|
||||
{
|
||||
name: "swipe",
|
||||
title: "Swipe/Drag Gesture",
|
||||
description: `Performs a swipe/drag gesture from one point to another.
|
||||
Useful for dragging elements, swiping through content, or gesture-based interactions.
|
||||
Creates smooth movement with configurable duration.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(swipeToolSchema),
|
||||
},
|
||||
{
|
||||
name: "run",
|
||||
title: "Run Automation Script",
|
||||
description: `Runs a batch script of Peekaboo commands from a .peekaboo.json file.
|
||||
Scripts can automate complex UI workflows by chaining commands.
|
||||
Each command runs sequentially with shared session state.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(runToolSchema),
|
||||
},
|
||||
{
|
||||
name: "sleep",
|
||||
title: "Pause Execution",
|
||||
description: `Pauses execution for a specified duration.
|
||||
Useful for waiting between UI actions or allowing animations to complete.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(sleepToolSchema),
|
||||
},
|
||||
{
|
||||
name: "clean",
|
||||
title: "Clean Session Cache",
|
||||
description: `Cleans up session cache and temporary files.
|
||||
Sessions are stored in ~/.peekaboo/session/<PID>/ directories.
|
||||
Use this to free up disk space and remove orphaned session data.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(cleanToolSchema),
|
||||
},
|
||||
{
|
||||
name: "app",
|
||||
title: "Application Control",
|
||||
description: `Control applications - launch, quit, relaunch, focus, hide, unhide, and switch between apps.
|
||||
|
||||
Actions:
|
||||
- launch: Start an application
|
||||
- quit: Quit an application (with optional force flag)
|
||||
- relaunch: Quit and restart an application (with configurable wait time)
|
||||
- focus/switch: Bring an application to the foreground
|
||||
- hide: Hide an application
|
||||
- unhide: Show a hidden application
|
||||
|
||||
Target applications by name (e.g., "Safari"), bundle ID (e.g., "com.apple.Safari"),
|
||||
or process ID (e.g., "PID:663"). Fuzzy matching is supported for application names.
|
||||
|
||||
Examples:
|
||||
- Launch Safari: { "action": "launch", "name": "Safari" }
|
||||
- Quit TextEdit: { "action": "quit", "name": "TextEdit" }
|
||||
- Relaunch Chrome: { "action": "relaunch", "name": "Google Chrome", "wait": 3 }
|
||||
- Focus Terminal: { "action": "focus", "name": "Terminal" }${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(appToolSchema),
|
||||
},
|
||||
{
|
||||
name: "window",
|
||||
title: "Window Management",
|
||||
description: `Manipulate application windows - close, minimize, maximize, move, resize, and focus.
|
||||
|
||||
Actions:
|
||||
- close: Close a window
|
||||
- minimize: Minimize a window
|
||||
- maximize: Maximize a window
|
||||
- move: Move a window to specific coordinates (requires x, y)
|
||||
- resize: Resize a window to specific dimensions (requires width, height)
|
||||
- focus: Bring a window to the foreground
|
||||
|
||||
Target windows by application name and optionally by window title or index.
|
||||
Supports partial title matching for convenience.
|
||||
|
||||
Examples:
|
||||
- Close Safari window: { "action": "close", "app": "Safari" }
|
||||
- Move window: { "action": "move", "app": "TextEdit", "x": 100, "y": 100 }
|
||||
- Resize window: { "action": "resize", "app": "Terminal", "width": 800, "height": 600 }${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(windowToolSchema),
|
||||
},
|
||||
{
|
||||
name: "menu",
|
||||
title: "Menu Interaction",
|
||||
description: `Interact with application menu bars - list available menus or click menu items.
|
||||
|
||||
Actions:
|
||||
- list: Discover all available menus and menu items for an application
|
||||
- click: Click on a specific menu item using path notation
|
||||
|
||||
Menu paths use ">" separator (e.g., "File > Save As..." or "Edit > Copy").
|
||||
Use plain ellipsis "..." instead of Unicode "…" in menu paths.
|
||||
|
||||
Examples:
|
||||
- List Chrome menus: { "action": "list", "app": "Google Chrome" }
|
||||
- Save document: { "action": "click", "app": "TextEdit", "path": "File > Save" }
|
||||
- Copy selection: { "action": "click", "app": "Safari", "path": "Edit > Copy" }${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(menuToolSchema),
|
||||
},
|
||||
{
|
||||
name: "agent",
|
||||
title: "AI Agent Task Execution",
|
||||
description: `Execute complex automation tasks using an AI agent powered by OpenAI's Assistants API.
|
||||
The agent can understand natural language instructions and break them down into specific
|
||||
Peekaboo commands to accomplish complex workflows.
|
||||
|
||||
Capabilities:
|
||||
- Natural Language Processing: Understands tasks described in plain English
|
||||
- Multi-step Automation: Breaks complex tasks into sequential steps
|
||||
- Visual Feedback: Can take screenshots to verify results
|
||||
- Context Awareness: Maintains session state across multiple actions
|
||||
- Error Recovery: Can adapt and retry when actions fail
|
||||
|
||||
The agent has access to all Peekaboo automation tools including:
|
||||
- Screen capture and analysis
|
||||
- UI element interaction (click, type, scroll)
|
||||
- Application control (launch, quit, focus)
|
||||
- Window management (move, resize, close)
|
||||
- System interaction (hotkeys, shell commands)
|
||||
|
||||
Example tasks:
|
||||
- "Open Safari and navigate to apple.com"
|
||||
- "Take a screenshot of the current window and save it to Desktop"
|
||||
- "Find the login button and click it, then type my credentials"
|
||||
- "Open TextEdit, write 'Hello World', and save the document"
|
||||
|
||||
Requires OPENAI_API_KEY environment variable to be set.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(agentToolSchema),
|
||||
},
|
||||
{
|
||||
name: "permissions",
|
||||
title: "Check System Permissions",
|
||||
description: `Check macOS system permissions required for automation.
|
||||
Verifies both Screen Recording and Accessibility permissions.
|
||||
Returns the current permission status for each required permission.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(permissionsToolSchema),
|
||||
},
|
||||
{
|
||||
name: "move",
|
||||
title: "Move Mouse Cursor",
|
||||
description: `Move the mouse cursor to a specific position or UI element.
|
||||
Supports absolute coordinates, UI element targeting, or centering on screen.
|
||||
Can animate movement smoothly over a specified duration.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(moveToolSchema),
|
||||
},
|
||||
{
|
||||
name: "drag",
|
||||
title: "Drag and Drop",
|
||||
description: `Perform drag and drop operations between UI elements or coordinates.
|
||||
Supports element queries, specific IDs, or raw coordinates for both start and end points.
|
||||
Includes focus options for handling windows in different spaces.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(dragToolSchema),
|
||||
},
|
||||
{
|
||||
name: "dock",
|
||||
title: "Dock Interaction",
|
||||
description: `Interact with the macOS Dock - launch apps, show context menus, hide/show dock.
|
||||
Actions: launch, right-click (with menu selection), hide, show, list
|
||||
Can list all dock items including persistent and running applications.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(dockToolSchema),
|
||||
},
|
||||
{
|
||||
name: "dialog",
|
||||
title: "System Dialog Interaction",
|
||||
description: `Interact with system dialogs and alerts.
|
||||
Actions: click buttons, input text, select files, dismiss dialogs, list open dialogs.
|
||||
Handles save/open dialogs, alerts, and other system prompts.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(dialogToolSchema),
|
||||
},
|
||||
{
|
||||
name: "space",
|
||||
title: "macOS Spaces Management",
|
||||
description: `Manage macOS Spaces (virtual desktops).
|
||||
Actions: list spaces, switch to a specific space, move windows between spaces.
|
||||
Supports moving windows with optional follow behavior to switch along with the window.${statusSuffix}`,
|
||||
inputSchema: zodToJsonSchema(spaceToolSchema),
|
||||
},
|
||||
],
|
||||
};
|
||||
});
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||
const { name, arguments: args } = request.params;
|
||||
|
||||
logger.debug({ toolName: name, args }, "Tool call received");
|
||||
|
||||
let response: ToolResponse; // To store the raw response from tool handlers
|
||||
|
||||
try {
|
||||
switch (name) {
|
||||
case "image": {
|
||||
// Store original format before validation
|
||||
const originalFormat = (args as Record<string, unknown>)?.format;
|
||||
const validatedArgs = imageToolSchema.parse(args || {});
|
||||
|
||||
// Check if format was corrected
|
||||
if (originalFormat && typeof originalFormat === "string") {
|
||||
const normalizedOriginal = originalFormat.toLowerCase();
|
||||
const validFormats = ["png", "jpg", "jpeg", "data"];
|
||||
if (!validFormats.includes(normalizedOriginal) && validatedArgs.format === "png") {
|
||||
// Format was corrected, add the original format to the validated args
|
||||
(validatedArgs as ImageInput & { _originalFormat?: string })._originalFormat = originalFormat;
|
||||
}
|
||||
}
|
||||
|
||||
response = await imageToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "analyze": {
|
||||
const validatedArgs = analyzeToolSchema.parse(args || {});
|
||||
response = await analyzeToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "list": {
|
||||
const validatedArgs = listToolSchema.parse(args || {});
|
||||
response = await listToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "see": {
|
||||
const validatedArgs = seeToolSchema.parse(args || {});
|
||||
response = await seeToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "click": {
|
||||
const validatedArgs = clickToolSchema.parse(args || {});
|
||||
response = await clickToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "type": {
|
||||
const validatedArgs = typeToolSchema.parse(args || {});
|
||||
response = await typeToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "scroll": {
|
||||
const validatedArgs = scrollToolSchema.parse(args || {});
|
||||
response = await scrollToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "hotkey": {
|
||||
const validatedArgs = hotkeyToolSchema.parse(args || {});
|
||||
response = await hotkeyToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "swipe": {
|
||||
const validatedArgs = swipeToolSchema.parse(args || {});
|
||||
response = await swipeToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "run": {
|
||||
const validatedArgs = runToolSchema.parse(args || {});
|
||||
response = await runToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "sleep": {
|
||||
const validatedArgs = sleepToolSchema.parse(args || {});
|
||||
response = await sleepToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "clean": {
|
||||
const validatedArgs = cleanToolSchema.parse(args || {});
|
||||
response = await cleanToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "agent": {
|
||||
const validatedArgs = agentToolSchema.parse(args || {});
|
||||
response = await agentToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "app": {
|
||||
const validatedArgs = appToolSchema.parse(args || {});
|
||||
response = await appToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "window": {
|
||||
const validatedArgs = windowToolSchema.parse(args || {});
|
||||
response = await windowToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "menu": {
|
||||
const validatedArgs = menuToolSchema.parse(args || {});
|
||||
response = await menuToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "permissions": {
|
||||
const validatedArgs = permissionsToolSchema.parse(args || {});
|
||||
response = await permissionsToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "move": {
|
||||
const validatedArgs = moveToolSchema.parse(args || {});
|
||||
response = await moveToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "drag": {
|
||||
const validatedArgs = dragToolSchema.parse(args || {});
|
||||
response = await dragToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "dock": {
|
||||
const validatedArgs = dockToolSchema.parse(args || {});
|
||||
response = await dockToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "dialog": {
|
||||
const validatedArgs = dialogToolSchema.parse(args || {});
|
||||
response = await dialogToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
case "space": {
|
||||
const validatedArgs = spaceToolSchema.parse(args || {});
|
||||
response = await spaceToolHandler(validatedArgs, toolContext);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
response = {
|
||||
content: [{ type: "text", text: `Unknown tool: ${name}` }],
|
||||
isError: true,
|
||||
};
|
||||
logger.error(`Unknown tool: ${name}`);
|
||||
}
|
||||
|
||||
return response;
|
||||
} catch (error) {
|
||||
logger.error({ error, toolName: name }, "Tool execution failed");
|
||||
|
||||
// If it's a Zod validation error, return a more helpful message
|
||||
if (error && typeof error === "object" && "issues" in error) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text" as const,
|
||||
text: `Invalid arguments: ${(error as z.ZodError).issues.map((issue) => issue.message).join(", ")}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
} as ToolResponse;
|
||||
}
|
||||
|
||||
// For any other error, return a proper error response instead of throwing
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text" as const,
|
||||
text: `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
} as ToolResponse;
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Boots the Peekaboo MCP server on a stdio transport.
 *
 * Steps, in order:
 *  1. Let `setupEnvironmentFromCredentials` prepare the environment.
 *  2. If `PEEKABOO_AI_PROVIDERS` is not already set in the environment, fill it
 *     from whatever `getAIProvidersConfig` returns.
 *  3. Connect the server to a `StdioServerTransport`.
 *
 * Any failure in those steps is logged and terminates the process with exit
 * code 1.
 */
async function main() {
  try {
    // Credentials and config must be in place before the server starts.
    await setupEnvironmentFromCredentials(logger);

    // A value already present in the environment is never overwritten.
    const configuredProviders = await getAIProvidersConfig(logger);
    const envHasProviders = Boolean(process.env.PEEKABOO_AI_PROVIDERS);
    if (configuredProviders && !envHasProviders) {
      process.env.PEEKABOO_AI_PROVIDERS = configuredProviders;
      logger.info({ providers: configuredProviders }, "Loaded AI providers from config file");
    }

    const stdioTransport = new StdioServerTransport();
    await server.connect(stdioTransport);

    logger.info("Peekaboo MCP Server started successfully");
    const startedAt = new Date().toISOString();
    logger.info(`🔥 Hot-reload test: Server restarted at ${startedAt}`);
  } catch (error) {
    logger.error({ error }, "Failed to start server");
    process.exit(1);
  }
}
|
||||
|
||||
// Handle graceful shutdown.
// SIGTERM and SIGINT are treated identically: close the server, flush the
// logger, and exit with code 0 even when closing fails (the failure is only
// logged). Listeners are registered in the order SIGTERM, then SIGINT.
for (const signal of ["SIGTERM", "SIGINT"] as const) {
  process.on(signal, async () => {
    logger.info(`${signal} received, shutting down gracefully`);
    try {
      await server.close();
      logger.flush();
    } catch (e) {
      logger.error({ error: e }, `Error during server close on ${signal}`);
    }
    process.exit(0);
  });
}
|
||||
|
||||
// Entry point. main() handles its own startup failures, so this rejection
// handler is a last resort for anything that escapes it: log and exit non-zero.
main().catch((fatal) => {
  logger.error({ error: fatal }, "Fatal error in main");
  process.exit(1);
});
|
||||
@ -1,274 +0,0 @@
|
||||
import type { Logger } from "pino";
|
||||
import { z } from "zod";
|
||||
import type {
|
||||
AgentErrorResponse,
|
||||
AgentSession,
|
||||
AgentStep,
|
||||
AgentSuccessResponse,
|
||||
ToolResponse,
|
||||
} from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the `agent` tool.
 *
 * Every field is optional at the schema level; the handler enforces that a
 * `task` is present unless `listSessions` is set.
 */
export const agentToolSchema = z.object({
  // --- What to do ---------------------------------------------------------
  task: z
    .string()
    .optional()
    .describe("Natural language description of the task to perform (optional when listing sessions)"),

  // --- Output / execution controls ---------------------------------------
  verbose: z
    .boolean()
    .optional()
    .describe("Enable verbose output with full JSON debug information"),
  quiet: z
    .boolean()
    .optional()
    .describe("Quiet mode - only show final result"),
  dry_run: z
    .boolean()
    .optional()
    .describe("Dry run - show planned steps without executing"),
  max_steps: z
    .number()
    .int()
    .positive()
    .optional()
    .describe("Maximum number of steps the agent can take"),
  model: z
    .string()
    .optional()
    .describe("OpenAI model to use (e.g., gpt-4-turbo, gpt-4o)"),

  // --- Session management -------------------------------------------------
  resume: z
    .boolean()
    .optional()
    .describe("Resume the most recent session"),
  resumeSession: z
    .string()
    .optional()
    .describe("Resume a specific session by ID"),
  listSessions: z
    .boolean()
    .optional()
    .describe("List available sessions"),
  noCache: z
    .boolean()
    .optional()
    .describe("Disable session caching (always create new session)"),
});

/** Validated argument object accepted by `agentToolHandler`. */
export type AgentInput = z.infer<typeof agentToolSchema>;
|
||||
|
||||
/**
 * MCP handler for the `agent` tool.
 *
 * Translates the validated input into arguments for the Swift CLI `agent`
 * subcommand, runs it with a five-minute timeout, and converts the CLI's JSON
 * result into a single-text-item tool response.
 *
 * Behavior notes:
 * - Unless `listSessions` is set, an OPENAI_API_KEY or ANTHROPIC_API_KEY
 *   environment variable and a `task` are both required; otherwise an error
 *   response is returned without invoking the CLI.
 * - A CLI payload with `success: false` is always reported with
 *   `isError: true`, even when it carries no `error` object (previously such a
 *   payload fell through to the "unexpected response format" fallback and was
 *   reported as a non-error).
 * - This function never throws; unexpected exceptions are converted into
 *   error responses.
 *
 * @param input   Arguments already validated against `agentToolSchema`.
 * @param context Carries the logger used for debug/warn/error output.
 * @returns A tool response whose `isError` flag reflects the outcome.
 */
export async function agentToolHandler(input: AgentInput, context: { logger: Logger }): Promise<ToolResponse> {
  const { logger } = context;

  // Every outcome of this handler is one text item plus an explicit error flag.
  const reply = (text: string, isError: boolean): ToolResponse => ({
    content: [{ type: "text", text }],
    isError,
  });

  try {
    logger.debug({ input }, "Agent tool called");

    // Listing sessions needs neither an API key nor a task description.
    if (!input.listSessions) {
      if (!process.env.OPENAI_API_KEY && !process.env.ANTHROPIC_API_KEY) {
        return reply(
          "Agent command requires OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable to be set. Please configure your API key to use the agent functionality.",
          true
        );
      }
      if (!input.task) {
        return reply("❌ Task description is required unless using --list-sessions", true);
      }
    }

    // Build command arguments: positional task first, then flags.
    const args = ["agent"];
    if (input.task) {
      args.push(input.task);
    }
    if (input.verbose) {
      args.push("--verbose");
    }
    if (input.quiet) {
      args.push("--quiet");
    }
    if (input.dry_run) {
      args.push("--dry-run");
    }
    if (input.max_steps !== undefined) {
      args.push("--max-steps", input.max_steps.toString());
    }
    if (input.model) {
      args.push("--model", input.model);
    }
    if (input.resume) {
      args.push("--resume");
    }
    if (input.resumeSession) {
      args.push("--resume-session", input.resumeSession);
    }
    if (input.listSessions) {
      args.push("--list-sessions");
    }
    if (input.noCache) {
      args.push("--no-cache");
    }
    // Always use JSON output for MCP integration.
    args.push("--json-output");

    logger.debug({ args }, "Executing agent command");

    const result = await executeSwiftCli(args, logger, {
      timeout: 300000, // 5 minute timeout for agent tasks
    });

    logger.debug({ result }, "Agent command completed");

    if (!result.success) {
      return reply(`❌ Agent command failed: ${result.error?.message || "Unknown error"}`, true);
    }

    // The data field should already be structured JSON; a string is parsed,
    // and if that fails the raw output is handed back as a non-error result.
    let parsedResult = result.data;
    if (typeof result.data === "string") {
      try {
        parsedResult = JSON.parse(result.data);
      } catch (parseError) {
        logger.warn({ parseError, data: result.data }, "Failed to parse agent JSON output");
        return reply(`Agent task completed. Output: ${result.data}`, false);
      }
    }

    if (parsedResult && typeof parsedResult === "object" && "success" in parsedResult) {
      const agentResponse = parsedResult as AgentSuccessResponse | AgentErrorResponse;

      if (agentResponse.success && agentResponse.data) {
        const agentData = agentResponse.data;
        let responseText = "";

        if (input.listSessions && agentData.sessions && Array.isArray(agentData.sessions)) {
          // Session listing.
          responseText = "✅ Available sessions:\n";
          if (agentData.sessions.length === 0) {
            responseText += "\nNo sessions found.";
          } else {
            agentData.sessions.forEach((session: AgentSession) => {
              responseText += `\n📌 Session: ${session.id}`;
              if (session.task) {
                responseText += `\n Task: ${session.task}`;
              }
              if (session.created) {
                responseText += `\n Created: ${new Date(session.created).toLocaleString()}`;
              }
              if (session.messageCount !== undefined) {
                responseText += `\n Messages: ${session.messageCount}`;
              }
              responseText += "\n";
            });
          }
        } else if (agentData.summary) {
          responseText = `✅ Agent Task Completed\n\n${agentData.summary}`;
        } else {
          responseText = "✅ Agent task completed successfully";
        }

        // Step details are appended only when the caller asked for verbose output.
        if (input.verbose && agentData.steps && Array.isArray(agentData.steps)) {
          responseText += `\n\nSteps executed (${agentData.steps.length}):`;
          agentData.steps.forEach((step: AgentStep, index: number) => {
            responseText += `\n${index + 1}. ${step.description || step.command || "Unknown step"}`;
            // Long step output is omitted to keep the summary readable.
            if (step.output && step.output.length < 100) {
              responseText += ` → ${step.output}`;
            }
          });
        }

        return reply(responseText, false);
      }

      // Any explicit failure is an error, with or without an error object.
      if (!agentResponse.success) {
        const errorMessage =
          ("error" in agentResponse && agentResponse.error && agentResponse.error.message) ||
          "Agent execution failed";
        return reply(`❌ Agent Error: ${errorMessage}`, true);
      }
    }

    // Fallback for unexpected response format (e.g. success without data).
    return reply(
      `Agent execution completed with unexpected response format: ${JSON.stringify(parsedResult)}`,
      false
    );
  } catch (error) {
    logger.error({ error, input }, "Agent tool execution failed");

    const errorMessage = error instanceof Error ? error.message : String(error);

    // Map recognizable failures to friendlier messages.
    if (errorMessage.includes("OPENAI_API_KEY")) {
      return reply(
        "❌ OpenAI API key missing or invalid. Please set the OPENAI_API_KEY environment variable.",
        true
      );
    }
    if (errorMessage.includes("timeout")) {
      return reply(
        "❌ Agent task timed out. The task may be too complex or the system may be unresponsive.",
        true
      );
    }
    return reply(`❌ Agent execution failed: ${errorMessage}`, true);
  }
}
|
||||
@ -1,195 +0,0 @@
|
||||
import path from "path";
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { analyzeImageWithProvider, determineProviderAndModel, parseAIProviders } from "../utils/ai-providers.js";
|
||||
import { getAIProvidersConfig } from "../utils/config-loader.js";
|
||||
import { readImageAsBase64 } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the `analyze` tool.
 *
 * `image_path` is optional at the field level because a hidden `path`
 * property is still accepted (unknown keys pass through); the refinement
 * then requires that at least one of the two is truthy.
 */
export const analyzeToolSchema = z
  .object({
    image_path: z
      .string()
      .optional()
      .describe("Required. Absolute path to image file (.png, .jpg, .webp) to be analyzed."),
    question: z.string().describe("Required. Question for the AI about the image."),
    provider_config: z
      .object({
        type: z
          .enum(["auto", "ollama", "openai"])
          .default("auto")
          .describe(
            "AI provider, default: auto. 'auto' uses server's PEEKABOO_AI_PROVIDERS environment preference. Specific provider must be enabled in server's PEEKABOO_AI_PROVIDERS."
          ),
        model: z
          .string()
          .optional()
          .describe(
            "Optional. Model name. If omitted, uses model from server's PEEKABOO_AI_PROVIDERS for chosen provider, or an internal default for that provider."
          ),
      })
      .optional()
      .describe("Optional. Explicit provider/model. Validated against server's PEEKABOO_AI_PROVIDERS."),
  })
  // Unknown properties are kept so the hidden `path` parameter survives parsing.
  .passthrough()
  .refine(
    (candidate: unknown) => {
      const { image_path: imagePath, path: legacyPath } = candidate as { image_path?: string; path?: string };
      return imagePath || legacyPath;
    },
    {
      message: "image_path is required",
      path: ["image_path"],
    }
  );

/**
 * Validated argument object accepted by `analyzeToolHandler`, widened with
 * the hidden `path` parameter kept for backward compatibility.
 */
export type AnalyzeToolInput = z.infer<typeof analyzeToolSchema> & {
  path?: string;
};
|
||||
|
||||
/**
 * MCP handler for the `analyze` tool: asks a configured AI provider a
 * question about an image file.
 *
 * Pipeline: resolve the image path (`image_path`, falling back to the hidden
 * `path`), check the file extension, load and parse the AI provider
 * configuration, pick a provider/model, read the image as base64, run the
 * analysis, and return the answer followed by a timing line.
 *
 * Each stage that fails returns an `isError: true` response describing the
 * failure; the function itself does not throw.
 *
 * @param input   Arguments validated against `analyzeToolSchema`.
 * @param context Tool context; only its logger is used here.
 * @returns On success, the analysis text and a timing message as content,
 *          plus `analysis_text` and `model_used` fields.
 */
export async function analyzeToolHandler(input: AnalyzeToolInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  // All failure paths share this shape: one text item and isError set.
  const fail = (text: string): ToolResponse => ({
    content: [{ type: "text" as const, text }],
    isError: true,
  });

  try {
    // Prefer image_path; fall back to the hidden path parameter.
    const effectiveImagePath = input.image_path || input.path || "";

    logger.debug(
      { input: { ...input, effectiveImagePath: effectiveImagePath.split("/").pop() } },
      "Processing peekaboo.analyze tool call"
    );

    // Reject unsupported extensions before doing any other work.
    const supportedExtensions = new Set([".png", ".jpg", ".jpeg", ".webp"]);
    const ext = path.extname(effectiveImagePath).toLowerCase();
    if (!supportedExtensions.has(ext)) {
      return fail(`Unsupported image format: ${ext}. Supported formats: .png, .jpg, .jpeg, .webp`);
    }

    // Provider configuration must exist and be non-blank.
    const aiProvidersEnv = await getAIProvidersConfig(logger);
    if (!aiProvidersEnv?.trim()) {
      logger.error("PEEKABOO_AI_PROVIDERS not configured in environment or config file");
      return fail(
        "AI analysis not configured on this server. Set the PEEKABOO_AI_PROVIDERS environment variable or configure it in ~/.peekaboo/config.json"
      );
    }

    const configuredProviders = parseAIProviders(aiProvidersEnv);
    if (configuredProviders.length === 0) {
      return fail("No valid AI providers found in PEEKABOO_AI_PROVIDERS configuration.");
    }

    const { provider, model } = await determineProviderAndModel(input.provider_config, configuredProviders, logger);
    if (!provider) {
      return fail("No configured AI providers are currently operational.");
    }

    // Read image as base64.
    let imageBase64: string;
    try {
      imageBase64 = await readImageAsBase64(effectiveImagePath);
    } catch (error) {
      logger.error({ error, path: effectiveImagePath }, "Failed to read image file");
      return fail(`Failed to read image file: ${error instanceof Error ? error.message : "Unknown error"}`);
    }

    // Analyze image, timing only the provider call.
    const startTime = Date.now();
    let analysisResult: string;
    try {
      analysisResult = await analyzeImageWithProvider(
        { provider, model },
        effectiveImagePath,
        imageBase64,
        input.question,
        logger
      );
    } catch (error) {
      logger.error({ error, provider, model }, "AI analysis failed");
      return {
        ...fail(`AI analysis failed: ${error instanceof Error ? error.message : "Unknown error"}`),
        _meta: {
          backend_error_code: "AI_PROVIDER_ERROR",
        },
      };
    }

    const durationSeconds = ((Date.now() - startTime) / 1000).toFixed(2);
    const analysisTimeMessage = `👻 Peekaboo: Analyzed image with ${provider}/${model} in ${durationSeconds}s.`;

    return {
      content: [
        { type: "text" as const, text: analysisResult },
        { type: "text" as const, text: analysisTimeMessage },
      ],
      analysis_text: analysisResult,
      model_used: `${provider}/${model}`,
    };
  } catch (error) {
    logger.error({ error }, "Unexpected error in analyze tool handler");
    return fail(`Unexpected error: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
}
|
||||
@ -1,307 +0,0 @@
|
||||
import type { Logger } from "pino";
|
||||
import { z } from "zod";
|
||||
import type { AppInfo, AppResponseData, AppSuccessResponse, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the `app` tool.
 *
 * Only `action` is mandatory; which of the remaining fields are meaningful
 * depends on the action, as noted in each description.
 */
export const appToolSchema = z.object({
  action: z
    .enum(["launch", "quit", "relaunch", "focus", "hide", "unhide", "switch", "list"])
    .describe("The action to perform on the application"),

  // --- Target selection ---------------------------------------------------
  name: z
    .string()
    .optional()
    .describe("Application name, bundle ID, or process ID (e.g., 'Safari', 'com.apple.Safari', 'PID:663')"),
  bundleId: z
    .string()
    .optional()
    .describe("Launch by bundle identifier instead of name (for 'launch' action)"),

  // --- Launch / quit / relaunch modifiers --------------------------------
  waitUntilReady: z
    .boolean()
    .optional()
    .describe("Wait for the application to be ready (for 'launch' and 'relaunch' actions)"),
  force: z
    .boolean()
    .optional()
    .describe("Force quit the application (for 'quit' and 'relaunch' actions)"),
  all: z
    .boolean()
    .optional()
    .describe("Quit all applications (for 'quit' action)"),
  except: z
    .string()
    .optional()
    .describe("Comma-separated list of apps to exclude when using --all (for 'quit' action)"),

  // --- Switch modifiers ---------------------------------------------------
  to: z
    .string()
    .optional()
    .describe("Application to switch to (for 'switch' action)"),
  cycle: z
    .boolean()
    .optional()
    .describe("Cycle to next application like Cmd+Tab (for 'switch' action)"),

  // --- Relaunch timing ----------------------------------------------------
  wait: z
    .number()
    .optional()
    .describe("Wait time in seconds between quit and launch (for 'relaunch' action, default: 2)"),
});

/** Validated argument object accepted by `appToolHandler`. */
export type AppInput = z.infer<typeof appToolSchema>;
|
||||
|
||||
/**
 * MCP handler for the `app` tool: launches, quits, relaunches, focuses,
 * hides, unhides, switches between, or lists applications via the Swift CLI
 * `app` subcommand.
 *
 * Fixes relative to the previous implementation:
 * - `relaunch` now receives the options the schema documents for it:
 *   `waitUntilReady`, `force` and `wait` were previously dropped silently.
 *   NOTE(review): the `--wait` flag name is taken from the schema description
 *   of the relaunch option — confirm against the CLI.
 * - `relaunch` without a `name` is rejected up front, like the other
 *   name-based actions.
 * - The `all` flag substitutes for `name` only for `quit`, which is the only
 *   action it is documented for and the only one that forwards `--all`.
 *
 * This function never throws; unexpected exceptions become error responses.
 *
 * @param input   Arguments already validated against `appToolSchema`.
 * @param context Carries the logger used for debug/warn/error output.
 * @returns A single-text-item tool response with `isError` set accordingly.
 */
export async function appToolHandler(input: AppInput, context: { logger: Logger }): Promise<ToolResponse> {
  const { logger } = context;

  // Every outcome of this handler is one text item plus an explicit error flag.
  const reply = (text: string, isError: boolean): ToolResponse => ({
    content: [{ type: "text", text }],
    isError,
  });

  try {
    logger.debug({ input }, "App tool called");

    const { action } = input;

    // Validate input based on action.
    if (action === "launch" && !input.name && !input.bundleId) {
      return reply("❌ Launch action requires either 'name' or 'bundleId' parameter", true);
    }

    if (action === "switch" && !input.to && !input.cycle) {
      return reply("❌ Switch action requires either 'to' parameter or 'cycle' flag", true);
    }

    const requiresName =
      action === "quit" || action === "relaunch" || action === "focus" || action === "hide" || action === "unhide";
    // Only `quit` may target every application instead of a named one.
    const targetsAll = action === "quit" && Boolean(input.all);
    if (requiresName && !input.name && !targetsAll) {
      return reply(`❌ ${action} action requires 'name' parameter${action === "quit" ? " or 'all' flag" : ""}`, true);
    }

    // Build command arguments: positional name first, then per-action flags.
    const args = ["app", action];

    if (input.name) {
      args.push(input.name);
    }

    if (input.bundleId && action === "launch") {
      args.push("--bundle-id", input.bundleId);
    }

    if (input.waitUntilReady && (action === "launch" || action === "relaunch")) {
      args.push("--wait-until-ready");
    }

    if (input.force && (action === "quit" || action === "relaunch")) {
      args.push("--force");
    }

    if (input.all && action === "quit") {
      args.push("--all");
    }

    if (input.except && action === "quit") {
      args.push("--except", input.except);
    }

    if (input.to && action === "switch") {
      args.push("--to", input.to);
    }

    if (input.cycle && action === "switch") {
      args.push("--cycle");
    }

    if (input.wait !== undefined && action === "relaunch") {
      args.push("--wait", input.wait.toString());
    }

    logger.debug({ args }, "Executing app command");

    const result = await executeSwiftCli(args, logger);

    logger.debug({ result }, "App command completed");

    if (!result.success) {
      return reply(`❌ App command failed: ${result.error?.message || "Unknown error"}`, true);
    }

    // A string payload is parsed as JSON; if that fails the raw output is
    // handed back as a non-error result.
    let responseData = result.data;
    if (typeof result.data === "string") {
      try {
        responseData = JSON.parse(result.data);
      } catch (parseError) {
        logger.warn({ parseError, data: result.data }, "Failed to parse app command JSON output");
        return reply(`App ${action} completed. Output: ${result.data}`, false);
      }
    }

    if (responseData && typeof responseData === "object") {
      let appData = responseData as AppResponseData | AppSuccessResponse;

      // The payload may or may not be wrapped in a { success, data } envelope.
      if ("success" in appData && appData.success && appData.data) {
        appData = appData.data;
      } else {
        appData = appData as AppResponseData;
      }

      // Direct response format: recognized by an action, app or pid field.
      if (appData.action || appData.app || appData.pid) {
        let responseText = "";

        switch (action) {
          case "launch":
            responseText = `✅ Application '${input.bundleId || input.name}' launched successfully`;
            if (appData.pid) {
              responseText += `\nProcess ID: ${appData.pid}`;
            }
            if (appData.window_count !== undefined) {
              responseText += `\nWindow count: ${appData.window_count}`;
            }
            if (appData.activated !== undefined) {
              responseText += `\nActive: ${appData.activated ? "Yes" : "No"}`;
            }
            if (appData.bundle_id) {
              responseText += `\nBundle ID: ${appData.bundle_id}`;
            }
            break;

          case "quit":
            if (input.all) {
              responseText = `✅ All applications quit successfully`;
              if (input.except) {
                responseText += ` (except: ${input.except})`;
              }
            } else {
              responseText = `✅ Application '${input.name}' quit successfully`;
            }
            break;

          case "focus":
            responseText = `✅ Application '${input.name}' focused successfully`;
            break;

          case "switch":
            if (input.cycle) {
              responseText = `✅ Cycled to next application`;
            } else if (input.to) {
              responseText = `✅ Switched to application '${input.to}'`;
            } else {
              responseText = `✅ Application switch completed`;
            }
            break;

          case "hide":
            responseText = `✅ Application '${input.name}' hidden successfully`;
            break;

          case "unhide":
            responseText = `✅ Application '${input.name}' unhidden successfully`;
            break;

          case "list":
            responseText = "✅ Running applications:\n";
            if (appData.applications && Array.isArray(appData.applications)) {
              appData.applications.forEach((app: AppInfo) => {
                responseText += `\n• ${app.name || app.localizedName}`;
                if (app.bundleIdentifier) {
                  responseText += ` (${app.bundleIdentifier})`;
                }
                if (app.processIdentifier) {
                  responseText += ` - PID: ${app.processIdentifier}`;
                }
                if (app.isActive) {
                  responseText += " [Active]";
                }
                if (app.isHidden) {
                  responseText += " [Hidden]";
                }
              });
            }
            break;

          default:
            // Reached for `relaunch`, which has no dedicated wording.
            responseText = `✅ App ${action} completed successfully`;
        }

        if (appData.note) {
          responseText += `\n${appData.note}`;
        }

        return reply(responseText, false);
      }

      // Error payloads carry either a plain string or an object with a message.
      if ("error" in appData && appData.error) {
        const errorMessage =
          typeof appData.error === "string"
            ? appData.error
            : typeof appData.error === "object" && appData.error !== null && "message" in appData.error
              ? String((appData.error as { message: unknown }).message)
              : "App command failed";
        return reply(`❌ App Error: ${errorMessage}`, true);
      }
    }

    // Fallback for unexpected response format.
    return reply(`App ${action} completed with unexpected response format: ${JSON.stringify(responseData)}`, false);
  } catch (error) {
    logger.error({ error, input }, "App tool execution failed");

    const errorMessage = error instanceof Error ? error.message : String(error);

    return reply(`❌ App ${input.action} failed: ${errorMessage}`, true);
  }
}
|
||||
@ -1,151 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the `clean` tool.
 *
 * Exactly one selector must be chosen: `all_sessions` (truthy), `older_than`
 * (any defined number), or `session` (any defined string). `dry_run`
 * defaults to false.
 */
export const cleanToolSchema = z
  .object({
    all_sessions: z
      .boolean()
      .optional()
      .describe("Optional. Remove all session data."),
    older_than: z
      .number()
      .optional()
      .describe("Optional. Remove sessions older than specified hours."),
    session: z
      .string()
      .optional()
      .describe("Optional. Remove specific session by ID."),
    dry_run: z
      .boolean()
      .optional()
      .default(false)
      .describe("Optional. Show what would be deleted without actually deleting."),
  })
  .refine(({ all_sessions: allSessions, older_than: olderThan, session }) => {
    // Count how many of the three mutually exclusive selectors were supplied.
    let selected = 0;
    if (allSessions) {
      selected += 1;
    }
    if (olderThan !== undefined) {
      selected += 1;
    }
    if (session !== undefined) {
      selected += 1;
    }
    return selected === 1;
  }, "Specify exactly one of: all_sessions, older_than, or session")
  .describe(
    [
      "Cleans up session cache and temporary files. ",
      "Sessions are stored in ~/.peekaboo/session/<PID>/ directories. ",
      "Use this to free up disk space and remove orphaned session data.",
    ].join("")
  );
|
||||
|
||||
/**
 * Shape the handler casts the clean command's result data to.
 * NOTE(review): the handler reads these fields defensively (`?? 0`,
 * `!== undefined`), so the CLI may omit some of them — confirm.
 */
interface CleanResult {
  /** Number of sessions removed (or that would be removed in a dry run). */
  sessions_removed: number;
  /** Total size of the removed sessions; rendered with formatBytes. */
  bytes_freed: number;
  /** Per-session entries; listed in the response only when there are at most five. */
  session_details: {
    session_id: string;
    path: string;
    size: number;
    creation_date?: string;
  }[];
  /** Rendered with an "s" suffix in the response. */
  execution_time: number;
  success: boolean;
}

/** Validated argument object accepted by `cleanToolHandler`. */
export type CleanInput = z.infer<typeof cleanToolSchema>;
|
||||
|
||||
/**
 * MCP handler for the `clean` tool: runs the Swift CLI `clean` subcommand
 * with the selected option and summarizes the result as text.
 *
 * The first truthy/defined selector among `all_sessions`, `older_than` and
 * `session` is forwarded (the schema guarantees exactly one). `--dry-run` is
 * appended when requested.
 *
 * @param input   Arguments validated against `cleanToolSchema`.
 * @param context Tool context; only its logger is used here.
 * @returns A text summary on success (no `isError` field), or an
 *          `isError: true` response when the command fails, returns no data,
 *          or an exception is raised. The function itself does not throw.
 */
export async function cleanToolHandler(input: CleanInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  // Failure responses share this shape.
  const fail = (text: string): ToolResponse => ({
    content: [{ type: "text", text }],
    isError: true,
  });

  try {
    logger.debug({ input }, "Processing peekaboo.clean tool call");

    // Build command arguments.
    const args = ["clean"];
    if (input.all_sessions) {
      args.push("--all-sessions");
    } else if (input.older_than !== undefined) {
      args.push("--older-than", input.older_than.toString());
    } else if (input.session) {
      args.push("--session", input.session);
    }
    if (input.dry_run) {
      args.push("--dry-run");
    }

    logger.debug({ args }, "Executing clean command with args");

    const result = await executeSwiftCli(args, logger);

    // A successful run without data is treated as a failure too.
    if (!result.success || !result.data) {
      const errorMessage = result.error?.message || "Clean command failed";
      logger.error({ result }, errorMessage);
      return fail(`Failed to clean sessions: ${errorMessage}`);
    }

    const cleanData = result.data as CleanResult;
    const isDryRun = input.dry_run;

    // Build response text line by line.
    const lines: string[] = [];
    if (isDryRun) {
      lines.push("🔍 Dry run mode - no files were deleted", "");
    }

    const sessionsRemoved = cleanData.sessions_removed ?? 0;
    if (sessionsRemoved === 0) {
      lines.push("✅ No sessions to clean");
    } else {
      const action = isDryRun ? "Would remove" : "Removed";
      const plural = sessionsRemoved === 1 ? "" : "s";
      lines.push(`🗑️ ${action} ${sessionsRemoved} session${plural}`);
      lines.push(`💾 Space ${isDryRun ? "to be freed" : "freed"}: ${formatBytes(cleanData.bytes_freed)}`);

      // Per-session details are listed only for short lists (1 to 5 entries).
      const details = cleanData.session_details;
      if (details && details.length > 0 && details.length <= 5) {
        lines.push("\nSessions:");
        details.forEach((session) => {
          lines.push(` - ${session.session_id} (${formatBytes(session.size)})`);
        });
      }
    }

    if (cleanData.execution_time !== undefined) {
      lines.push(`\n⏱️ Completed in ${cleanData.execution_time.toFixed(2)}s`);
    }

    return {
      content: [
        {
          type: "text",
          text: lines.join("\n"),
        },
      ],
    };
  } catch (error) {
    logger.error({ error }, "Clean tool execution failed");
    return fail(`Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
  }
}
|
||||
|
||||
/**
 * Formats a byte count as a human-readable string with one decimal place,
 * using 1024-based steps (B, KB, MB, GB, TB).
 *
 * @param bytes Byte count; `undefined`, `null`, `NaN` and `±Infinity` are all
 *              treated as "nothing to report".
 * @returns e.g. `"0.0 B"`, `"1.5 KB"`, `"2.0 TB"`.
 */
function formatBytes(bytes: number | undefined): string {
  // Non-finite values would otherwise render as "NaN B" or "Infinity GB".
  if (bytes === undefined || bytes === null || !Number.isFinite(bytes)) {
    return "0.0 B";
  }

  const units = ["B", "KB", "MB", "GB", "TB"];
  let size = bytes;
  let unitIndex = 0;

  // Step up one unit at a time until the value fits or the largest unit is reached.
  while (size >= 1024 && unitIndex < units.length - 1) {
    size /= 1024;
    unitIndex++;
  }

  return `${size.toFixed(1)} ${units[unitIndex]}`;
}
|
||||
@ -1,157 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the click tool.
 *
 * All targeting fields are optional individually, but the refinement requires
 * at least one of `query`, `on` or `coords` to be truthy.
 */
export const clickToolSchema = z
  .object({
    query: z.string().optional().describe("Optional. Element text or query to click. Will search for matching elements."),
    on: z.string().optional().describe("Optional. Element ID to click (e.g., B1, T2) from see command output."),
    coords: z.string().optional().describe("Optional. Click at specific coordinates in format 'x,y' (e.g., '100,200')."),
    session: z.string().optional().describe("Optional. Session ID from see command. Uses latest session if not specified."),
    wait_for: z
      .number()
      .optional()
      .default(5000)
      .describe("Optional. Maximum milliseconds to wait for element to become actionable. Default: 5000."),
    double: z.boolean().optional().default(false).describe("Optional. Double-click instead of single click."),
    right: z
      .boolean()
      .optional()
      .default(false)
      .describe("Optional. Right-click (secondary click) instead of left-click."),
  })
  .refine(
    // At least one click target must be supplied.
    (data) => Boolean(data.query || data.on || data.coords),
    "Must specify either 'query', 'on', or 'coords'"
  )
  .describe(
    [
      "Clicks on UI elements or coordinates. ",
      "Supports element queries, specific IDs from see command, or raw coordinates. ",
      "Includes smart waiting for elements to become actionable. ",
      "Works with sessions created by the see command.",
    ].join("")
  );
|
||||
|
||||
/**
 * JSON payload that `clickToolHandler` reads from the `click` CLI command.
 */
interface ClickResult {
  success: boolean;
  /** Description of the element that was clicked, when the CLI reports one. */
  clicked_element?: string;
  /** Click position; the handler rounds both values before display. */
  click_location: { x: number; y: number };
  /** The handler divides this by 1000 and shows it with an "s" suffix. */
  wait_time?: number;
  /** Rendered in the response with an "s" suffix. */
  execution_time: number;
}

/** Input accepted by `clickToolHandler`, inferred from `clickToolSchema`. */
export type ClickInput = z.infer<typeof clickToolSchema>;
|
||||
|
||||
/**
 * Handles a `peekaboo.click` tool call by running the `click` CLI command and
 * summarising the outcome as text.
 *
 * @param input   Validated tool input (target, session, wait timeout, click type).
 * @param context Tool context; only `context.logger` is used.
 * @returns A text response; `isError: true` when the CLI fails or an exception is thrown.
 */
export async function clickToolHandler(input: ClickInput, context: ToolContext): Promise<ToolResponse> {
  const log = context.logger;

  // Shared shape for both failure paths below.
  const failure = (text: string): ToolResponse => ({
    content: [{ type: "text", text }],
    isError: true,
  });

  try {
    log.debug({ input }, "Processing peekaboo.click tool call");

    // Argument order: target(s), session, wait timeout, click type.
    const args: string[] = [
      "click",
      ...(input.query ? [input.query] : []),
      ...(input.on ? ["--on", input.on] : []),
      ...(input.coords ? ["--coords", input.coords] : []),
      ...(input.session ? ["--session", input.session] : []),
      "--wait-for",
      (input.wait_for ?? 5000).toString(),
      ...(input.double ? ["--double"] : []),
      ...(input.right ? ["--right"] : []),
    ];

    const result = await executeSwiftCli(args, log);

    if (!result.success || !result.data) {
      const errorMessage = result.error?.message || "Click command failed";
      log.error({ result }, errorMessage);
      return failure(`Failed to perform click: ${errorMessage}`);
    }

    const clickData = result.data as ClickResult;
    const lines: string[] = ["✅ Click successful"];

    if (clickData.clicked_element) {
      lines.push(`🎯 Clicked: ${clickData.clicked_element}`);
    }

    // Report the location only when both coordinates are present.
    const location = clickData.click_location;
    if (location && location.x !== undefined && location.y !== undefined) {
      lines.push(`📍 Location: (${Math.round(location.x)}, ${Math.round(location.y)})`);
    }

    if (clickData.wait_time && clickData.wait_time > 0) {
      lines.push(`⏳ Waited: ${(clickData.wait_time / 1000).toFixed(1)}s`);
    }

    if (clickData.execution_time !== undefined) {
      lines.push(`⏱️ Completed in ${clickData.execution_time.toFixed(2)}s`);
    }

    return {
      content: [{ type: "text", text: lines.join("\n") }],
    };
  } catch (error) {
    log.error({ error }, "Click tool execution failed");
    return failure(`Tool execution failed: ${error instanceof Error ? error.message : String(error)}`);
  }
}
|
||||
@ -1,219 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the dialog tool.
 *
 * Unknown keys are rejected (`strict`). The refinement enforces the parameters
 * each action needs: `click` needs `button`, `input` needs `text`, `file` needs
 * `path` or `name`; `dismiss` and `list` need nothing.
 */
export const dialogToolSchema = z
  .object({
    action: z.enum(["click", "input", "file", "dismiss", "list"]),
    button: z.string().optional(),
    text: z.string().optional(),
    field: z.string().optional(),
    index: z.number().int().optional(),
    clear: z.boolean().optional(),
    path: z.string().optional(),
    name: z.string().optional(),
    select: z.string().optional(),
    window: z.string().optional(),
    force: z.boolean().optional(),
  })
  .strict()
  .refine(
    (data) => {
      if (data.action === "click") {
        return !!data.button;
      }
      if (data.action === "input") {
        return !!data.text;
      }
      if (data.action === "file") {
        return !!data.path || !!data.name;
      }
      // Remaining valid actions take no required parameters; anything else fails.
      return data.action === "dismiss" || data.action === "list";
    },
    { message: "Missing required parameters for action" }
  );
|
||||
|
||||
/** Input accepted by `dialogToolHandler`, inferred from `dialogToolSchema`. */
export type DialogInput = z.infer<typeof dialogToolSchema>;

/** One element inside a dialog window, as returned by the `list` action. */
interface DialogElement {
  type: string;
  /** Shown in quotes after the element type when present. */
  label?: string;
  value?: string;
  /** Disabled elements are marked "(disabled)" in the listing. */
  enabled: boolean;
}

/** JSON payload returned by every dialog action except `list`. */
interface DialogActionOutput {
  action: string;
  button?: string;
  window?: string;
  field?: string;
  path?: string;
  /** For the `file` action, reported as the button that was clicked. */
  result?: string;
}

/** JSON payload returned by the dialog `list` action. */
interface DialogListOutput {
  windows: {
    title: string;
    type: string;
    elements: DialogElement[];
  }[];
}
|
||||
|
||||
/**
 * Handles a dialog tool call by running `dialog <action>` through the CLI and
 * formatting the JSON result as text.
 *
 * For `list`, the response enumerates each dialog window and its elements; for
 * every other action it is a one-line confirmation. The raw CLI payload is
 * attached under `metadata`.
 *
 * @param args    Validated tool input.
 * @param context Tool context; only `context.logger` is used.
 * @returns A text response; `isError: true` when the CLI fails or an exception is thrown.
 */
export async function dialogToolHandler(args: DialogInput, context: ToolContext): Promise<ToolResponse> {
  // Logger calls use (object, message), matching the other tool handlers in this file.
  // NOTE(review): assumes a pino-style logger, where an object passed after the
  // message is not recorded as structured data; confirm against the Logger type.
  context.logger.debug({ args }, "Performing dialog operation");

  try {
    const commandArgs = ["dialog", args.action];

    // Add action-specific parameters
    switch (args.action) {
      case "click":
        if (args.button) {
          commandArgs.push("--button", args.button);
        }
        break;
      case "input":
        if (args.text) {
          commandArgs.push("--text", args.text);
        }
        if (args.field) {
          commandArgs.push("--field", args.field);
        }
        if (args.index !== undefined) {
          commandArgs.push("--index", args.index.toString());
        }
        if (args.clear) {
          commandArgs.push("--clear");
        }
        break;
      case "file":
        if (args.path) {
          commandArgs.push("--path", args.path);
        }
        if (args.name) {
          commandArgs.push("--name", args.name);
        }
        if (args.select) {
          commandArgs.push("--select", args.select);
        }
        break;
      case "dismiss":
        if (args.force) {
          commandArgs.push("--force");
        }
        break;
    }

    // The window filter applies to every action.
    if (args.window) {
      commandArgs.push("--window", args.window);
    }

    // Always use JSON output
    commandArgs.push("--json-output");

    const result = await executeSwiftCli(commandArgs, context.logger, { timeout: 10000 });

    // Failures are thrown so the catch block below produces the error response.
    if (!result.success || !result.data) {
      throw new Error(result.error?.message || "Failed to perform dialog operation");
    }

    if (args.action === "list") {
      const listData = result.data as DialogListOutput;

      // One paragraph per dialog window, with an indented bullet per element.
      const dialogsList = listData.windows
        .map((window) => {
          let windowText = `Dialog: ${window.title} (${window.type})`;
          if (window.elements.length > 0) {
            windowText += "\n Elements:";
            window.elements.forEach((elem) => {
              windowText += `\n • ${elem.type}`;
              if (elem.label) {
                windowText += `: "${elem.label}"`;
              }
              if (!elem.enabled) {
                windowText += " (disabled)";
              }
            });
          }
          return windowText;
        })
        .join("\n\n");

      return {
        content: [
          {
            type: "text",
            text: dialogsList || "No dialogs found",
          },
        ],
        metadata: {
          windows: listData.windows,
        },
      };
    } else {
      const actionData = result.data as DialogActionOutput;

      // Prefer values echoed back by the CLI, falling back to the request.
      let responseText = "";
      switch (args.action) {
        case "click":
          responseText = `✓ Clicked '${actionData.button || args.button}' button`;
          if (actionData.window) {
            responseText += ` in ${actionData.window}`;
          }
          break;
        case "input":
          responseText = `✓ Entered text`;
          if (actionData.field) {
            responseText += ` in '${actionData.field}' field`;
          }
          break;
        case "file":
          if (args.path) {
            responseText = `✓ Selected file: ${actionData.path || args.path}`;
          } else if (args.name) {
            responseText = `✓ Entered filename: ${args.name}`;
          }
          if (actionData.result) {
            responseText += ` and clicked '${actionData.result}'`;
          }
          break;
        case "dismiss":
          responseText = args.force ? "✓ Force dismissed dialog (ESC)" : "✓ Dismissed dialog";
          break;
      }

      return {
        content: [
          {
            type: "text",
            text: responseText,
          },
        ],
        metadata: actionData,
      };
    }
  } catch (error) {
    context.logger.error({ error }, "Failed to perform dialog operation");
    return {
      content: [
        {
          type: "text",
          text: `Failed to perform dialog operation: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,162 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the dock tool.
 *
 * Unknown keys are rejected (`strict`). The refinement enforces:
 * - `launch` and `right-click` require `app`;
 * - `select` is only allowed with `right-click`;
 * - `include_all` is only allowed with `list`.
 */
export const dockToolSchema = z
  .object({
    action: z.enum(["launch", "right-click", "hide", "show", "list"]),
    app: z.string().optional(),
    select: z.string().optional(),
    include_all: z.boolean().optional(), // For list action
  })
  .strict()
  .refine(
    (data) => {
      const needsApp = data.action === "launch" || data.action === "right-click";
      const appOk = !needsApp || Boolean(data.app);
      const selectOk = !data.select || data.action === "right-click";
      const includeAllOk = !data.include_all || data.action === "list";
      return appOk && selectOk && includeAllOk;
    },
    { message: "Invalid combination of action and parameters" }
  );
|
||||
|
||||
/** Input accepted by `dockToolHandler`, inferred from `dockToolSchema`. */
export type DockInput = z.infer<typeof dockToolSchema>;

/** One Dock entry as returned by the `list` action. */
interface DockItem {
  title: string;
  type: string;
  /** Appended to the listing line after " - " when present. */
  bundle_id?: string;
  path?: string;
}

/** JSON payload returned by every dock action except `list`. */
interface DockActionOutput {
  action: string;
  /** Preferred over the requested app name in the `launch` confirmation. */
  app?: string;
  item?: string;
  result?: string;
}

/** JSON payload returned by the dock `list` action. */
interface DockListOutput {
  items: DockItem[];
}
|
||||
|
||||
/**
 * Handles a dock tool call by running `dock <action>` through the CLI and
 * formatting the JSON result as text.
 *
 * For `list`, the response is a bulleted list of Dock items; for every other
 * action it is a one-line confirmation. The raw CLI payload is attached under
 * `metadata`.
 *
 * @param args    Validated tool input.
 * @param context Tool context; only `context.logger` is used.
 * @returns A text response; `isError: true` when the CLI fails or an exception is thrown.
 */
export async function dockToolHandler(args: DockInput, context: ToolContext): Promise<ToolResponse> {
  // Logger calls use (object, message), matching the other tool handlers in this file.
  // NOTE(review): assumes a pino-style logger, where an object passed after the
  // message is not recorded as structured data; confirm against the Logger type.
  context.logger.debug({ args }, "Performing dock operation");

  try {
    const commandArgs = ["dock", args.action];

    // `launch` takes the app as a positional argument, `right-click` as --app.
    if (args.app && (args.action === "launch" || args.action === "right-click")) {
      if (args.action === "launch") {
        commandArgs.push(args.app);
      } else {
        commandArgs.push("--app", args.app);
      }
    }

    // Add select parameter for right-click
    if (args.select && args.action === "right-click") {
      commandArgs.push("--select", args.select);
    }

    // Add include-all for list
    if (args.include_all && args.action === "list") {
      commandArgs.push("--include-all");
    }

    // Always use JSON output
    commandArgs.push("--json-output");

    const result = await executeSwiftCli(commandArgs, context.logger, { timeout: 10000 });

    // Failures are thrown so the catch block below produces the error response.
    if (!result.success || !result.data) {
      throw new Error(result.error?.message || "Failed to perform dock operation");
    }

    if (args.action === "list") {
      const listData = result.data as DockListOutput;

      // One bullet per Dock item, with the bundle id appended when known.
      const itemsList = listData.items
        .map((item) => {
          let itemText = `• ${item.title} (${item.type})`;
          if (item.bundle_id) {
            itemText += ` - ${item.bundle_id}`;
          }
          return itemText;
        })
        .join("\n");

      return {
        content: [
          {
            type: "text",
            text: `Dock items:\n${itemsList}`,
          },
        ],
        metadata: {
          items: listData.items,
        },
      };
    } else {
      const actionData = result.data as DockActionOutput;

      let responseText = "";
      switch (args.action) {
        case "launch":
          responseText = `✓ Launched ${actionData.app || args.app} from Dock`;
          break;
        case "right-click":
          if (args.select) {
            responseText = `✓ Selected "${args.select}" from ${args.app} context menu`;
          } else {
            responseText = `✓ Right-clicked ${args.app} in Dock`;
          }
          break;
        case "hide":
          responseText = "✓ Dock hidden";
          break;
        case "show":
          responseText = "✓ Dock shown";
          break;
      }

      return {
        content: [
          {
            type: "text",
            text: responseText,
          },
        ],
        metadata: actionData,
      };
    }
  } catch (error) {
    context.logger.error({ error }, "Failed to perform dock operation");
    return {
      content: [
        {
          type: "text",
          text: `Failed to perform dock operation: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,166 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the drag tool, including the focus options.
 *
 * Unknown keys are rejected (`strict`). Coordinate strings must match
 * `<digits>,<digits>`. The refinement requires a start (`from` or
 * `from_coords`) and an end (`to`, `to_coords` or `to_app`).
 */
export const dragToolSchema = z
  .object({
    from: z.string().optional(),
    from_coords: z.string().regex(/^\d+,\d+$/, "Coordinates must be in format 'x,y'").optional(),
    to: z.string().optional(),
    to_coords: z.string().regex(/^\d+,\d+$/, "Coordinates must be in format 'x,y'").optional(),
    to_app: z.string().optional(),
    session: z.string().optional(),
    duration: z.number().int().positive().optional(),
    steps: z.number().int().positive().optional(),
    modifiers: z.string().optional(),
    // Focus options
    auto_focus: z.boolean().optional(),
    space_switch: z.boolean().optional(),
    bring_to_current_space: z.boolean().optional(),
  })
  .strict()
  .refine(
    (data) => {
      const startGiven = Boolean(data.from || data.from_coords);
      const endGiven = Boolean(data.to || data.to_coords || data.to_app);
      return startGiven && endGiven;
    },
    { message: "Must specify both starting point (from/from_coords) and ending point (to/to_coords/to_app)" }
  );
|
||||
|
||||
/** Input accepted by `dragToolHandler`, inferred from `dragToolSchema`. */
export type DragInput = z.infer<typeof dragToolSchema>;

/** JSON payload that `dragToolHandler` reads from the `drag` CLI command. */
interface DragOutput {
  action: string;
  /** Drag origin; `element` replaces the coordinates in the summary when set. */
  from: { x: number; y: number; element?: string };
  /** Drag destination; `element` (or else `app`) is appended to the summary. */
  to: { x: number; y: number; element?: string; app?: string };
  /** Rendered in the summary with an "ms" suffix. */
  duration: number;
}
|
||||
|
||||
/**
 * Handles a drag tool call by running the `drag` CLI command and describing
 * the performed drag as text.
 *
 * The response `metadata` carries the `from`, `to` and `duration` values
 * reported by the CLI.
 *
 * @param args    Validated tool input.
 * @param context Tool context; only `context.logger` is used.
 * @returns A text response; `isError: true` when the CLI fails or an exception is thrown.
 */
export async function dragToolHandler(args: DragInput, context: ToolContext): Promise<ToolResponse> {
  // Logger calls use (object, message), matching the other tool handlers in this file.
  // NOTE(review): assumes a pino-style logger, where an object passed after the
  // message is not recorded as structured data; confirm against the Logger type.
  context.logger.debug({ args }, "Performing drag operation");

  try {
    const commandArgs = ["drag"];

    // Add starting point
    if (args.from) {
      commandArgs.push("--from", args.from);
    }
    if (args.from_coords) {
      commandArgs.push("--from-coords", args.from_coords);
    }

    // Add ending point
    if (args.to) {
      commandArgs.push("--to", args.to);
    }
    if (args.to_coords) {
      commandArgs.push("--to-coords", args.to_coords);
    }
    if (args.to_app) {
      commandArgs.push("--to-app", args.to_app);
    }

    // Add options
    if (args.session) {
      commandArgs.push("--session", args.session);
    }
    if (args.duration !== undefined) {
      commandArgs.push("--duration", args.duration.toString());
    }
    if (args.steps !== undefined) {
      commandArgs.push("--steps", args.steps.toString());
    }
    if (args.modifiers) {
      commandArgs.push("--modifiers", args.modifiers);
    }

    // Focus options: --auto-focus carries an explicit true/false value, so it
    // is forwarded whenever it was specified; the other two are plain flags.
    if (args.auto_focus !== undefined) {
      commandArgs.push("--auto-focus", args.auto_focus.toString());
    }
    if (args.space_switch) {
      commandArgs.push("--space-switch");
    }
    if (args.bring_to_current_space) {
      commandArgs.push("--bring-to-current-space");
    }

    // Always use JSON output
    commandArgs.push("--json-output");

    const result = await executeSwiftCli(
      commandArgs,
      context.logger,
      { timeout: 15000 } // Longer timeout for drag operations
    );

    // Failures are thrown so the catch block below produces the error response.
    if (!result.success || !result.data) {
      throw new Error(result.error?.message || "Failed to perform drag");
    }

    const dragData = result.data as DragOutput;

    // Describe the origin by element name when known, otherwise by coordinates.
    let responseText = `Dragged from (${dragData.from.x}, ${dragData.from.y})`;
    if (dragData.from.element) {
      responseText = `Dragged from ${dragData.from.element}`;
    }

    responseText += ` to (${dragData.to.x}, ${dragData.to.y})`;
    if (dragData.to.element) {
      responseText += ` on ${dragData.to.element}`;
    } else if (dragData.to.app) {
      responseText += ` to ${dragData.to.app}`;
    }

    responseText += ` over ${dragData.duration}ms`;

    return {
      content: [
        {
          type: "text",
          text: responseText,
        },
      ],
      metadata: {
        from: dragData.from,
        to: dragData.to,
        duration: dragData.duration,
      },
    };
  } catch (error) {
    context.logger.error({ error }, "Failed to perform drag");
    return {
      content: [
        {
          type: "text",
          text: `Failed to perform drag: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,98 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the hotkey tool: a required comma-separated key list and an
 * optional hold duration that defaults to 50.
 */
export const hotkeyToolSchema = z
  .object({
    keys: z.string().describe(
      [
        "Comma-separated list of keys to press (e.g., 'cmd,c' for copy, 'cmd,shift,t' for reopen tab). ",
        "Supported keys: cmd, shift, alt/option, ctrl, fn, a-z, 0-9, space, return, tab, escape, delete, ",
        "arrow_up, arrow_down, arrow_left, arrow_right, f1-f12.",
      ].join("")
    ),
    hold_duration: z
      .number()
      .optional()
      .default(50)
      .describe("Optional. Delay between key press and release in milliseconds. Default: 50."),
  })
  .describe(
    [
      "Presses keyboard shortcuts and key combinations. ",
      "Simulates pressing multiple keys simultaneously like Cmd+C or Ctrl+Shift+T. ",
      "Keys are pressed in order and released in reverse order.",
    ].join("")
  );
|
||||
|
||||
/** JSON payload that `hotkeyToolHandler` reads from the `hotkey` CLI command. */
interface HotkeyResult {
  success: boolean;
  /** Keys that were pressed; joined with " + " in the response. */
  keys: string[];
  key_count: number;
  /** Rendered in the response with an "s" suffix. */
  execution_time: number;
}

/** Input accepted by `hotkeyToolHandler`, inferred from `hotkeyToolSchema`. */
export type HotkeyInput = z.infer<typeof hotkeyToolSchema>;
|
||||
|
||||
/**
 * Handles a `peekaboo.hotkey` tool call by running the `hotkey` CLI command
 * and summarising the result as text.
 *
 * @param input   Validated tool input (`keys`, optional `hold_duration`).
 * @param context Tool context; only `context.logger` is used.
 * @returns A text response; `isError: true` when the CLI fails or an exception is thrown.
 */
export async function hotkeyToolHandler(input: HotkeyInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  try {
    logger.debug({ input }, "Processing peekaboo.hotkey tool call");

    // Build command arguments
    const args = ["hotkey"];

    // Keys
    args.push("--keys", input.keys);

    // Hold duration (falls back to 50 if the schema default was not applied)
    const holdDuration = input.hold_duration ?? 50;
    args.push("--hold-duration", holdDuration.toString());

    // Execute the command
    const result = await executeSwiftCli(args, logger);

    if (!result.success || !result.data) {
      const errorMessage = result.error?.message || "Hotkey command failed";
      logger.error({ result }, errorMessage);

      return {
        content: [
          {
            type: "text",
            text: `Failed to press hotkey: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }

    const hotkeyData = result.data as HotkeyResult;

    // Build response text
    const lines: string[] = [];
    lines.push("✅ Hotkey pressed");

    // The payload is an unchecked cast of CLI JSON, so a missing field must not
    // turn a successful key press into a "Tool execution failed" error.
    // If `keys` is absent, fall back to the keys that were requested.
    const pressedKeys = Array.isArray(hotkeyData.keys)
      ? hotkeyData.keys
      : input.keys.split(",").map((key) => key.trim());
    lines.push(`🎹 Keys: ${pressedKeys.join(" + ")}`);

    // Same guard the clean and click handlers apply to execution_time.
    if (typeof hotkeyData.execution_time === "number") {
      lines.push(`⏱️ Completed in ${hotkeyData.execution_time.toFixed(2)}s`);
    }

    return {
      content: [
        {
          type: "text",
          text: lines.join("\n"),
        },
      ],
    };
  } catch (error) {
    logger.error({ error }, "Hotkey tool execution failed");

    return {
      content: [
        {
          type: "text",
          text: `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,302 +0,0 @@
|
||||
import * as path from "path";
|
||||
import type { ImageCaptureData, ImageInput, SavedFile, ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { parseAIProviders } from "../utils/ai-providers.js";
|
||||
import { getAIProvidersConfig } from "../utils/config-loader.js";
|
||||
import { performAutomaticAnalysis } from "../utils/image-analysis.js";
|
||||
import { buildSwiftCliArgs, resolveImagePath } from "../utils/image-cli-args.js";
|
||||
import { buildImageSummary } from "../utils/image-summary.js";
|
||||
import { executeSwiftCli, readImageAsBase64 } from "../utils/peekaboo-cli.js";
|
||||
|
||||
export { imageToolSchema } from "../types/index.js";
|
||||
|
||||
export async function imageToolHandler(input: ImageInput, context: ToolContext): Promise<ToolResponse> {
|
||||
const { logger } = context;
|
||||
let _tempDirUsed: string | undefined;
|
||||
let finalSavedFiles: SavedFile[] = [];
|
||||
let analysisAttempted = false;
|
||||
let analysisSucceeded = false;
|
||||
let analysisText: string | undefined;
|
||||
let modelUsed: string | undefined;
|
||||
|
||||
try {
|
||||
logger.debug({ input }, "Processing peekaboo.image tool call");
|
||||
|
||||
// Check if this is a screen capture
|
||||
const isScreenCapture = !input.app_target || input.app_target.startsWith("screen:");
|
||||
let formatWarning: string | undefined;
|
||||
|
||||
// Format validation is now handled by the schema preprocessor
|
||||
// The format here is already normalized (lowercase, jpeg->jpg mapping applied)
|
||||
let effectiveFormat = input.format;
|
||||
|
||||
// Check if format was corrected by the preprocessor
|
||||
const originalFormat = (input as ImageInput & { _originalFormat?: string })._originalFormat;
|
||||
if (originalFormat) {
|
||||
logger.info({ originalFormat, correctedFormat: effectiveFormat }, "Format was automatically corrected");
|
||||
formatWarning = `Invalid format '${originalFormat}' was provided. Automatically using ${effectiveFormat?.toUpperCase() || "PNG"} format instead.`;
|
||||
}
|
||||
|
||||
// Defensive validation: ensure format is one of the valid values
|
||||
// This should not be necessary due to schema preprocessing, but provides extra safety
|
||||
const validFormats = ["png", "jpg", "data"];
|
||||
if (effectiveFormat && !validFormats.includes(effectiveFormat)) {
|
||||
logger.warn(
|
||||
{ originalFormat: effectiveFormat, fallbackFormat: "png" },
|
||||
`Invalid format '${effectiveFormat}' detected, falling back to PNG`
|
||||
);
|
||||
effectiveFormat = "png";
|
||||
formatWarning = `Invalid format '${input.format}' was provided. Automatically using PNG format instead.`;
|
||||
}
|
||||
|
||||
// Auto-fallback to PNG for screen captures with format 'data'
|
||||
if (isScreenCapture && effectiveFormat === "data") {
|
||||
logger.warn("Screen capture with format 'data' auto-fallback to PNG due to size constraints");
|
||||
effectiveFormat = "png";
|
||||
formatWarning =
|
||||
"Note: Screen captures cannot use format 'data' due to large image sizes that cause JavaScript stack overflow. Automatically using PNG format instead.";
|
||||
}
|
||||
|
||||
// Determine effective path and format for Swift CLI
|
||||
const swiftFormat = effectiveFormat === "data" ? "png" : effectiveFormat || "png";
|
||||
|
||||
// Create a corrected input object if format or path needs to be adjusted
|
||||
let correctedInput = input;
|
||||
|
||||
// If format was corrected and we have a path, update the file extension to match the actual format
|
||||
if (input.format && input.format !== effectiveFormat && input.path) {
|
||||
const originalPath = input.path;
|
||||
const parsedPath = path.parse(originalPath);
|
||||
|
||||
// Map format to appropriate extension
|
||||
const extensionMap: { [key: string]: string } = {
|
||||
png: ".png",
|
||||
jpg: ".jpg",
|
||||
jpeg: ".jpg",
|
||||
data: ".png", // data format saves as PNG
|
||||
};
|
||||
|
||||
const newExtension = extensionMap[effectiveFormat || "png"] || ".png";
|
||||
const correctedPath = path.join(parsedPath.dir, parsedPath.name + newExtension);
|
||||
|
||||
logger.debug(
|
||||
{ originalPath, correctedPath, originalFormat: input.format, correctedFormat: effectiveFormat },
|
||||
"Correcting file extension to match format"
|
||||
);
|
||||
|
||||
correctedInput = { ...input, path: correctedPath };
|
||||
}
|
||||
|
||||
// Resolve the effective path using the centralized logic
|
||||
const { effectivePath, tempDirUsed: tempDir } = await resolveImagePath(correctedInput, logger);
|
||||
_tempDirUsed = tempDir;
|
||||
|
||||
const args = buildSwiftCliArgs(correctedInput, effectivePath, swiftFormat, logger);
|
||||
|
||||
const swiftResponse = await executeSwiftCli(args, logger, { timeout: 30000 });
|
||||
|
||||
if (!swiftResponse.success) {
|
||||
logger.error({ error: swiftResponse.error }, "Swift CLI returned error for image capture");
|
||||
const errorMessage = swiftResponse.error?.message || "Unknown error";
|
||||
const errorDetails = swiftResponse.error?.details;
|
||||
const fullErrorMessage = errorDetails ? `${errorMessage}\n${errorDetails}` : errorMessage;
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Image capture failed: ${fullErrorMessage}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
_meta: { backend_error_code: swiftResponse.error?.code },
|
||||
};
|
||||
}
|
||||
|
||||
const imageData = swiftResponse.data as ImageCaptureData | undefined;
|
||||
if (!imageData || !imageData.saved_files || imageData.saved_files.length === 0) {
|
||||
const errorMessage = [
|
||||
`Image capture failed. The tool tried to save the image to "${effectivePath}".`,
|
||||
"The operation did not complete successfully.",
|
||||
"Please check if you have write permissions for this location.",
|
||||
].join(" ");
|
||||
logger.error({ path: effectivePath }, "Swift CLI reported success but no data/saved_files were returned.");
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: errorMessage,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
_meta: { backend_error_code: "INVALID_RESPONSE_NO_SAVED_FILES" },
|
||||
};
|
||||
}
|
||||
|
||||
const captureData = imageData;
|
||||
|
||||
// Always report all saved files
|
||||
finalSavedFiles = captureData.saved_files || [];
|
||||
|
||||
if (input.question) {
|
||||
analysisAttempted = true;
|
||||
const analysisResults: Array<{ label: string; text: string }> = [];
|
||||
|
||||
// Helper function to generate descriptive labels for analysis
|
||||
const getAnalysisLabel = (savedFile: SavedFile, isMultipleFiles: boolean): string => {
|
||||
if (!isMultipleFiles) {
|
||||
// For single files, use the item_label (app name or screen description)
|
||||
return savedFile.item_label || "Unknown";
|
||||
}
|
||||
|
||||
// For multiple files, prefer window_title if available
|
||||
if (savedFile.window_title) {
|
||||
return `"${savedFile.window_title}"`;
|
||||
}
|
||||
|
||||
// Fall back to item_label with window index if available
|
||||
if (savedFile.window_index !== undefined) {
|
||||
return `${savedFile.item_label || "Unknown"} (Window ${savedFile.window_index + 1})`;
|
||||
}
|
||||
|
||||
return savedFile.item_label || "Unknown";
|
||||
};
|
||||
|
||||
const aiProvidersConfig = await getAIProvidersConfig(logger);
|
||||
const configuredProviders = parseAIProviders(aiProvidersConfig || "");
|
||||
if (!configuredProviders.length) {
|
||||
analysisText =
|
||||
"Analysis skipped: AI analysis not configured on this server (PEEKABOO_AI_PROVIDERS is not set or empty).";
|
||||
logger.warn(analysisText);
|
||||
} else {
|
||||
// Iterate through all saved files for analysis
|
||||
const isMultipleFiles = captureData.saved_files.length > 1;
|
||||
for (const savedFile of captureData.saved_files) {
|
||||
const analysisLabel = getAnalysisLabel(savedFile, isMultipleFiles);
|
||||
|
||||
try {
|
||||
const imageBase64 = await readImageAsBase64(savedFile.path);
|
||||
logger.debug({ path: savedFile.path }, "Image read successfully for analysis.");
|
||||
|
||||
const analysisResult = await performAutomaticAnalysis(
|
||||
imageBase64,
|
||||
input.question,
|
||||
logger,
|
||||
aiProvidersConfig || ""
|
||||
);
|
||||
|
||||
if (analysisResult.error) {
|
||||
analysisResults.push({
|
||||
label: analysisLabel,
|
||||
text: analysisResult.error,
|
||||
});
|
||||
} else {
|
||||
analysisResults.push({
|
||||
label: analysisLabel,
|
||||
text: analysisResult.analysisText || "",
|
||||
});
|
||||
modelUsed = analysisResult.modelUsed;
|
||||
analysisSucceeded = true;
|
||||
logger.info({ provider: modelUsed, path: savedFile.path }, "Image analysis successful");
|
||||
}
|
||||
} catch (readError) {
|
||||
logger.error({ error: readError, path: savedFile.path }, "Failed to read captured image for analysis");
|
||||
analysisResults.push({
|
||||
label: analysisLabel,
|
||||
text: `Analysis skipped: Failed to read captured image at ${savedFile.path}. Error: ${readError instanceof Error ? readError.message : "Unknown read error"}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Format the analysis results
|
||||
if (analysisResults.length === 1) {
|
||||
analysisText = analysisResults[0].text;
|
||||
} else if (analysisResults.length > 1) {
|
||||
analysisText = analysisResults.map((result) => `Analysis for ${result.label}:\n${result.text}`).join("\n\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const content: Array<{
|
||||
type: "text" | "image";
|
||||
text?: string;
|
||||
data?: string;
|
||||
mimeType?: string;
|
||||
metadata?: Record<string, unknown>;
|
||||
}> = [];
|
||||
let summary = buildImageSummary(input, captureData, input.question);
|
||||
if (analysisAttempted) {
|
||||
summary += `\nAnalysis ${analysisSucceeded ? "succeeded" : "failed/skipped"}.`;
|
||||
}
|
||||
content.push({ type: "text", text: summary });
|
||||
|
||||
// Add format warning if applicable
|
||||
if (formatWarning) {
|
||||
content.push({ type: "text", text: formatWarning });
|
||||
}
|
||||
|
||||
if (analysisText) {
|
||||
content.push({ type: "text", text: `Analysis Result: ${analysisText}` });
|
||||
}
|
||||
|
||||
// Return base64 data if:
|
||||
// 1. Format is explicitly 'data' (but not for screen captures which auto-fallback), OR
|
||||
// 2. No path was provided AND no question is asked
|
||||
const shouldReturnData = (effectiveFormat === "data" || !input.path) && !input.question && !isScreenCapture;
|
||||
|
||||
if (shouldReturnData && captureData.saved_files?.length > 0) {
|
||||
for (const savedFile of captureData.saved_files) {
|
||||
try {
|
||||
const imageBase64 = await readImageAsBase64(savedFile.path);
|
||||
content.push({
|
||||
type: "image",
|
||||
data: imageBase64,
|
||||
mimeType: savedFile.mime_type,
|
||||
metadata: {
|
||||
item_label: savedFile.item_label,
|
||||
window_title: savedFile.window_title,
|
||||
window_id: savedFile.window_id,
|
||||
source_path: savedFile.path,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error({ error, path: savedFile.path }, "Failed to read image file for return_data");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (swiftResponse.messages?.length) {
|
||||
content.push({
|
||||
type: "text",
|
||||
text: `Capture Messages: ${swiftResponse.messages.join("; ")}`,
|
||||
});
|
||||
}
|
||||
|
||||
const result: ToolResponse = {
|
||||
content,
|
||||
saved_files: finalSavedFiles,
|
||||
};
|
||||
|
||||
if (analysisAttempted) {
|
||||
result.analysis_text = analysisText;
|
||||
result.model_used = modelUsed;
|
||||
}
|
||||
if (!analysisSucceeded && analysisAttempted) {
|
||||
result.isError = true;
|
||||
result._meta = { ...(result._meta || {}), analysis_error: analysisText };
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Unexpected error in image tool handler");
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Unexpected error: ${error instanceof Error ? error.message : "Unknown error"}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
_meta: { backend_error_code: "UNEXPECTED_HANDLER_ERROR" },
|
||||
};
|
||||
}
|
||||
}
|
||||
@ -1,23 +0,0 @@
|
||||
export { buildSwiftCliArgs } from "../utils/image-cli-args.js";
|
||||
export { agentToolHandler, agentToolSchema } from "./agent.js";
|
||||
export { analyzeToolHandler, analyzeToolSchema } from "./analyze.js";
|
||||
export { appToolHandler, appToolSchema } from "./app.js";
|
||||
export { cleanToolHandler, cleanToolSchema } from "./clean.js";
|
||||
export { clickToolHandler, clickToolSchema } from "./click.js";
|
||||
export { dialogToolHandler, dialogToolSchema } from "./dialog.js";
|
||||
export { dockToolHandler, dockToolSchema } from "./dock.js";
|
||||
export { dragToolHandler, dragToolSchema } from "./drag.js";
|
||||
export { hotkeyToolHandler, hotkeyToolSchema } from "./hotkey.js";
|
||||
export { imageToolHandler, imageToolSchema } from "./image.js";
|
||||
export { listToolHandler, listToolSchema } from "./list.js";
|
||||
export { menuToolHandler, menuToolSchema } from "./menu.js";
|
||||
export { moveToolHandler, moveToolSchema } from "./move.js";
|
||||
export { permissionsToolHandler, permissionsToolSchema } from "./permissions.js";
|
||||
export { runToolHandler, runToolSchema } from "./run.js";
|
||||
export { scrollToolHandler, scrollToolSchema } from "./scroll.js";
|
||||
export { seeToolHandler, seeToolSchema } from "./see.js";
|
||||
export { sleepToolHandler, sleepToolSchema } from "./sleep.js";
|
||||
export { spaceToolHandler, spaceToolSchema } from "./space.js";
|
||||
export { swipeToolHandler, swipeToolSchema } from "./swipe.js";
|
||||
export { typeToolHandler, typeToolSchema } from "./type.js";
|
||||
export { windowToolHandler, windowToolSchema } from "./window.js";
|
||||
@ -1,597 +0,0 @@
|
||||
import { accessSync, constants, existsSync } from "fs";
|
||||
import fs from "fs/promises";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import type { Logger } from "pino";
|
||||
import { fileURLToPath } from "url";
|
||||
import { z } from "zod";
|
||||
import type {
|
||||
ApplicationInfo,
|
||||
ApplicationListData,
|
||||
SwiftCliResponse,
|
||||
TargetApplicationInfo,
|
||||
ToolContext,
|
||||
ToolResponse,
|
||||
WindowInfo,
|
||||
WindowListData,
|
||||
} from "../types/index.js";
|
||||
import { getProviderStatus, parseAIProviders } from "../utils/ai-providers.js";
|
||||
import { getAIProvidersConfig } from "../utils/config-loader.js";
|
||||
import { execPeekaboo, executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
import { generateServerStatusString } from "../utils/server-status.js";
|
||||
|
||||
// Maps an empty string or null `item_type` to undefined so the optional enum accepts it.
const normalizeItemType = (val: unknown): unknown => (val === "" || val === null ? undefined : val);

// Coerces the loosely-typed `include_window_details` input into an array where possible.
const coerceWindowDetails = (val: unknown): unknown => {
  // Empty string, null and undefined all mean "not supplied".
  if (val === undefined || val === null || val === "") {
    return undefined;
  }

  // Arrays (and any other non-string value) are handed to the validator untouched.
  if (typeof val !== "string") {
    return val;
  }

  // A string holding a JSON array is unwrapped; anything else falls through.
  let decoded: unknown;
  try {
    decoded = JSON.parse(val);
  } catch {
    decoded = undefined;
  }
  if (Array.isArray(decoded)) {
    return decoded;
  }

  // Splitting on "," covers both cases: a comma-separated list becomes its trimmed
  // parts, and a string without a comma becomes a one-element array of itself, trimmed.
  return val.split(",").map((part) => part.trim());
};

// True when `app` is present and not blank.
const hasAppIdentifier = (data: { app?: string }): boolean => data.app !== undefined && data.app.trim() !== "";

/**
 * Input schema for the `list` tool.
 *
 * Validates `item_type`, `app` and `include_window_details`, then applies three
 * cross-field rules: `application_windows` needs an `app`; window details need
 * either `application_windows` or an `app`; `server_status` accepts neither.
 */
export const listToolSchema = z
  .object({
    item_type: z
      .preprocess(normalizeItemType, z.enum(["running_applications", "application_windows", "server_status"]).optional())
      .describe(
        "Specifies the type of items to list. If omitted or empty, it defaults to 'application_windows' if 'app' is provided, otherwise 'running_applications'. Valid options are:\n" +
          "- `running_applications`: Lists all currently running applications.\n" +
          "- `application_windows`: Lists open windows for a specific application. Requires the `app` parameter.\n" +
          "- `server_status`: Returns information about the Peekaboo MCP server."
      ),
    app: z
      .string()
      .optional()
      .describe(
        "Required when `item_type` is `application_windows`. " +
          'Specifies the target application by its name (e.g., "Safari", "TextEdit"), bundle ID, or process ID (e.g., "PID:663"). ' +
          "Fuzzy matching is used for names, so partial names may work."
      ),
    include_window_details: z.preprocess(
      coerceWindowDetails,
      z
        .array(z.enum(["off_screen", "bounds", "ids"]))
        .optional()
        .describe(
          "Optional, only applicable when `item_type` is `application_windows`. " +
            'Specifies additional details to include for each window. Provide an array of strings. Example: `["bounds", "ids"]`.\n' +
            "- `ids`: Include window ID.\n" +
            "- `bounds`: Include window position and size (x, y, width, height).\n" +
            "- `off_screen`: Indicate if the window is currently off-screen."
        )
    ),
  })
  .refine(
    (data) => {
      if (data.item_type !== "application_windows") {
        return true;
      }
      return hasAppIdentifier(data);
    },
    {
      message: "For 'application_windows', 'app' identifier is required.",
      path: ["app"],
    }
  )
  .refine(
    (data) => {
      const details = data.include_window_details;
      if (!details || details.length === 0) {
        return true;
      }
      return data.item_type === "application_windows" || hasAppIdentifier(data);
    },
    {
      message:
        "'include_window_details' is only applicable when 'item_type' is 'application_windows' or when 'app' is provided.",
      path: ["include_window_details"],
    }
  )
  .refine(
    (data) => {
      if (data.item_type !== "server_status") {
        return true;
      }
      const details = data.include_window_details;
      const noDetails = details === undefined || details.length === 0;
      return data.app === undefined && noDetails;
    },
    {
      message: "'app' and 'include_window_details' not applicable for 'server_status'.",
      path: ["item_type"],
    }
  )
  .describe(
    "Lists various system items, providing situational awareness. " +
      "The `item_type` is optional and will be inferred if omitted (defaults to 'application_windows' if 'app' is provided, else 'running_applications'). " +
      "App identifier uses fuzzy matching for convenience."
  );

// Input type derived from the schema above.
export type ListToolInput = z.infer<typeof listToolSchema>;
|
||||
|
||||
/**
 * Handles a `list` tool call.
 *
 * `server_status` is answered locally from package.json via `handleServerStatus`.
 * Every other request is translated to Swift CLI arguments, executed, and the
 * response is formatted by the applications or windows formatter.
 *
 * @param input   Validated tool input.
 * @param context Tool context; only `logger` is used here.
 * @returns A tool response; failures are reported with `isError: true` rather than thrown.
 */
export async function listToolHandler(input: ListToolInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  try {
    logger.debug({ input }, "Processing peekaboo.list tool call");

    // server_status never touches the Swift CLI list commands.
    if (input.item_type === "server_status") {
      const moduleDir = path.dirname(fileURLToPath(import.meta.url));
      const packageRootDir = path.resolve(moduleDir, "../..");
      const manifestText = await fs.readFile(path.join(packageRootDir, "package.json"), "utf-8");
      const version = JSON.parse(manifestText).version || "[unknown]";
      return await handleServerStatus(version, packageRootDir, logger);
    }

    const cliArgs = buildSwiftCliArgs(input);
    logger.debug({ args: cliArgs }, "Swift CLI arguments built");

    const swiftResponse = await executeSwiftCli(cliArgs, logger, { timeout: 15000 });

    if (!swiftResponse.success) {
      logger.error({ error: swiftResponse.error }, "Swift CLI returned error");
      const baseMessage = swiftResponse.error?.message || "Unknown error";
      const extraDetails = swiftResponse.error?.details;
      const combinedMessage = extraDetails ? `${baseMessage}\n${extraDetails}` : baseMessage;

      return {
        content: [{ type: "text" as const, text: `List operation failed: ${combinedMessage}` }],
        isError: true,
        _meta: { backend_error_code: swiftResponse.error?.code },
      };
    }

    // A "successful" response without a payload is still treated as a failure.
    if (!swiftResponse.data) {
      logger.error("Swift CLI reported success but no data was returned.");
      return {
        content: [
          { type: "text" as const, text: "List operation failed: Invalid response from list utility (no data)." },
        ],
        isError: true,
        _meta: { backend_error_code: "INVALID_RESPONSE_NO_DATA" },
      };
    }

    // Same inference as the argument builder: explicit type, else windows when an app is given, else apps.
    const explicitType = typeof input.item_type === "string" ? input.item_type.trim() : "";
    const effectiveItemType: string =
      explicitType !== "" ? explicitType : input.app ? "application_windows" : "running_applications";

    switch (effectiveItemType) {
      case "running_applications":
        return handleApplicationsList(swiftResponse.data as ApplicationListData, swiftResponse);
      case "application_windows":
        return handleWindowsList(swiftResponse.data as WindowListData, input, swiftResponse);
      default:
        // Fallback for an item type none of the formatters knows.
        return {
          content: [{ type: "text" as const, text: "List operation completed with unknown item type." }],
        };
    }
  } catch (error) {
    logger.error({ error }, "Unexpected error in list tool handler");
    const reason = error instanceof Error ? error.message : "Unknown error";
    return {
      content: [{ type: "text" as const, text: `Unexpected error: ${reason}` }],
      isError: true,
    };
  }
}
|
||||
|
||||
/**
 * Builds the multi-section, human-readable server status report.
 *
 * Sections, in order: server version string, native CLI binary status, system
 * permissions, AI provider status, environment configuration, detected
 * configuration issues, and system information.
 *
 * @param version        Server package version; compared against the CLI's reported version.
 * @param packageRootDir Package root; used as the default CLI location and passed to `execPeekaboo`.
 * @param logger         Logger passed to the config/provider helpers and used for the final report.
 * @returns A tool response whose single text item is the full report.
 */
async function handleServerStatus(version: string, packageRootDir: string, logger: Logger): Promise<ToolResponse> {
  const statusSections: string[] = [];

  // 1. Server version and AI providers
  statusSections.push(generateServerStatusString(version));

  // 2. Native Binary Status
  statusSections.push("\n## Native Binary (Swift CLI) Status");

  const cliPath = process.env.PEEKABOO_CLI_PATH || path.join(packageRootDir, "peekaboo");
  let cliStatus = "❌ Not found";
  let cliVersion = "Unknown";
  let cliExecutable = false;

  if (existsSync(cliPath)) {
    // The permission check and the version probe are kept apart so that a throwing
    // version probe cannot be reported as "not executable" while the report also
    // says "Executable: Yes".
    try {
      accessSync(cliPath, constants.X_OK);
      cliExecutable = true;
    } catch {
      cliStatus = "⚠️ Found but not executable";
    }

    if (cliExecutable) {
      // Assume failure until the probe proves otherwise.
      cliStatus = "⚠️ Found but version check failed";
      try {
        const versionResult = await execPeekaboo(["version"], packageRootDir, { expectSuccess: false });
        if (versionResult.success && versionResult.data) {
          cliVersion = versionResult.data.trim();
          cliStatus = "✅ Found and executable";
        }
      } catch (error) {
        logger.warn({ error }, "Swift CLI version check threw");
      }
    }
  }

  statusSections.push(`- Location: ${cliPath}`);
  statusSections.push(`- Status: ${cliStatus}`);
  statusSections.push(`- Version: ${cliVersion}`);
  statusSections.push(`- Executable: ${cliExecutable ? "Yes" : "No"}`);

  // 3. Permissions Status
  statusSections.push("\n## System Permissions");

  if (cliExecutable) {
    try {
      const permissionsResult = await execPeekaboo(["list", "permissions", "--json-output"], packageRootDir, {
        expectSuccess: false,
      });

      if (permissionsResult.success && permissionsResult.data) {
        const status = JSON.parse(permissionsResult.data);
        if (status.data?.permissions) {
          const perms = status.data.permissions;
          statusSections.push(`- Screen Recording: ${perms.screen_recording ? "✅ Granted" : "❌ Not granted"}`);
          statusSections.push(`- Accessibility: ${perms.accessibility ? "✅ Granted" : "❌ Not granted"}`);
        } else {
          statusSections.push("- Unable to determine permissions status");
        }
      } else {
        statusSections.push("- Unable to check permissions (CLI error)");
      }
    } catch (error) {
      // Also reached when the CLI output is not valid JSON.
      statusSections.push(`- Unable to check permissions: ${error}`);
    }
  } else {
    statusSections.push("- Unable to check permissions (CLI not available)");
  }

  // 4. AI Provider Status
  statusSections.push("\n## AI Provider Status");

  const aiProvidersEnv = await getAIProvidersConfig(logger);
  // Single source of truth for "is anything configured": missing, empty and
  // whitespace-only values all count as unconfigured, here and in section 6.
  const providerConfig = aiProvidersEnv?.trim() ? aiProvidersEnv : undefined;

  if (!providerConfig) {
    statusSections.push("❌ No AI providers configured");
    statusSections.push(
      "Configure PEEKABOO_AI_PROVIDERS environment variable or ~/.peekaboo/config.json to enable image analysis"
    );
  } else {
    const providers = parseAIProviders(providerConfig);
    if (providers.length === 0) {
      statusSections.push("❌ Invalid AI provider configuration");
      statusSections.push(`Raw config: ${providerConfig}`);
    } else {
      statusSections.push(`Found ${providers.length} configured provider${providers.length !== 1 ? "s" : ""}:`);

      for (const provider of providers) {
        statusSections.push(`\n### ${provider.provider}/${provider.model}`);

        try {
          const status = await getProviderStatus(provider, logger);

          if (status.available) {
            statusSections.push("✅ **Available and working**");

            if (status.details?.modelList && status.details.modelList.length > 0) {
              const modelCount = status.details.modelList.length;
              statusSections.push(`- Found ${modelCount} available model${modelCount !== 1 ? "s" : ""}`);
            }
          } else {
            statusSections.push("❌ **Not available**");
            if (status.error) {
              statusSections.push(`- Error: ${status.error}`);
            }

            // Provide specific troubleshooting info
            if (status.details) {
              const details = status.details;
              const providerName = provider.provider.toLowerCase();

              if (providerName === "openai") {
                if (!details.apiKeyPresent) {
                  statusSections.push("- Missing: Set OPENAI_API_KEY environment variable");
                } else if (!details.serverReachable) {
                  statusSections.push("- Network issue: Cannot reach OpenAI API");
                } else if (details.apiKeyPresent && !status.available) {
                  statusSections.push("- Invalid API key or insufficient permissions");
                }
              } else if (providerName === "ollama") {
                if (!details.serverReachable) {
                  statusSections.push("- Ollama server not running or not reachable");
                  statusSections.push("- Start with: ollama serve");
                } else if (!details.modelAvailable) {
                  statusSections.push(`- Model '${provider.model}' not installed`);
                  statusSections.push(`- Install with: ollama pull ${provider.model}`);
                  if (details.modelList && details.modelList.length > 0) {
                    statusSections.push(
                      `- Available models: ${details.modelList.slice(0, 5).join(", ")}${details.modelList.length > 5 ? "..." : ""}`
                    );
                  }
                }
              }
            }
          }
        } catch (error) {
          statusSections.push("❌ **Status check failed**");
          statusSections.push(`- Error: ${error instanceof Error ? error.message : "Unknown error"}`);
        }
      }
    }
  }

  // 5. Environment Configuration
  statusSections.push("\n## Environment Configuration");

  const logFile = process.env.PEEKABOO_LOG_FILE || path.join(os.homedir(), "Library/Logs/peekaboo-mcp.log");
  const logLevel = process.env.PEEKABOO_LOG_LEVEL || "info";
  const consoleLogging = process.env.PEEKABOO_CONSOLE_LOGGING === "true";
  const aiProviders = aiProvidersEnv || "None configured";
  const customCliPath = process.env.PEEKABOO_CLI_PATH;
  const defaultSavePath = process.env.PEEKABOO_DEFAULT_SAVE_PATH || "Not set";

  statusSections.push(`- Log File: ${logFile}`);

  // Probe the log directory once; the result feeds both this section and the issues list.
  const logDir = path.dirname(logFile);
  let logDirWritable = true;
  try {
    await fs.access(logDir, constants.W_OK);
  } catch {
    logDirWritable = false;
  }
  statusSections.push(logDirWritable ? " Status: ✅ Directory writable" : " Status: ❌ Directory not writable");

  statusSections.push(`- Log Level: ${logLevel}`);
  statusSections.push(`- Console Logging: ${consoleLogging ? "Enabled" : "Disabled"}`);
  statusSections.push(`- AI Providers: ${aiProviders}`);
  statusSections.push(`- Custom CLI Path: ${customCliPath || "Not set (using default)"}`);
  statusSections.push(`- Default Save Path: ${defaultSavePath}`);

  // 6. Configuration Issues
  statusSections.push("\n## Configuration Issues");

  const issues: string[] = [];

  if (!cliExecutable) {
    issues.push("❌ Swift CLI not found or not executable");
  }

  if (cliVersion !== version && cliVersion !== "Unknown") {
    issues.push(`⚠️ Version mismatch: Server ${version} vs CLI ${cliVersion}`);
  }

  if (!providerConfig) {
    issues.push("⚠️ No AI providers configured (analysis features will be limited)");
  }

  if (!logDirWritable) {
    issues.push(`❌ Log directory not writable: ${logDir}`);
  }

  if (issues.length === 0) {
    statusSections.push("✅ No configuration issues detected");
  } else {
    statusSections.push(...issues);
  }

  // 7. System Information
  statusSections.push("\n## System Information");
  statusSections.push(`- Platform: ${os.platform()}`);
  statusSections.push(`- Architecture: ${os.arch()}`);
  statusSections.push(`- OS Version: ${os.release()}`);
  statusSections.push(`- Node.js Version: ${process.version}`);

  const fullStatus = statusSections.join("\n");

  logger.info({ status: fullStatus }, "Server status info generated");

  return {
    content: [
      {
        type: "text" as const,
        text: fullStatus,
      },
    ],
  };
}
|
||||
|
||||
/**
 * Translates validated `list` tool input into Swift CLI arguments.
 *
 * The item type is the trimmed `item_type` when non-empty; otherwise
 * `application_windows` if `app` is truthy, else `running_applications`.
 * Unknown item types fall back to the `apps` subcommand.
 *
 * @param input Tool input (`item_type`, `app`, `include_window_details`).
 * @returns Argument vector starting with `"list"`.
 */
export function buildSwiftCliArgs(input: ListToolInput): string[] {
  const explicitType = typeof input.item_type === "string" ? input.item_type.trim() : "";
  const itemType: string = explicitType || (input.app ? "application_windows" : "running_applications");

  const cliArgs: string[] = ["list"];

  if (itemType === "application_windows") {
    cliArgs.push("windows");

    const appName = input.app?.trim();
    if (appName) {
      cliArgs.push("--app", appName);
    }

    // Blank entries are dropped; the flag is only added when something remains.
    const detailList = (input.include_window_details || []).filter((entry) => entry?.trim()).join(",");
    if (detailList) {
      cliArgs.push("--include-details", detailList);
    }
  } else if (itemType === "server_status") {
    // Always map to permissions subcommand
    cliArgs.push("permissions");
  } else {
    // running_applications, and the fallback for any unknown type
    cliArgs.push("apps");
  }

  // Filter out any undefined or empty values
  return cliArgs.filter((arg) => arg !== undefined && arg !== null && arg !== "");
}
|
||||
|
||||
/**
 * Formats the running-applications payload as a numbered text summary.
 *
 * @param data          CLI payload; a missing `applications` is treated as an empty list.
 * @param swiftResponse Full CLI response; its `messages`, if any, are appended to the summary.
 * @returns The text summary plus the raw list under `application_list`.
 */
function handleApplicationsList(
  data: ApplicationListData,
  swiftResponse: SwiftCliResponse
): ToolResponse & { application_list: ApplicationInfo[] } {
  const apps = data.applications || [];
  const plural = apps.length !== 1 ? "s" : "";

  // One newline-terminated line per application.
  const entries = apps.map((app, position) => {
    const bundlePart = app.bundle_id ? ` (${app.bundle_id})` : "";
    const activePart = app.is_active ? " [ACTIVE]" : "";
    return `${position + 1}. ${app.app_name}${bundlePart} - PID: ${app.pid}${activePart} - Windows: ${app.window_count}\n`;
  });

  let summary = `Found ${apps.length} running application${plural}`;
  if (entries.length > 0) {
    summary += `:\n\n${entries.join("")}`;
  }

  // Add messages from Swift CLI if any
  if (swiftResponse.messages?.length) {
    summary += `\nMessages: ${swiftResponse.messages.join("; ")}`;
  }

  return {
    content: [{ type: "text" as const, text: summary }],
    application_list: apps,
  };
}
|
||||
|
||||
/**
 * Formats the application-windows payload as a numbered text summary.
 *
 * @param data          CLI payload; `target_application_info` is required, `windows` defaults to empty.
 * @param _input        Unused; kept for signature compatibility.
 * @param swiftResponse Full CLI response; its `messages`, if any, are appended to the summary.
 * @returns The summary with `window_list` and `target_application_info`, or an
 *          error response when the application info is missing.
 */
function handleWindowsList(
  data: WindowListData,
  _input: ListToolInput,
  swiftResponse: SwiftCliResponse
): ToolResponse & {
  window_list?: WindowInfo[];
  target_application_info?: TargetApplicationInfo;
} {
  const appInfo = data.target_application_info;

  // Validate required fields
  if (!appInfo) {
    return {
      content: [
        {
          type: "text" as const,
          text: "List operation failed: Invalid response from list utility (missing application info).",
        },
      ],
      isError: true,
      _meta: { backend_error_code: "INVALID_RESPONSE_MISSING_APP_INFO" },
    };
  }

  const windows = data.windows || [];

  // One newline-terminated line per window; optional tags appear only when the field is present.
  const entries = windows.map((win, position) => {
    const tags: string[] = [];
    if (win.window_id !== undefined) {
      tags.push(` [ID: ${win.window_id}]`);
    }
    if (win.is_on_screen !== undefined) {
      tags.push(win.is_on_screen ? " [ON-SCREEN]" : " [OFF-SCREEN]");
    }
    if (win.bounds) {
      tags.push(` [${win.bounds.x},${win.bounds.y} ${win.bounds.width}×${win.bounds.height}]`);
    }
    return `${position + 1}. "${win.window_title}"${tags.join("")}\n`;
  });

  const plural = windows.length !== 1 ? "s" : "";
  const bundlePart = appInfo.bundle_id ? ` (${appInfo.bundle_id})` : "";
  let summary = `Found ${windows.length} window${plural} for application: ${appInfo.app_name}${bundlePart} - PID: ${appInfo.pid}`;

  if (entries.length > 0) {
    summary += `\n\nWindows:\n${entries.join("")}`;
  }

  // Add messages from Swift CLI if any
  if (swiftResponse.messages?.length) {
    summary += `\nMessages: ${swiftResponse.messages.join("; ")}`;
  }

  return {
    content: [{ type: "text" as const, text: summary }],
    window_list: windows,
    target_application_info: appInfo,
  };
}
|
||||
@ -1,283 +0,0 @@
|
||||
import type { Logger } from "pino";
|
||||
import { z } from "zod";
|
||||
import type { Menu, MenuErrorResponse, MenuItem, MenuSuccessResponse, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Zod schema for menu tool
|
||||
// Actions accepted by the menu tool, in the order they are advertised.
const menuActionNames = ["list", "click", "click-extra", "list-all"] as const;

/**
 * Input schema for the menu tool: a required `action` plus optional `app`,
 * `item`, `path` and `title` strings. Which optional fields each action needs
 * is not enforced by this schema.
 */
export const menuToolSchema = z.object({
  action: z
    .enum(menuActionNames)
    .describe(
      "Action to perform: 'list' to discover menus, 'click' to interact with menu items, 'click-extra' for system menu extras, 'list-all' for all menus"
    ),
  app: z
    .string()
    .optional()
    .describe("Target application name, bundle ID, or process ID (required for list and click actions)"),
  item: z
    .string()
    .optional()
    .describe("Simple menu item to click (for non-nested items)"),
  path: z
    .string()
    .optional()
    .describe("Menu path for nested items (e.g., 'File > Save As...' or 'Edit > Copy')"),
  title: z
    .string()
    .optional()
    .describe("Title of system menu extra (for click-extra action)"),
});

// Input type derived from the schema above.
export type MenuInput = z.infer<typeof menuToolSchema>;
|
||||
|
||||
export async function menuToolHandler(input: MenuInput, context: { logger: Logger }): Promise<ToolResponse> {
|
||||
const { logger } = context;
|
||||
|
||||
try {
|
||||
logger.debug({ input }, "Menu tool called");
|
||||
|
||||
// Validate input based on action
|
||||
if (input.action === "click") {
|
||||
if (!input.item && !input.path) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "❌ Click action requires either 'item' or 'path' parameter",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
if (input.item && input.path) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "❌ Click action cannot have both 'item' and 'path' parameters",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
if (!input.app) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "❌ Click action requires 'app' parameter",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (input.action === "list" && !input.app) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "❌ List action requires 'app' parameter",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
if (input.action === "click-extra" && !input.title) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "❌ Click-extra action requires 'title' parameter for the menu extra",
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Build command arguments
|
||||
const args = ["menu", input.action];
|
||||
|
||||
if (input.app) {
|
||||
args.push("--app", input.app);
|
||||
}
|
||||
|
||||
if (input.item) {
|
||||
args.push("--item", input.item);
|
||||
}
|
||||
|
||||
if (input.path) {
|
||||
args.push("--path", input.path);
|
||||
}
|
||||
|
||||
if (input.title) {
|
||||
args.push("--title", input.title);
|
||||
}
|
||||
|
||||
logger.debug({ args }, "Executing menu command");
|
||||
|
||||
const result = await executeSwiftCli(args, logger);
|
||||
|
||||
logger.debug({ result }, "Menu command completed");
|
||||
|
||||
// Handle Swift CLI response
|
||||
if (!result.success) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `❌ Menu command failed: ${result.error?.message || "Unknown error"}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Parse the response data
|
||||
let responseData = result.data;
|
||||
if (typeof result.data === "string") {
|
||||
try {
|
||||
responseData = JSON.parse(result.data);
|
||||
} catch (parseError) {
|
||||
logger.warn({ parseError, data: result.data }, "Failed to parse menu command JSON output");
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Menu ${input.action} completed. Output: ${result.data}`,
|
||||
},
|
||||
],
|
||||
isError: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Handle error responses first
|
||||
if (responseData && typeof responseData === "object" && "error" in responseData) {
|
||||
const errorResponse = responseData as MenuErrorResponse;
|
||||
const errorMessage = errorResponse.error.message || "Menu command failed";
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `❌ Menu Error: ${errorMessage}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Handle successful menu command
|
||||
if (responseData && typeof responseData === "object" && "success" in responseData) {
|
||||
const menuResponse = responseData as MenuSuccessResponse | MenuErrorResponse;
|
||||
|
||||
if (menuResponse.success && "data" in menuResponse && menuResponse.data) {
|
||||
const menuData = menuResponse.data;
|
||||
let responseText = "";
|
||||
|
||||
if (input.action === "list") {
|
||||
responseText = `✅ Menu structure for ${input.app}:\n\n`;
|
||||
|
||||
if (menuData.menus && Array.isArray(menuData.menus)) {
|
||||
menuData.menus.forEach((menu: Menu) => {
|
||||
responseText += `**${menu.title || menu.name}**\n`;
|
||||
if (menu.items && Array.isArray(menu.items)) {
|
||||
menu.items.forEach((item: MenuItem) => {
|
||||
const itemName = item.title || item.name || "Unnamed Item";
|
||||
const separator = item.separator ? " (separator)" : "";
|
||||
const enabled = item.enabled === false ? " (disabled)" : "";
|
||||
responseText += ` • ${itemName}${separator}${enabled}\n`;
|
||||
});
|
||||
}
|
||||
responseText += "\n";
|
||||
});
|
||||
} else if (menuData.menu_bar && Array.isArray(menuData.menu_bar)) {
|
||||
// Alternative format
|
||||
menuData.menu_bar.forEach((menu: Menu) => {
|
||||
responseText += `**${menu.title}**\n`;
|
||||
if (menu.items) {
|
||||
menu.items.forEach((item: MenuItem) => {
|
||||
responseText += ` • ${item.title || item.name}\n`;
|
||||
});
|
||||
}
|
||||
responseText += "\n";
|
||||
});
|
||||
} else {
|
||||
responseText += "Menu structure data available but in unexpected format.";
|
||||
}
|
||||
} else if (input.action === "click") {
|
||||
const clickedItem = input.path || input.item || "menu item";
|
||||
responseText = `✅ Successfully clicked menu item: ${clickedItem}`;
|
||||
if (menuData.message) {
|
||||
responseText += `\n${menuData.message}`;
|
||||
}
|
||||
} else if (input.action === "click-extra") {
|
||||
responseText = `✅ Successfully clicked menu extra: ${input.title}`;
|
||||
if (menuData.message) {
|
||||
responseText += `\n${menuData.message}`;
|
||||
}
|
||||
} else if (input.action === "list-all") {
|
||||
responseText = `✅ All menus listed:\n\n`;
|
||||
// Similar structure to list, but for all applications
|
||||
if (menuData.menus && Array.isArray(menuData.menus)) {
|
||||
menuData.menus.forEach((menu: Menu) => {
|
||||
responseText += `**${menu.title || menu.name}**\n`;
|
||||
if (menu.items && Array.isArray(menu.items)) {
|
||||
menu.items.forEach((item: MenuItem) => {
|
||||
const itemName = item.title || item.name || "Unnamed Item";
|
||||
const separator = item.separator ? " (separator)" : "";
|
||||
const enabled = item.enabled === false ? " (disabled)" : "";
|
||||
responseText += ` • ${itemName}${separator}${enabled}\n`;
|
||||
});
|
||||
}
|
||||
responseText += "\n";
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: responseText,
|
||||
},
|
||||
],
|
||||
isError: false,
|
||||
};
|
||||
}
|
||||
|
||||
// Handle menu command errors within wrapped response
|
||||
if (!menuResponse.success) {
|
||||
const errorResponse = menuResponse as MenuErrorResponse;
|
||||
const errorMessage = errorResponse.error?.message || "Menu command failed";
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `❌ Menu Error: ${errorMessage}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback for unexpected response format
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Menu ${input.action} completed with unexpected response format: ${JSON.stringify(responseData)}`,
|
||||
},
|
||||
],
|
||||
isError: false,
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error({ error, input }, "Menu tool execution failed");
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `❌ Menu ${input.action} failed: ${errorMessage}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
@ -1,126 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Schema for move tool
|
||||
// Validation schema for the move tool input. Unknown keys are rejected, and
// at least one of the target fields (coordinates, to, id, center) must be set.
export const moveToolSchema = z
  .object({
    // Absolute position written as two non-negative integers: "x,y".
    coordinates: z
      .string()
      .regex(/^\d+,\d+$/, "Coordinates must be in format 'x,y'")
      .optional(),
    to: z.string().optional(),
    id: z.string().optional(),
    center: z.boolean().optional(),
    smooth: z.boolean().optional(),
    duration: z.number().int().positive().optional(),
    steps: z.number().int().positive().optional(),
    session: z.string().optional(),
  })
  .strict()
  .refine((data) => data.coordinates || data.to || data.id || data.center, {
    message: "Must specify either coordinates, to, id, or center",
  });
|
||||
|
||||
// Input type of the move tool, inferred from moveToolSchema.
export type MoveInput = z.infer<typeof moveToolSchema>;
|
||||
|
||||
/** Shape the move handler expects in the `data` payload of the CLI result. */
interface MoveOutput {
  /** Action label in the payload; the handler does not read it. */
  action: string;
  /** Cursor position reported back after the move. */
  position: { x: number; y: number };
  /** Optional description of what the cursor was moved onto. */
  target?: string;
  /** Optional movement duration; the handler prints it with an "ms" suffix. */
  duration?: number;
}
|
||||
|
||||
/**
 * Handles the move tool: turns the validated input into arguments for the
 * Swift CLI `move` command, runs it, and reports where the cursor ended up.
 *
 * @param args - Input matching `moveToolSchema`.
 * @param context - Tool context; only `context.logger` is used.
 * @returns A text response describing the move plus `metadata` carrying the
 *   reported position and target, or an `isError: true` response when the CLI
 *   call fails or returns no data.
 */
export async function moveToolHandler(args: MoveInput, context: ToolContext): Promise<ToolResponse> {
  // Fields first, message second: this is the call shape every other tool
  // handler in this file uses, so `args` is logged as structured data.
  context.logger.debug({ args }, "Moving mouse cursor");

  try {
    const commandArgs = ["move"];

    // Add position arguments
    if (args.coordinates) {
      commandArgs.push(args.coordinates);
    }
    if (args.to) {
      commandArgs.push("--to", args.to);
    }
    if (args.id) {
      commandArgs.push("--id", args.id);
    }
    if (args.center) {
      commandArgs.push("--center");
    }

    // Add movement options
    if (args.smooth) {
      commandArgs.push("--smooth");
    }
    if (args.duration !== undefined) {
      commandArgs.push("--duration", args.duration.toString());
    }
    if (args.steps !== undefined) {
      commandArgs.push("--steps", args.steps.toString());
    }
    if (args.session) {
      commandArgs.push("--session", args.session);
    }

    // Always use JSON output
    commandArgs.push("--json-output");

    // Execute move command
    const result = await executeSwiftCli(
      commandArgs,
      context.logger,
      { timeout: 10000 } // Longer timeout for smooth movements
    );

    // Any failure is routed through the catch block below so that there is a
    // single place that logs and formats the error response.
    if (!result.success || !result.data) {
      throw new Error(result.error?.message || "Failed to move cursor");
    }

    const moveData = result.data as MoveOutput;

    // Format the response
    let responseText = `Moved cursor to (${moveData.position.x}, ${moveData.position.y})`;
    if (moveData.target) {
      responseText += ` on ${moveData.target}`;
    }
    if (args.smooth && moveData.duration) {
      responseText += ` over ${moveData.duration}ms`;
    }

    return {
      content: [
        {
          type: "text",
          text: responseText,
        },
      ],
      metadata: {
        position: moveData.position,
        target: moveData.target,
      },
    };
  } catch (error) {
    // Same (fields, message) order as the debug call above.
    context.logger.error({ error }, "Failed to move cursor");
    return {
      content: [
        {
          type: "text",
          text: `Failed to move cursor: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,63 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Schema for permissions tool
|
||||
// Strict empty object: the tool takes no parameters and rejects unknown keys.
export const permissionsToolSchema = z.object({}).strict();
|
||||
|
||||
// Input type of the permissions tool, inferred from permissionsToolSchema.
export type PermissionsInput = z.infer<typeof permissionsToolSchema>;
|
||||
|
||||
/** Shape the permissions handler expects in the `data` payload of the CLI result. */
interface PermissionsOutput {
  /** Rendered as "Granted" when true, "Not granted" otherwise. */
  screen_recording: boolean;
  /** Rendered as "Granted" when true, "Not granted" otherwise. */
  accessibility: boolean;
  /** Optional extra line printed after the screen recording status. */
  screen_recording_message?: string;
  /** Optional extra line printed after the accessibility status. */
  accessibility_message?: string;
}
|
||||
|
||||
/**
 * Handles the permissions tool: runs the Swift CLI `permissions` command and
 * renders the screen recording and accessibility status as text.
 *
 * @param _args - Unused; the tool takes no parameters.
 * @param context - Tool context; only `context.logger` is used.
 * @returns A text response with one status line per permission (plus any
 *   messages the CLI supplied) and `metadata.permissions` holding the raw
 *   payload, or an `isError: true` response when the CLI call fails.
 */
export async function permissionsToolHandler(_args: PermissionsInput, context: ToolContext): Promise<ToolResponse> {
  context.logger.debug("Checking macOS permissions");

  try {
    // Execute permissions command with JSON output
    const result = await executeSwiftCli(["permissions", "--json-output"], context.logger, { timeout: 5000 });

    // Failures are thrown so the catch block is the single error path.
    if (!result.success || !result.data) {
      throw new Error(result.error?.message || "Failed to get permissions");
    }

    const permissionsData = result.data as PermissionsOutput;

    // Format the response; empty message slots are removed by filter(Boolean).
    const statusText = [
      `Screen Recording: ${permissionsData.screen_recording ? "✅ Granted" : "❌ Not granted"}`,
      permissionsData.screen_recording_message || "",
      `Accessibility: ${permissionsData.accessibility ? "✅ Granted" : "❌ Not granted"}`,
      permissionsData.accessibility_message || "",
    ]
      .filter(Boolean)
      .join("\n");

    return {
      content: [
        {
          type: "text",
          text: statusText,
        },
      ],
      metadata: {
        permissions: permissionsData,
      },
    };
  } catch (error) {
    // Fields first, message second: the call shape the other tool handlers in
    // this file use, so the error is logged as structured data.
    context.logger.error({ error }, "Failed to check permissions");
    return {
      content: [
        {
          type: "text",
          text: `Failed to check permissions: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,181 +0,0 @@
|
||||
import * as fs from "fs/promises";
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Validation schema for the run tool input: a script path plus optional
// output file and two boolean flags that default to false.
export const runToolSchema = z
  .object({
    script_path: z.string().describe("Path to .peekaboo.json script file containing automation commands."),
    output: z.string().optional().describe("Optional. Save results to file instead of stdout."),
    no_fail_fast: z
      .boolean()
      .optional()
      .default(false)
      .describe("Optional. Continue execution even if a step fails. Default: false."),
    verbose: z.boolean().optional().default(false).describe("Optional. Show detailed step execution. Default: false."),
  })
  .describe(
    [
      "Runs a batch script of Peekaboo commands from a .peekaboo.json file. ",
      "Scripts can automate complex UI workflows by chaining see, click, type, and other commands. ",
      "Each command in the script runs sequentially.",
    ].join("")
  );
|
||||
|
||||
/** Shape the run handler expects in the `data` payload of the CLI result. */
interface RunResult {
  /** Overall outcome; selects the success or failure headline. */
  success: boolean;
  scriptPath: string;
  description?: string;
  totalSteps: number;
  completedSteps: number;
  failedSteps: number;
  /** Printed with two decimals and an "s" suffix. */
  executionTime: number;
  /** Per-step outcomes; entries with `success: false` are listed as failed steps. */
  steps: {
    stepNumber: number;
    command: string;
    success: boolean;
    error?: string;
  }[];
}
|
||||
|
||||
/** Shape the run handler assumes for a parsed .peekaboo.json script file. */
interface PeekabooScript {
  /** Optional script name; only used for logging. */
  name?: string;
  description?: string;
  /** Commands in the script; the handler only checks that this is an array. */
  commands: {
    command: string;
    args?: string[];
    comment?: string;
  }[];
}
|
||||
|
||||
// Input type of the run tool, inferred from runToolSchema.
export type RunInput = z.infer<typeof runToolSchema>;
|
||||
|
||||
/**
 * Handles the run tool: checks that the script file is readable JSON with a
 * `commands` array, runs it through the Swift CLI `run` command, and
 * summarizes the outcome.
 *
 * @param input - Input matching `runToolSchema`.
 * @param context - Tool context; only `context.logger` is used.
 * @returns A text summary with `_meta` counters when the CLI returned data
 *   (even if the script itself failed), or an `isError: true` response when
 *   the script cannot be loaded, the CLI returned no data, or an unexpected
 *   error is thrown.
 */
export async function runToolHandler(input: RunInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  try {
    logger.debug({ input }, "Processing peekaboo.run tool call");

    // Validate script file exists and is readable. The parsed script is only
    // used for this check and for logging; the CLI re-reads the file itself.
    try {
      const scriptContent = await fs.readFile(input.script_path, "utf-8");
      const script: PeekabooScript = JSON.parse(scriptContent);

      if (!script.commands || !Array.isArray(script.commands)) {
        throw new Error("Script must contain a 'commands' array");
      }

      logger.info(
        {
          scriptName: script.name,
          commandCount: script.commands.length,
        },
        "Loaded Peekaboo script"
      );
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        content: [
          {
            type: "text",
            text: `Failed to load script: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }

    // Build command arguments
    const args = ["run", input.script_path];

    // Output file
    if (input.output) {
      args.push("--output", input.output);
    }

    // No fail fast flag
    if (input.no_fail_fast) {
      args.push("--no-fail-fast");
    }

    // Verbose flag
    if (input.verbose) {
      args.push("--verbose");
    }

    // Always request JSON output for parsing
    args.push("--json-output");

    // Execute the command
    const result = await executeSwiftCli(args, logger);

    // Only the absence of data is treated as a hard failure here; a result
    // with data is summarized below using its own `success` field.
    if (!result.data) {
      const errorMessage = result.error?.message || "Run command failed";
      logger.error({ result }, errorMessage);

      return {
        content: [
          {
            type: "text",
            text: `Failed to execute script: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }

    const runData = result.data as RunResult;

    // Build response text
    const lines: string[] = [];

    if (runData.success) {
      lines.push("✅ Script executed successfully");
    } else {
      lines.push("❌ Script execution failed");
    }

    lines.push(`📄 Script: ${runData.scriptPath}`);
    if (runData.description) {
      lines.push(`📝 Description: ${runData.description}`);
    }
    lines.push(`🔢 Total steps: ${runData.totalSteps}`);
    lines.push(`✅ Completed: ${runData.completedSteps}`);
    lines.push(`❌ Failed: ${runData.failedSteps}`);
    lines.push(`⏱️ Total time: ${runData.executionTime?.toFixed(2) || "0.00"}s`);

    // Show failed steps. `steps` is guarded like `executionTime` above: a
    // payload without it should still produce the summary instead of throwing
    // into the generic catch block.
    const failedSteps = (runData.steps ?? []).filter((step) => !step.success);
    if (failedSteps.length > 0) {
      lines.push("\n❌ Failed steps:");
      failedSteps.forEach((step) => {
        lines.push(` - Step ${step.stepNumber} (${step.command}): ${step.error || "Unknown error"}`);
      });
    }

    return {
      content: [
        {
          type: "text",
          text: lines.join("\n"),
        },
      ],
      _meta: {
        script_path: runData.scriptPath,
        completed_steps: runData.completedSteps,
        total_steps: runData.totalSteps,
        success: runData.success,
      },
    };
  } catch (error) {
    logger.error({ error }, "Run tool execution failed");

    return {
      content: [
        {
          type: "text",
          text: `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,136 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Validation schema for the scroll tool input. `direction` is required;
// `amount`, `delay` and `smooth` carry defaults.
export const scrollToolSchema = z
  .object({
    direction: z
      .enum(["up", "down", "left", "right"])
      .describe("Scroll direction: up (content moves up), down (content moves down), left, or right."),
    amount: z.number().optional().default(3).describe("Optional. Number of scroll ticks/lines. Default: 3."),
    on: z
      .string()
      .optional()
      .describe(
        "Optional. Element ID to scroll on (from see command). If not specified, scrolls at current mouse position."
      ),
    session: z
      .string()
      .optional()
      .describe("Optional. Session ID from see command. Uses latest session if not specified."),
    delay: z
      .number()
      .optional()
      .default(2)
      .describe("Optional. Delay between scroll ticks in milliseconds. Default: 2."),
    smooth: z.boolean().optional().default(false).describe("Optional. Use smooth scrolling with smaller increments."),
  })
  .describe(
    [
      "Scrolls the mouse wheel in any direction. ",
      "Can target specific elements or scroll at current mouse position. ",
      "Supports smooth scrolling and configurable speed.",
    ].join("")
  );
|
||||
|
||||
/** Shape the scroll handler expects in the `data` payload of the CLI result. */
interface ScrollResult {
  success: boolean;
  direction: string;
  /** Printed with a "ticks" suffix. */
  amount: number;
  /** Scroll location; only printed when an element was targeted. */
  location: { x: number; y: number };
  total_ticks: number;
  /** Printed with two decimals and an "s" suffix. */
  execution_time: number;
}
|
||||
|
||||
// Input type of the scroll tool, inferred from scrollToolSchema.
export type ScrollInput = z.infer<typeof scrollToolSchema>;
|
||||
|
||||
/**
 * Handles the scroll tool: builds the argument list for the Swift CLI
 * `scroll` command from the input, runs it, and summarizes the result.
 *
 * @param input - Input matching `scrollToolSchema`.
 * @param context - Tool context; only `context.logger` is used.
 * @returns A multi-line text summary on success, or an `isError: true`
 *   response when the CLI reports failure, returns no data, or throws.
 */
export async function scrollToolHandler(input: ScrollInput, context: ToolContext): Promise<ToolResponse> {
  const log = context.logger;

  try {
    log.debug({ input }, "Processing peekaboo.scroll tool call");

    // Fall back to the schema defaults if the fields arrive unset.
    const tickCount = input.amount ?? 3;
    const tickDelay = input.delay ?? 2;

    // Argument order: direction, amount, [on], [session], delay, [smooth].
    const cliArgs: string[] = [
      "scroll",
      "--direction",
      input.direction,
      "--amount",
      tickCount.toString(),
      ...(input.on ? ["--on", input.on] : []),
      ...(input.session ? ["--session", input.session] : []),
      "--delay",
      tickDelay.toString(),
      ...(input.smooth ? ["--smooth"] : []),
    ];

    const outcome = await executeSwiftCli(cliArgs, log);

    if (!(outcome.success && outcome.data)) {
      const failure = outcome.error?.message || "Scroll command failed";
      log.error({ result: outcome }, failure);

      return {
        content: [{ type: "text", text: `Failed to perform scroll: ${failure}` }],
        isError: true,
      };
    }

    const scrolled = outcome.data as ScrollResult;

    const summary: string[] = [
      "✅ Scroll completed",
      `🎯 Direction: ${scrolled.direction}`,
      `📊 Amount: ${scrolled.amount} ticks`,
    ];

    // The location line is only shown when a specific element was targeted.
    if (input.on) {
      summary.push(`📍 Location: (${Math.round(scrolled.location.x)}, ${Math.round(scrolled.location.y)})`);
    }

    summary.push(`⏱️ Completed in ${scrolled.execution_time.toFixed(2)}s`);

    return {
      content: [{ type: "text", text: summary.join("\n") }],
    };
  } catch (error) {
    log.error({ error }, "Scroll tool execution failed");

    const reason = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: "text", text: `Tool execution failed: ${reason}` }],
      isError: true,
    };
  }
}
|
||||
@ -1,288 +0,0 @@
|
||||
import * as fs from "fs/promises";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import { z } from "zod";
|
||||
import type { SeeResponseData, ToolContext, ToolResponse, UIElement } from "../types/index.js";
|
||||
import { executeSwiftCli, readImageAsBase64 } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Validation schema for the see tool input. Every field is optional;
// `annotate` defaults to false.
export const seeToolSchema = z
  .object({
    app_target: z
      .string()
      .optional()
      .describe(
        [
          "Optional. Specifies the capture target (same as image tool).",
          "For example:",
          "Omit or use an empty string (e.g., `''`) for all screens.",
          "Use `'screen:INDEX'` (e.g., `'screen:0'`) for a specific display.",
          "Use `'frontmost'` for all windows of the current foreground application.",
          "Use `'AppName'` (e.g., `'Safari'`) for all windows of that application.",
          "Use `'PID:PROCESS_ID'` (e.g., `'PID:663'`) to target a specific process by its PID.",
        ].join("\n")
      ),
    path: z
      .string()
      .optional()
      .describe("Optional. Path to save the screenshot. If not provided, uses a temporary file."),
    session: z
      .string()
      .optional()
      .describe("Optional. Session ID for UI automation state tracking. Creates new session if not provided."),
    annotate: z
      .boolean()
      .optional()
      .default(false)
      .describe("Optional. If true, generates an annotated screenshot with interaction markers and IDs."),
  })
  .describe(
    [
      "Captures a screenshot and analyzes UI elements for automation. ",
      "Returns UI element map with Peekaboo IDs (B1 for buttons, T1 for text fields, etc.) ",
      "that can be used with click, type, and other interaction commands. ",
      "Creates or updates a session for tracking UI state.",
    ].join("")
  );
|
||||
|
||||
/** One UI element in the form the see handler reports it. */
interface UIElementResult {
  /** Element ID; falls back to the UI map key when the element has none. */
  id: string;
  /** Element role; "unknown" when the UI map entry has none. */
  role: string;
  title?: string;
  label?: string;
  value?: string;
  /** Position and size; all zeros when the UI map entry has no frame. */
  bounds: { x: number; y: number; width: number; height: number };
  is_actionable: boolean;
}
|
||||
|
||||
/** Normalized result the see handler assembles from the CLI output. */
interface SeeResult {
  /** Path of the screenshot that the summary refers to. */
  screenshot_path: string;
  /** Session identifier; "unknown" when the CLI did not report one. */
  session_id: string;
  ui_elements: UIElementResult[];
  application?: string;
  window?: string;
  /** ISO timestamp taken when the handler builds the result. */
  timestamp: string;
}
|
||||
|
||||
// Input type of the see tool, inferred from seeToolSchema.
export type SeeInput = z.infer<typeof seeToolSchema>;
|
||||
|
||||
/**
 * Handles the see tool: runs the Swift CLI `see` command, loads the UI map
 * file it points to, and returns a text summary of the detected elements
 * (plus the screenshot as base64 when annotation was requested).
 *
 * @param input - Input matching `seeToolSchema`.
 * @param context - Tool context; only `context.logger` is used.
 * @returns Response content with `_meta` counters (session, element count,
 *   actionable count), or an `isError: true` response when the CLI reports
 *   failure, returns no data, or an unexpected error is thrown.
 */
export async function seeToolHandler(input: SeeInput, context: ToolContext): Promise<ToolResponse> {
  const { logger } = context;

  try {
    logger.debug({ input }, "Processing peekaboo.see tool call");

    // Build command arguments
    const args = ["see"];

    if (input.app_target) {
      // Parse app_target similar to image tool
      const [targetType, ...targetParts] = input.app_target.split(":");

      if (targetType === "screen" && targetParts.length > 0) {
        args.push("--mode", "screen", "--screen-index", targetParts[0]);
      } else if (targetType === "frontmost") {
        args.push("--mode", "frontmost");
      } else if (targetType.startsWith("PID") && targetParts.length > 0) {
        args.push("--app", `PID:${targetParts[0]}`);
      } else if (targetParts.length === 0) {
        args.push("--app", targetType);
      } else if (targetParts[0] === "WINDOW_TITLE" && targetParts.length > 1) {
        // Re-join so that window titles containing ':' survive the split.
        args.push("--app", targetType, "--window-title", targetParts.slice(1).join(":"));
      } else if (targetParts[0] === "WINDOW_INDEX" && targetParts.length > 1) {
        args.push("--app", targetType, "--window-index", targetParts[1]);
      } else {
        // No branch matched, so no target argument is sent. Surface that
        // instead of silently running the command without a target.
        logger.warn(
          { app_target: input.app_target },
          "Unrecognized app_target format; no capture target was passed to the CLI"
        );
      }
    }

    // Output path
    const outputPath = input.path || path.join(os.tmpdir(), `peekaboo-see-${Date.now()}.png`);
    args.push("--path", outputPath);

    // Session management
    if (input.session) {
      args.push("--session", input.session);
    }

    // Annotation
    if (input.annotate) {
      args.push("--annotate");
    }

    // Add JSON output flag to get structured data
    args.push("--json-output");

    // Execute the command
    const result = await executeSwiftCli(args, logger);

    if (!result.success || !result.data) {
      const errorMessage = result.error?.message || "See command failed";
      logger.error({ result }, errorMessage);

      return {
        content: [
          {
            type: "text",
            text: `Failed to capture UI state: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }

    // The CLI returns data in a different format than expected
    const cliData = result.data as SeeResponseData;

    // Read the UI map from the file. A missing or unreadable map is
    // non-fatal: it is logged and the response reports zero elements.
    let uiElements: UIElementResult[] = [];
    if (cliData.ui_map && typeof cliData.ui_map === "string") {
      try {
        const mapFileContent = await fs.readFile(cliData.ui_map, "utf-8");
        const mapData = JSON.parse(mapFileContent);

        // Transform the UI map to the expected format
        if (mapData.uiMap) {
          uiElements = Object.entries(mapData.uiMap).map(([key, elem]) => {
            const element = elem as UIElement;
            return {
              id: element.id || key,
              role: element.role || "unknown",
              title: element.title,
              label: element.label,
              value: element.value,
              // frame is read as [[x, y], [width, height]].
              bounds: element.frame
                ? {
                    x: element.frame[0][0],
                    y: element.frame[0][1],
                    width: element.frame[1][0],
                    height: element.frame[1][1],
                  }
                : { x: 0, y: 0, width: 0, height: 0 },
              is_actionable: element.isActionable || false,
            };
          });
        }
      } catch (err) {
        logger.warn({ error: err }, "Failed to read UI map file");
      }
    }

    // Build the SeeResult in the expected format
    const seeData: SeeResult = {
      screenshot_path: cliData.screenshot_annotated || cliData.screenshot_raw || cliData.screenshot || outputPath,
      session_id: cliData.session_id || cliData.session || "unknown",
      ui_elements: uiElements,
      application: cliData.application_name,
      window: cliData.window_title,
      timestamp: new Date().toISOString(),
    };

    // Build response
    const responseContent: Array<{ type: "text" | "image"; text?: string; data?: string; mimeType?: string }> = [];

    // Add text summary
    const summary = buildSeeSummary(seeData);
    responseContent.push({
      type: "text",
      text: summary,
    });

    // If annotated, include the screenshot as base64
    if (input.annotate && seeData.screenshot_path) {
      try {
        const base64Data = await readImageAsBase64(seeData.screenshot_path);
        responseContent.push({
          type: "image",
          data: base64Data,
          mimeType: "image/png",
        });
      } catch (err) {
        // The text summary is still returned when the image cannot be read.
        logger.warn({ error: err }, "Failed to read annotated screenshot");
      }
    }

    return {
      content: responseContent,
      _meta: {
        session_id: seeData.session_id,
        element_count: seeData.ui_elements.length,
        actionable_count: seeData.ui_elements.filter((el) => el.is_actionable).length,
      },
    };
  } catch (error) {
    logger.error({ error }, "See tool execution failed");

    return {
      content: [
        {
          type: "text",
          text: `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
|
||||
/**
 * Renders a SeeResult as a human-readable, newline-joined summary: a header
 * with session and screenshot details, then the elements grouped by role
 * (roles sorted, elements in their original order), then a usage hint.
 *
 * @param data - Normalized see result to describe.
 * @returns The summary text.
 */
function buildSeeSummary(data: SeeResult): string {
  const output: string[] = [
    "📸 UI State Captured",
    `Session ID: ${data.session_id}`,
    ...(data.application ? [`Application: ${data.application}`] : []),
    ...(data.window ? [`Window: ${data.window}`] : []),
    `Screenshot: ${data.screenshot_path}`,
    `Elements found: ${data.ui_elements.length}`,
  ];

  // Bucket the elements by role, keeping encounter order inside each bucket.
  const buckets = new Map<string, typeof data.ui_elements>();
  data.ui_elements.forEach((element) => {
    const bucket = buckets.get(element.role);
    if (bucket) {
      bucket.push(element);
    } else {
      buckets.set(element.role, [element]);
    }
  });

  output.push("\nUI Elements:");

  // Roles are emitted in sorted order so the output is stable.
  Array.from(buckets.keys())
    .sort()
    .forEach((role) => {
      const members = buckets.get(role);
      if (!members) {
        return;
      }

      const actionable = members.filter((member) => member.is_actionable).length;
      output.push(`\n${role} (${members.length} found, ${actionable} actionable):`);

      members.forEach((member) => {
        const segments = [` ${member.id}`];

        // Show the first available of title, label, value.
        if (member.title) {
          segments.push(`"${member.title}"`);
        } else if (member.label) {
          segments.push(`"${member.label}"`);
        } else if (member.value) {
          segments.push(`value: "${member.value}"`);
        }

        segments.push(`at (${Math.round(member.bounds.x)}, ${Math.round(member.bounds.y)})`);

        if (!member.is_actionable) {
          segments.push("[not actionable]");
        }

        output.push(segments.join(" - "));
      });
    });

  output.push("\nUse element IDs (B1, T1, etc.) with click, type, and other interaction commands.");

  return output.join("\n");
}
|
||||
@ -1,85 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
// Validation schema for the sleep tool input: a single non-negative duration.
export const sleepToolSchema = z
  .object({
    duration: z
      .preprocess((raw) => {
        // Non-strings pass through untouched; strings are converted when they
        // parse as a number and otherwise left for the number check to reject.
        if (typeof raw !== "string") {
          return raw;
        }
        const parsed = Number.parseFloat(raw);
        return Number.isNaN(parsed) ? raw : parsed;
      }, z.number().min(0))
      .describe("Sleep duration in milliseconds."),
  })
  .describe(
    [
      "Pauses execution for a specified duration. ",
      "Useful for waiting between UI actions, allowing animations to complete, ",
      "or pacing automated workflows.",
    ].join("")
  );
|
||||
|
||||
/** Shape the sleep handler expects in the `data` payload of the CLI result. */
interface SleepResult {
  success: boolean;
  requested_duration: number;
  /** Divided by 1000 by the handler before being shown in seconds. */
  actual_duration: number;
}
|
||||
|
||||
// Input type of the sleep tool, inferred from sleepToolSchema.
export type SleepInput = z.infer<typeof sleepToolSchema>;
|
||||
|
||||
/**
 * Handles the sleep tool: runs the Swift CLI `sleep` command with the
 * requested duration and reports how long the pause lasted.
 *
 * @param input - Input matching `sleepToolSchema`.
 * @param context - Tool context; only `context.logger` is used.
 * @returns A one-line text response with the pause length in seconds, or an
 *   `isError: true` response when the CLI reports failure, returns no data,
 *   or throws.
 */
export async function sleepToolHandler(input: SleepInput, context: ToolContext): Promise<ToolResponse> {
  const log = context.logger;

  try {
    log.debug({ input }, "Processing peekaboo.sleep tool call");

    const outcome = await executeSwiftCli(["sleep", input.duration.toString()], log);

    if (!(outcome.success && outcome.data)) {
      const failure = outcome.error?.message || "Sleep command failed";
      log.error({ result: outcome }, failure);

      return {
        content: [{ type: "text", text: `Failed to sleep: ${failure}` }],
        isError: true,
      };
    }

    // The reported duration is divided by 1000 to display it in seconds.
    const pausedSeconds = (outcome.data as SleepResult).actual_duration / 1000;

    return {
      content: [{ type: "text", text: `⏸️ Paused for ${pausedSeconds.toFixed(1)}s` }],
    };
  } catch (error) {
    log.error({ error }, "Sleep tool execution failed");

    const reason = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: "text", text: `Tool execution failed: ${reason}` }],
      isError: true,
    };
  }
}
|
||||
@ -1,192 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the space tool.
 *
 * Unknown keys are rejected (`strict`). A cross-field check then enforces:
 * - `switch` needs `to`;
 * - `move-window` needs `app` and exactly one of `to` / `to_current`;
 * - `follow` is only accepted together with `move-window`.
 * Any violation yields the single message "Invalid combination of action and parameters".
 */
export const spaceToolSchema = z
  .object({
    action: z.enum(["list", "switch", "move-window"]),
    to: z.number().int().positive().optional(),
    to_current: z.boolean().optional(),
    app: z.string().optional(),
    window_title: z.string().optional(),
    window_index: z.number().int().optional(),
    detailed: z.boolean().optional(),
    follow: z.boolean().optional(),
  })
  .strict()
  .refine(
    ({ action, to, to_current, app, follow }) => {
      const isMove = action === "move-window";

      // `switch` without a destination.
      const switchWithoutTarget = action === "switch" && !to;

      // `move-window` without an app, without any destination, or with both destinations.
      const invalidMove = isMove && (!app || (!to && !to_current) || Boolean(to && to_current));

      // `follow` used with anything other than `move-window`.
      const strayFollow = Boolean(follow) && !isMove;

      return !(switchWithoutTarget || invalidMove || strayFollow);
    },
    { message: "Invalid combination of action and parameters" }
  );
|
||||
|
||||
/** Validated space tool input, inferred from `spaceToolSchema`. */
export type SpaceInput = z.infer<typeof spaceToolSchema>;

/** One entry of the `spaces` array in the `list` payload. */
interface SpaceInfo {
  id: number;
  type: string;
  /** Rendered as a "→" marker in the handler's list output. */
  is_active: boolean;
  /** Appended to the list output only when defined. */
  display_id?: number;
}

/** Payload shape the handler casts to for the `list` action. */
interface SpaceListOutput {
  spaces: SpaceInfo[];
}

/** Payload shape the handler casts to for `switch` and `move-window`. */
interface SpaceActionOutput {
  action: string;
  space?: number;
  app?: string;
  window?: string;
  result: string;
}
|
||||
|
||||
/**
 * Executes a `space` CLI sub-command (`list`, `switch` or `move-window`) and
 * formats the JSON result as a text response.
 *
 * @param args - Validated tool input; only the options relevant to the chosen
 *   action are forwarded to the CLI.
 * @param context - Provides the logger.
 * @returns For `list`, a text listing plus `metadata.spaces`; for the other
 *   actions, a confirmation line plus the raw payload as `metadata`. Any
 *   failure (including an unsuccessful CLI result) is returned as an error
 *   response with `isError: true`.
 */
export async function spaceToolHandler(args: SpaceInput, context: ToolContext): Promise<ToolResponse> {
  // The logger takes the merge object first and the message second, as the
  // other tool handlers do; the reversed order would drop `args` from the record.
  context.logger.debug({ args }, "Performing space operation");

  try {
    const commandArgs = ["space", args.action];

    // Add action-specific parameters
    switch (args.action) {
      case "list":
        if (args.detailed) {
          commandArgs.push("--detailed");
        }
        break;
      case "switch":
        if (args.to) {
          commandArgs.push("--to", args.to.toString());
        }
        break;
      case "move-window":
        if (args.app) {
          commandArgs.push("--app", args.app);
        }
        if (args.to) {
          commandArgs.push("--to", args.to.toString());
        } else if (args.to_current) {
          commandArgs.push("--to-current");
        }
        if (args.window_title) {
          commandArgs.push("--window-title", args.window_title);
        }
        if (args.window_index !== undefined) {
          commandArgs.push("--window-index", args.window_index.toString());
        }
        if (args.follow) {
          commandArgs.push("--follow");
        }
        break;
    }

    // Always use JSON output
    commandArgs.push("--json-output");

    // Execute space command
    const result = await executeSwiftCli(commandArgs, context.logger, { timeout: 10000 });

    if (!result.success || !result.data) {
      // Routed through the catch block below so all failures share one response shape.
      throw new Error(result.error?.message || "Failed to perform space operation");
    }

    if (args.action === "list") {
      const listData = result.data as SpaceListOutput;

      // One line per space; the active one is marked with an arrow.
      const spacesList = listData.spaces
        .map((space, index) => {
          const marker = space.is_active ? "→" : " ";
          let spaceText = `${marker} Space ${index + 1} [ID: ${space.id}, Type: ${space.type}`;
          if (space.display_id !== undefined) {
            spaceText += `, Display ${space.display_id}`;
          }
          spaceText += "]";
          return spaceText;
        })
        .join("\n");

      return {
        content: [
          {
            type: "text",
            text: `Spaces:\n${spacesList}`,
          },
        ],
        metadata: {
          spaces: listData.spaces,
        },
      };
    } else {
      const actionData = result.data as SpaceActionOutput;

      // Format action response, preferring values echoed by the CLI over the request.
      let responseText = "";
      switch (args.action) {
        case "switch":
          responseText = `✓ Switched to Space ${actionData.space || args.to}`;
          break;
        case "move-window":
          responseText = `✓ Moved ${actionData.app || args.app}`;
          if (actionData.window) {
            responseText += ` window "${actionData.window}"`;
          }
          if (args.to_current) {
            responseText += " to current Space";
          } else {
            responseText += ` to Space ${actionData.space || args.to}`;
          }
          if (args.follow) {
            responseText += " (and switched to it)";
          }
          break;
      }

      return {
        content: [
          {
            type: "text",
            text: responseText,
          },
        ],
        metadata: actionData,
      };
    }
  } catch (error) {
    // Object first so the error is part of the log record.
    context.logger.error({ error }, "Failed to perform space operation");
    return {
      content: [
        {
          type: "text",
          text: `Failed to perform space operation: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
|
||||
@ -1,114 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the swipe tool: start and end coordinates as 'x,y' strings
 * plus an optional duration (default 500) and step count (default 10).
 */
export const swipeToolSchema = z
  .object({
    from: z.string().describe("Starting coordinates in format 'x,y' (e.g., '100,200')."),
    to: z.string().describe("Ending coordinates in format 'x,y' (e.g., '300,400')."),
    duration: z
      .number()
      .optional()
      .default(500)
      .describe("Optional. Duration of the swipe in milliseconds. Default: 500."),
    steps: z
      .number()
      .optional()
      .default(10)
      .describe("Optional. Number of intermediate steps for smooth movement. Default: 10."),
  })
  .describe(
    [
      "Performs a swipe/drag gesture from one point to another. ",
      "Useful for dragging elements, swiping through content, or gesture-based interactions. ",
      "Creates smooth movement with configurable duration and steps.",
    ].join("")
  );
|
||||
|
||||
/** Shape the swipe handler casts the CLI `data` payload to. */
interface SwipeResult {
  success: boolean;
  start_location: { x: number; y: number };
  end_location: { x: number; y: number };
  /** Reported by the handler rounded, with a "px" suffix. */
  distance: number;
  /** Reported by the handler with an "ms" suffix. */
  duration: number;
  /** Reported by the handler with two decimals and an "s" suffix. */
  execution_time: number;
}

/** Validated swipe tool input, inferred from `swipeToolSchema`. */
export type SwipeInput = z.infer<typeof swipeToolSchema>;
|
||||
|
||||
/**
 * Runs the `swipe` CLI command and summarises the gesture that was performed.
 *
 * @param input - Validated tool input; `duration` and `steps` fall back to
 *   500 and 10 when absent.
 * @param context - Provides the logger.
 * @returns A multi-line text summary on success, or an error response
 *   (`isError: true`) when the CLI call fails or throws.
 */
export async function swipeToolHandler(input: SwipeInput, context: ToolContext): Promise<ToolResponse> {
  const log = context.logger;

  // Wraps a message in a single-text-item error response.
  const failure = (text: string): ToolResponse => ({
    content: [{ type: "text", text }],
    isError: true,
  });

  try {
    log.debug({ input }, "Processing peekaboo.swipe tool call");

    // Command line: coordinates first, then timing options.
    const cliArgs = [
      "swipe",
      "--from",
      input.from,
      "--to",
      input.to,
      "--duration",
      (input.duration ?? 500).toString(),
      "--steps",
      (input.steps ?? 10).toString(),
    ];

    const outcome = await executeSwiftCli(cliArgs, log);

    if (!(outcome.success && outcome.data)) {
      const errorMessage = outcome.error?.message || "Swipe command failed";
      log.error({ result: outcome }, errorMessage);
      return failure(`Failed to perform swipe: ${errorMessage}`);
    }

    const swipe = outcome.data as SwipeResult;

    const report = [
      "✅ Swipe completed",
      `📍 From: (${Math.round(swipe.start_location.x)}, ${Math.round(swipe.start_location.y)})`,
      `📍 To: (${Math.round(swipe.end_location.x)}, ${Math.round(swipe.end_location.y)})`,
      `📏 Distance: ${Math.round(swipe.distance)}px`,
      `⏱️ Duration: ${swipe.duration}ms`,
      `⏱️ Completed in ${swipe.execution_time.toFixed(2)}s`,
    ].join("\n");

    return {
      content: [{ type: "text", text: report }],
    };
  } catch (error) {
    log.error({ error }, "Swipe tool execution failed");
    const reason = error instanceof Error ? error.message : String(error);
    return failure(`Tool execution failed: ${reason}`);
  }
}
|
||||
@ -1,149 +0,0 @@
|
||||
import { z } from "zod";
|
||||
import type { ToolContext, ToolResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the type tool: optional text, optional target element and
 * session, a keystroke delay (default 5), and flags for special keys.
 */
export const typeToolSchema = z
  .object({
    text: z.string().optional().describe("The text to type. If not specified, can use special key flags instead."),
    on: z
      .string()
      .optional()
      .describe("Optional. Element ID to type into (from see command). If not specified, types at current focus."),
    session: z
      .string()
      .optional()
      .describe("Optional. Session ID from see command. Uses latest session if not specified."),
    delay: z.number().optional().default(5).describe("Optional. Delay between keystrokes in milliseconds. Default: 5."),
    press_return: z.boolean().optional().default(false).describe("Optional. Press return/enter after typing."),
    tab: z.number().optional().describe("Optional. Press tab N times."),
    escape: z.boolean().optional().default(false).describe("Optional. Press escape key."),
    delete: z.boolean().optional().default(false).describe("Optional. Press delete/backspace key."),
    clear: z.boolean().optional().default(false).describe("Optional. Clear the field before typing (Cmd+A, Delete)."),
  })
  .describe(
    [
      "Types text or sends special keys. ",
      "Can type text, press special keys, or combine both actions. ",
      "Types at current keyboard focus.",
    ].join("")
  );
|
||||
|
||||
/** Shape the type handler casts the CLI `data` payload to. */
interface TypeResult {
  success: boolean;
  /** When present, the handler shows a preview truncated to 50 characters. */
  text_typed?: string;
  keys_pressed: number;
  /** Reported by the handler with two decimals and an "s" suffix. */
  execution_time: number;
}

/** Validated type tool input, inferred from `typeToolSchema`. */
export type TypeInput = z.infer<typeof typeToolSchema>;
|
||||
|
||||
/**
 * Runs the `type` CLI command and summarises what was typed.
 *
 * @param input - Validated tool input. Falsy options (empty text, `tab` of 0,
 *   `false` flags) are not forwarded; `--delay` is always sent (default 5).
 * @param context - Provides the logger.
 * @returns A multi-line text summary on success, or an error response
 *   (`isError: true`) when the CLI call fails or throws.
 */
export async function typeToolHandler(input: TypeInput, context: ToolContext): Promise<ToolResponse> {
  const log = context.logger;

  // Wraps a message in a single-text-item error response.
  const failure = (text: string): ToolResponse => ({
    content: [{ type: "text", text }],
    isError: true,
  });

  try {
    log.debug({ input }, "Processing peekaboo.type tool call");

    // Argument order: text, session, target element, delay, key flags, JSON flag.
    const cliArgs: string[] = [
      "type",
      ...(input.text ? [input.text] : []),
      ...(input.session ? ["--session", input.session] : []),
      ...(input.on ? ["--on", input.on] : []),
      "--delay",
      (input.delay ?? 5).toString(),
      ...(input.press_return ? ["--press-return"] : []),
      ...(input.tab ? ["--tab", input.tab.toString()] : []),
      ...(input.escape ? ["--escape"] : []),
      ...(input.delete ? ["--delete"] : []),
      ...(input.clear ? ["--clear"] : []),
      // Always request JSON output for parsing
      "--json-output",
    ];

    const outcome = await executeSwiftCli(cliArgs, log);

    if (!(outcome.success && outcome.data)) {
      const errorMessage = outcome.error?.message || "Type command failed";
      log.error({ result: outcome }, errorMessage);
      return failure(`Failed to type text: ${errorMessage}`);
    }

    const typed = outcome.data as TypeResult;
    const report: string[] = ["✅ Typing completed successfully"];

    if (typed.text_typed) {
      // Long text is cut to 47 characters plus an ellipsis.
      const full = typed.text_typed;
      const preview = full.length > 50 ? `${full.substring(0, 47)}...` : full;
      report.push(`📝 Text: "${preview}"`);
    }

    report.push(`⌨️ Key presses: ${typed.keys_pressed}`, `⏱️ Completed in ${typed.execution_time.toFixed(2)}s`);

    return {
      content: [{ type: "text", text: report.join("\n") }],
    };
  } catch (error) {
    log.error({ error }, "Type tool execution failed");
    const reason = error instanceof Error ? error.message : String(error);
    return failure(`Tool execution failed: ${reason}`);
  }
}
|
||||
@ -1,255 +0,0 @@
|
||||
import type { Logger } from "pino";
|
||||
import { z } from "zod";
|
||||
import type { ToolResponse, WindowErrorResponse, WindowSuccessResponse } from "../types/index.js";
|
||||
import { executeSwiftCli } from "../utils/peekaboo-cli.js";
|
||||
|
||||
/**
 * Input schema for the window tool.
 *
 * Only `action` is required here; the per-action requirements for
 * coordinates and dimensions are checked by the handler, not by the schema.
 */
export const windowToolSchema = z.object({
  action: z
    .enum(["close", "minimize", "maximize", "move", "resize", "set-bounds", "focus"])
    .describe("The action to perform on the window"),

  // Window selection
  app: z.string().optional().describe("Target application name, bundle ID, or process ID"),
  title: z.string().optional().describe("Window title to target (partial matching supported)"),
  index: z.number().int().nonnegative().optional().describe("Window index (0-based) for multi-window applications"),

  // Geometry
  x: z.number().optional().describe("X coordinate for move or set-bounds action"),
  y: z.number().optional().describe("Y coordinate for move or set-bounds action"),
  width: z.number().optional().describe("Width for resize or set-bounds action"),
  height: z.number().optional().describe("Height for resize or set-bounds action"),
});

/** Validated window tool input, inferred from `windowToolSchema`. */
export type WindowInput = z.infer<typeof windowToolSchema>;
|
||||
|
||||
/**
 * Runs a `window` CLI sub-command and converts its result into a text response.
 *
 * Steps: build the argument list, validate the geometry required by `move`,
 * `resize` and `set-bounds`, execute the CLI, then interpret the payload
 * (parsing it first when it arrives as a JSON string).
 *
 * @param input - Validated tool input.
 * @param context - Provides the logger.
 * @returns A single-text-item response. `isError` is true for validation
 *   failures, CLI failures, payloads carrying an `error` key, and thrown
 *   exceptions; it is false for successes, for unparseable string output, and
 *   for payloads of an unexpected shape.
 */
export async function windowToolHandler(input: WindowInput, context: { logger: Logger }): Promise<ToolResponse> {
  const { logger } = context;

  // Builds the single-text-item response used by every exit path.
  const reply = (text: string, isError: boolean): ToolResponse => ({
    content: [{ type: "text", text }],
    isError,
  });

  try {
    logger.debug({ input }, "Window tool called");

    const args: string[] = ["window", input.action];

    if (input.app) {
      args.push("--app", input.app);
    }
    if (input.title) {
      args.push("--window-title", input.title);
    }
    if (input.index !== undefined) {
      args.push("--window-index", input.index.toString());
    }

    // Geometry arguments; each action rejects the call when a required value is missing.
    switch (input.action) {
      case "move": {
        if (input.x === undefined || input.y === undefined) {
          return reply("❌ Move action requires both 'x' and 'y' coordinates", true);
        }
        args.push("--x", input.x.toString(), "--y", input.y.toString());
        break;
      }
      case "resize": {
        if (input.width === undefined || input.height === undefined) {
          return reply("❌ Resize action requires both 'width' and 'height' dimensions", true);
        }
        args.push("--width", input.width.toString(), "--height", input.height.toString());
        break;
      }
      case "set-bounds": {
        const missing =
          input.x === undefined || input.y === undefined || input.width === undefined || input.height === undefined;
        if (missing) {
          return reply("❌ Set-bounds action requires all parameters: 'x', 'y', 'width', and 'height'", true);
        }
        args.push(
          "--x",
          input.x.toString(),
          "--y",
          input.y.toString(),
          "--width",
          input.width.toString(),
          "--height",
          input.height.toString()
        );
        break;
      }
      default:
        break;
    }

    logger.debug({ args }, "Executing window command");
    const result = await executeSwiftCli(args, logger);
    logger.debug({ result }, "Window command completed");

    if (!result.success) {
      return reply(`❌ Window command failed: ${result.error?.message || "Unknown error"}`, true);
    }

    // The payload may arrive as a JSON string; unparseable text is reported as-is.
    let payload = result.data;
    if (typeof result.data === "string") {
      try {
        payload = JSON.parse(result.data);
      } catch (parseError) {
        logger.warn({ parseError, data: result.data }, "Failed to parse window command JSON output");
        return reply(`Window ${input.action} completed. Output: ${result.data}`, false);
      }
    }

    if (payload && typeof payload === "object") {
      // A payload carrying `error` is treated as a failure before anything else.
      if ("error" in payload) {
        const failed = payload as WindowErrorResponse;
        const errorMessage = failed.error.message || "Window command failed";
        return reply(`❌ Window Error: ${errorMessage}`, true);
      }

      if ("success" in payload) {
        const windowResponse = payload as WindowSuccessResponse | WindowErrorResponse;

        if (windowResponse.success && "data" in windowResponse && windowResponse.data) {
          const windowData = windowResponse.data;

          // Human-readable description of the targeted window.
          let targetDesc = "window";
          if (input.app) {
            targetDesc = input.title ? `'${input.title}' window of ${input.app}` : `${input.app} window`;
          }

          const summaries = new Map<string, string>([
            ["close", `✅ Closed ${targetDesc}`],
            ["minimize", `✅ Minimized ${targetDesc}`],
            ["maximize", `✅ Maximized ${targetDesc}`],
            ["move", `✅ Moved ${targetDesc} to (${input.x}, ${input.y})`],
            ["resize", `✅ Resized ${targetDesc} to ${input.width}×${input.height}`],
            [
              "set-bounds",
              `✅ Set bounds of ${targetDesc} to (${input.x}, ${input.y}) with size ${input.width}×${input.height}`,
            ],
            ["focus", `✅ Focused ${targetDesc}`],
          ]);

          let responseText = summaries.get(input.action) ?? `✅ Window ${input.action} completed successfully`;
          if (windowData.message) {
            responseText += `\n${windowData.message}`;
          }

          return reply(responseText, false);
        }

        // Wrapped response that reports failure.
        if (!windowResponse.success) {
          const failed = windowResponse as WindowErrorResponse;
          const errorMessage = failed.error?.message || "Window command failed";
          return reply(`❌ Window Error: ${errorMessage}`, true);
        }
      }
    }

    // Fallback for unexpected response format
    return reply(
      `Window ${input.action} completed with unexpected response format: ${JSON.stringify(payload)}`,
      false
    );
  } catch (error) {
    logger.error({ error, input }, "Window tool execution failed");
    const errorMessage = error instanceof Error ? error.message : String(error);
    return reply(`❌ Window ${input.action} failed: ${errorMessage}`, true);
  }
}
|
||||
@ -1,429 +0,0 @@
|
||||
import type { Logger } from "pino";
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Result envelope with a success flag, optional payload, optional message and
 * debug-log lists, and optional error details.
 * NOTE(review): presumably the value resolved by `executeSwiftCli` — confirm.
 */
export interface SwiftCliResponse {
  success: boolean;
  /** Payload; the union ends in `unknown`, so consumers must narrow or cast. */
  data?: ApplicationListData | WindowListData | ImageCaptureData | ServerStatusData | unknown;
  messages?: string[];
  debug_logs?: string[];
  error?: {
    message: string;
    code: string;
    details?: string;
  };
}

/** Description of one saved file: its path, MIME type and optional window/item labels. */
export interface SavedFile {
  path: string;
  item_label?: string;
  window_title?: string;
  window_id?: number;
  window_index?: number;
  mime_type: string;
}
|
||||
|
||||
/** One application entry (element type of `ApplicationListData.applications`). */
export interface ApplicationInfo {
  app_name: string;
  bundle_id: string;
  pid: number;
  is_active: boolean;
  window_count: number;
}

/** One window entry (element type of `WindowListData.windows`). */
export interface WindowInfo {
  window_title: string;
  window_id?: number;
  window_index?: number;
  bounds?: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  is_on_screen?: boolean;
}

/** Application identity attached to a window list; `bundle_id` is optional here. */
export interface TargetApplicationInfo {
  app_name: string;
  bundle_id?: string;
  pid: number;
}

/** Context object passed to tool handlers; carries the pino logger. */
export interface ToolContext {
  logger: Logger;
}
|
||||
|
||||
/** Payload variant listing saved files. */
export interface ImageCaptureData {
  saved_files: SavedFile[];
}

/** Payload variant listing applications. */
export interface ApplicationListData {
  applications: ApplicationInfo[];
}

/** Payload variant listing the windows of one target application. */
export interface WindowListData {
  target_application_info: TargetApplicationInfo;
  windows: WindowInfo[];
}

/** Payload variant with an optional CLI version and optional permission flags. */
export interface ServerStatusData {
  cli_version?: string;
  permissions?: {
    screen_recording?: boolean;
    accessibility?: boolean;
  };
}
|
||||
|
||||
/** A provider name paired with a model name. */
export interface AIProvider {
  provider: string;
  model: string;
}

/** Provider configuration discriminated by `type: "ollama"`. */
export interface OllamaConfig {
  type: "ollama";
  baseUrl: string;
  model: string;
  requestTimeout?: number;
  keepAlive?: string;
}

/** Provider configuration discriminated by `type: "openai"`. */
export interface OpenAIConfig {
  type: "openai";
  /** Optional because it can be set via env. */
  apiKey?: string;
  model: string;
  maxTokens?: number;
  temperature?: number;
}

/** Union of the provider configurations, discriminated on `type`. */
export type AIProviderConfig = OllamaConfig | OpenAIConfig;
|
||||
|
||||
/**
 * Value returned by tool handlers: a list of content items plus optional
 * flags and extras. The index signature permits additional top-level keys.
 */
export interface ToolResponse {
  /** Content items; each is tagged `"text"` or `"image"`, with every other field optional. */
  content: Array<{
    type: "text" | "image";
    text?: string;
    data?: string;
    mimeType?: string;
    metadata?: Record<string, unknown>;
  }>;
  /** Set to true by handlers on their failure paths. */
  isError?: boolean;
  saved_files?: SavedFile[];
  analysis_text?: string;
  model_used?: string;
  _meta?: Record<string, unknown>;
  /** Allow additional properties. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Input schema for the image tool.
 *
 * Each field normalises "empty" inputs before validation:
 * - `path`: null, undefined, "" and the literal string "null" become undefined.
 * - `format`: null, undefined and "" become undefined; any other value is
 *   lower-cased and mapped to `png`, `jpg` or `data` (`jpeg` is an alias for
 *   `jpg`), with unrecognised values falling back to `png`.
 * - `capture_focus`: "" and null become undefined, which selects the `auto` default.
 */
export const imageToolSchema = z
  .object({
    app_target: z
      .string()
      .optional()
      .describe(
        "Optional. Specifies the capture target.\n" +
          "For example:\n" +
          "Omit or use an empty string (e.g., `''`) for all screens.\n" +
          "Use `'screen:INDEX'` (e.g., `'screen:0'`) for a specific display.\n" +
          "Use `'frontmost'` for all windows of the current foreground application.\n" +
          "Use `'AppName'` (e.g., `'Safari'`) for all windows of that application.\n" +
          "Use `'PID:PROCESS_ID'` (e.g., `'PID:663'`) to target a specific process by its PID.\n" +
          "Use `'AppName:WINDOW_TITLE:Title'` (e.g., `'TextEdit:WINDOW_TITLE:My Notes'`) for a window of 'AppName' matching that title.\n" +
          "Use `'AppName:WINDOW_INDEX:Index'` (e.g., `'Preview:WINDOW_INDEX:0'`) for a window of 'AppName' at that index.\n" +
          "Ensure components are correctly colon-separated."
      ),
    path: z
      .preprocess((val) => {
        // Handle null, undefined, empty string, or literal "null" string by returning undefined
        if (val === null || val === undefined || val === "" || val === "null") {
          return undefined;
        }
        return val;
      }, z.string().optional())
      .describe(
        "Optional. Base absolute path for saving the image.\n" +
          "Relevant if `format` is `'png'`, `'jpg'`, or if `'data'` is used with the intention to also save the file.\n" +
          "If a `question` is provided and `path` is omitted, a temporary path is used for image capture, and this temporary file is deleted after analysis."
      ),
    question: z
      .string()
      .optional()
      .describe(
        "Optional. If provided, the captured image will be analyzed by an AI model.\n" +
          "The server automatically selects an AI provider from the `PEEKABOO_AI_PROVIDERS` environment variable.\n" +
          "The analysis result (text) is included in the response."
      ),
    format: z.preprocess(
      (val) => {
        // Handle null, undefined, or empty string by returning undefined (will use default)
        if (val === null || val === undefined || val === "") {
          return undefined;
        }
        // Convert to lowercase for case-insensitive matching
        const lowerVal = String(val).toLowerCase();

        // Map common aliases
        const formatMap: Record<string, string> = {
          jpeg: "jpg",
          png: "png",
          jpg: "jpg",
          data: "data",
        };

        // Return mapped value or fall back to 'png'. Only own keys count:
        // a plain index lookup would also return inherited members for inputs
        // such as "constructor" or "toString", which then fail the enum check
        // instead of falling back.
        return Object.prototype.hasOwnProperty.call(formatMap, lowerVal) ? formatMap[lowerVal] : "png";
      },
      z
        .enum(["png", "jpg", "data"])
        .optional()
        .describe(
          "Optional. Output format.\n" +
            "Can be `'png'`, `'jpg'`, `'jpeg'` (alias for jpg), or `'data'`.\n" +
            "Format is case-insensitive (e.g., 'PNG', 'Png', 'png' are all valid).\n" +
            "If `'png'` or `'jpg'`, saves the image to the specified `path`.\n" +
            "If `'data'`, returns Base64 encoded PNG data inline in the response.\n" +
            "If `path` is also provided when `format` is `'data'`, the image is saved (as PNG) AND Base64 data is returned.\n" +
            "Defaults to `'data'` if `path` is not given.\n" +
            "Invalid format values automatically fall back to 'png'."
        )
    ),
    capture_focus: z.preprocess(
      (val) => (val === "" || val === null ? undefined : val),
      z
        .enum(["background", "auto", "foreground"])
        .optional()
        .default("auto")
        .describe(
          "Optional. Focus behavior. 'auto' (default): bring target to front only if not already active. " +
            "'background': capture without altering window focus. " +
            "'foreground': always bring target to front before capture."
        )
    ),
  })
  .describe(
    "Captures screen content and optionally analyzes it. " +
      "Targets entire screens, specific app windows, or all windows of an app (via `app_target`). " +
      "Supports foreground/background capture. " +
      'Output to file path or inline Base64 data (`format: "data"`). ' +
      "If a `question` is provided, an AI model analyzes the image. " +
      "Window shadows/frames excluded."
  );

/** Validated image tool input, inferred from `imageToolSchema`. */
export type ImageInput = z.infer<typeof imageToolSchema>;
|
||||
|
||||
// Tool input types

/** Input fields for the see tool; all optional. */
export interface SeeInput {
  app_target?: string;
  path?: string;
  session?: string;
  annotate?: boolean;
}

/** Input fields for the click tool; all optional. */
export interface ClickInput {
  query?: string;
  on?: string;
  coords?: string;
  session?: string;
  wait_for?: number;
  double?: boolean;
  right?: boolean;
}

/** Input fields for the type tool; `text` is required in this declaration. */
export interface TypeInput {
  text: string;
  on?: string;
  session?: string;
  clear?: boolean;
  delay?: number;
  wait_for?: number;
}

/** Input fields for the scroll tool; `direction` is the only required field. */
export interface ScrollInput {
  direction: "up" | "down" | "left" | "right";
  amount?: number;
  on?: string;
  session?: string;
  delay?: number;
  smooth?: boolean;
}

/** Input fields for the hotkey tool. */
export interface HotkeyInput {
  keys: string;
  hold_duration?: number;
}

/** Input fields for the swipe tool; `from` and `to` are required strings. */
export interface SwipeInput {
  from: string;
  to: string;
  duration?: number;
  steps?: number;
}

/** Input fields for the run tool; `script_path` is required. */
export interface RunInput {
  script_path: string;
  session?: string;
  stop_on_error?: boolean;
  timeout?: number;
}

/** Input fields for the sleep tool. */
export interface SleepInput {
  duration: number;
}
|
||||
|
||||
// Agent-specific response types

/** One agent session entry; only `id` is required. */
export interface AgentSession {
  id: string;
  task?: string;
  created?: string;
  messageCount?: number;
}

/** One agent step; every field is optional. */
export interface AgentStep {
  description?: string;
  command?: string;
  output?: string;
}

/** Data portion of a successful agent response. */
export interface AgentResponseData {
  sessions?: AgentSession[];
  summary?: string;
  steps?: AgentStep[];
}

/** Agent response variant with `success: true`. */
export interface AgentSuccessResponse {
  success: true;
  data: AgentResponseData;
}

/** Agent response variant with `success: false`; the error message may be absent. */
export interface AgentErrorResponse {
  success: false;
  error: {
    message?: string;
  };
}
|
||||
|
||||
// App-specific response types
|
||||
export interface AppInfo {
|
||||
name?: string;
|
||||
localizedName?: string;
|
||||
bundleIdentifier?: string;
|
||||
processIdentifier?: number;
|
||||
isTerminated?: boolean;
|
||||
isActive?: boolean;
|
||||
isHidden?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Data section of an app response. Every field is optional; which ones are
 * populated presumably depends on the requested action — confirm with the
 * producer.
 */
export interface AppResponseData {
  /** Optional action name. */
  action?: string;
  /** Optional application name. */
  app?: string;
  /** Optional process id. */
  pid?: number;
  /** Optional window count. */
  window_count?: number;
  /** Optional activation flag. */
  activated?: boolean;
  /** Optional bundle identifier. */
  bundle_id?: string;
  /** Optional list of applications. */
  applications?: Array<AppInfo>;
  /** Optional free-form note. */
  note?: string;
  /** Optional error value of unspecified shape. */
  error?: unknown;
}
|
||||
|
||||
/**
 * Successful app response: `success` is the literal `true` and `data`
 * carries the payload.
 */
export interface AppSuccessResponse {
  /** Literal discriminant for the success case. */
  success: true;
  /** Response payload. */
  data: AppResponseData;
}
|
||||
|
||||
/**
 * Failed app response carrying an optional error message.
 *
 * `success` is declared as an optional literal `false` so this shape lines
 * up with the other `*ErrorResponse` interfaces in this file (which all
 * carry `success: false`) and can be told apart from `AppSuccessResponse`
 * by checking `success`. It is optional so that existing values that omit
 * the field still conform.
 */
export interface AppErrorResponse {
  /** Literal discriminant for the failure case; may be absent. */
  success?: false;
  /** Error details; the message itself may be absent. */
  error: { message?: string };
}
|
||||
|
||||
// Menu-specific response types
|
||||
/**
 * One entry inside a menu. Every field is optional.
 */
export interface MenuItem {
  /** Optional item title. */
  title?: string;
  /** Optional item name. */
  name?: string;
  /** Optional separator flag. */
  separator?: boolean;
  /** Optional enabled flag. */
  enabled?: boolean;
}
|
||||
|
||||
/**
 * A menu with an optional title/name and an optional list of items.
 */
export interface Menu {
  /** Optional menu title. */
  title?: string;
  /** Optional menu name. */
  name?: string;
  /** Optional menu entries. */
  items?: Array<MenuItem>;
}
|
||||
|
||||
/**
 * Data section of a menu response. Every field is optional.
 */
export interface MenuResponseData {
  /** Optional list of menus. */
  menus?: Array<Menu>;
  /** Optional list of menus under the `menu_bar` key. */
  menu_bar?: Array<Menu>;
  /** Optional clicked flag. */
  clicked?: boolean;
  /** Optional item string. */
  item?: string;
  /** Optional path string. */
  path?: string;
  /** Optional message. */
  message?: string;
}
|
||||
|
||||
/**
 * Successful menu response: `success` is the literal `true` and `data`
 * carries the payload.
 */
export interface MenuSuccessResponse {
  /** Literal discriminant for the success case. */
  success: true;
  /** Response payload. */
  data: MenuResponseData;
}
|
||||
|
||||
/**
 * Failed menu response: `success` is the literal `false` and `error`
 * carries an optional message.
 */
export interface MenuErrorResponse {
  /** Literal discriminant for the failure case. */
  success: false;
  /** Error details; the message itself may be absent. */
  error: { message?: string };
}
|
||||
|
||||
// See tool response types
|
||||
/**
 * One UI element entry. `id` and `role` are required; everything else is
 * optional.
 *
 * NOTE(review): both `rect` (an x/y/width/height object) and `frame` (an
 * array of number arrays) are declared; which one producers emit, and the
 * layout of `frame`, is not visible here — confirm.
 */
export interface UIElement {
  /** Element identifier (required). */
  id: string;
  /** Element role (required). */
  role: string;
  /** Optional label. */
  label?: string;
  /** Optional title. */
  title?: string;
  /** Optional description. */
  description?: string;
  /** Optional value, carried as a string. */
  value?: string;
  /** Optional bounding rectangle. */
  rect?: { x: number; y: number; width: number; height: number };
  /** Optional frame as nested number arrays. */
  frame?: Array<number[]>;
  /** Optional actionable flag. */
  isActionable?: boolean;
}
|
||||
|
||||
/**
 * Data section of a "see" response. Every field is an optional string.
 *
 * NOTE(review): several near-duplicate keys are declared (`screenshot` /
 * `screenshot_raw`, `screenshot_annotated` / `annotated`, `session` /
 * `session_id`); presumably they cover alternative producer spellings —
 * confirm.
 */
export interface SeeResponseData {
  screenshot?: string;
  screenshot_raw?: string;
  screenshot_annotated?: string;
  annotated?: string;
  /** Optional UI map reference, carried as a string. */
  ui_map?: string;
  session?: string;
  session_id?: string;
  application_name?: string;
  window_title?: string;
}
|
||||
|
||||
// Window tool response types
|
||||
/**
 * Data section of a window response. Every top-level field is optional.
 */
export interface WindowResponseData {
  /** Optional action name. */
  action?: string;
  /** Optional application name. */
  app?: string;
  /** Optional window reference by title and/or index. */
  window?: { title?: string; index?: number };
  /** Optional bounds; when present all four numbers are required. */
  bounds?: { x: number; y: number; width: number; height: number };
  /** Optional message. */
  message?: string;
}
|
||||
|
||||
/**
 * Successful window response: `success` is the literal `true` and `data`
 * carries the payload.
 */
export interface WindowSuccessResponse {
  /** Literal discriminant for the success case. */
  success: true;
  /** Response payload. */
  data: WindowResponseData;
}
|
||||
|
||||
/**
 * Failed window response: `success` is the literal `false` and `error`
 * carries an optional message.
 */
export interface WindowErrorResponse {
  /** Literal discriminant for the failure case. */
  success: false;
  /** Error details; the message itself may be absent. */
  error: { message?: string };
}
|
||||
@ -1,398 +0,0 @@
|
||||
import OpenAI from "openai";
|
||||
import type { Logger } from "pino";
|
||||
import type { AIProvider } from "../types/index.js";
|
||||
|
||||
/**
 * Parses a provider list such as `"ollama/llava:latest,openai/gpt-4o"`.
 *
 * Entries are separated by `,` or `;`. Each entry is `provider/model`; the
 * split happens on the FIRST `/` only, so model ids that themselves contain
 * slashes (for example `ollama/hf.co/acme/vision`) are kept intact instead
 * of being truncated to their first path segment.
 *
 * @param aiProvidersEnv Raw configuration string; empty or blank yields `[]`.
 * @returns One `{ provider, model }` per well-formed entry, in input order.
 *          Entries with a missing provider or a missing model are dropped.
 */
export function parseAIProviders(aiProvidersEnv: string): AIProvider[] {
  if (!aiProvidersEnv || !aiProvidersEnv.trim()) {
    return [];
  }

  const parsed: AIProvider[] = [];
  for (const rawEntry of aiProvidersEnv.split(/[,;]/)) {
    const entry = rawEntry.trim();
    if (!entry) {
      continue;
    }

    // No slash at all means there is no model part; such entries are skipped.
    const slashAt = entry.indexOf("/");
    if (slashAt === -1) {
      continue;
    }

    const provider = entry.slice(0, slashAt).trim();
    const model = entry.slice(slashAt + 1).trim();
    if (provider && model) {
      parsed.push({ provider, model });
    }
  }
  return parsed;
}
|
||||
|
||||
/**
 * Result of probing one AI provider.
 *
 * `available` is always present; `error` and `details` are filled in by the
 * individual provider checks as far as they could determine them.
 */
export interface ProviderStatus {
  /** Whether the provider is considered usable. */
  available: boolean;
  /** Human-readable failure reason, when there is one. */
  error?: string;
  /** Optional diagnostic breakdown. */
  details?: {
    /** Whether the requested model was found. */
    modelAvailable?: boolean;
    /** Whether the provider endpoint answered. */
    serverReachable?: boolean;
    /** Whether an API key was present. */
    apiKeyPresent?: boolean;
    /** Model names reported by the provider. */
    modelList?: Array<string>;
  };
}
|
||||
|
||||
/**
 * Convenience wrapper around `getProviderStatus` that reports only the
 * `available` flag.
 *
 * @param provider Provider/model pair to probe.
 * @param logger   Logger forwarded to the status check.
 * @returns `true` when the status check reports the provider as available.
 */
export async function isProviderAvailable(provider: AIProvider, logger: Logger): Promise<boolean> {
  const { available } = await getProviderStatus(provider, logger);
  return available;
}
|
||||
|
||||
/**
 * Probes a provider and reports its status.
 *
 * Dispatches on the lower-cased provider name to the Ollama, OpenAI or
 * Anthropic check. Unknown names are logged as a warning and reported as
 * unavailable. Any exception raised while checking is logged and converted
 * into an unavailable status rather than propagated.
 *
 * @param provider Provider/model pair to probe.
 * @param logger   Logger used for warnings and errors.
 * @returns The status produced by the matching check, or an unavailable
 *          status with an `error` message.
 */
export async function getProviderStatus(provider: AIProvider, logger: Logger): Promise<ProviderStatus> {
  try {
    const kind = provider.provider.toLowerCase();

    if (kind === "ollama") {
      return await checkOllamaStatus(provider.model, logger);
    }
    if (kind === "openai") {
      return await checkOpenAIStatus(provider.model, logger);
    }
    if (kind === "anthropic") {
      return checkAnthropicStatus(provider.model);
    }

    logger.warn({ provider: provider.provider }, "Unknown AI provider");
    return {
      available: false,
      error: `Unknown provider: ${provider.provider}`,
    };
  } catch (error) {
    logger.error({ error, provider: provider.provider }, "Error checking provider status");
    const reason = error instanceof Error ? error.message : "Unknown error";
    return { available: false, error: reason };
  }
}
|
||||
|
||||
/**
 * Checks whether an Ollama server is reachable and offers `model`.
 *
 * The server address comes from `PEEKABOO_OLLAMA_BASE_URL` (default
 * `http://localhost:11434`). The function requests `/api/tags` with a
 * 3000 ms abort timeout and compares the returned model names against the
 * requested one. A name matches when it is identical, when it starts with
 * `<model>:`, or when the requested model starts with the listed name's part
 * before the first `:`.
 *
 * Never throws: failures are logged at debug level and reported as an
 * unavailable status.
 *
 * @param model  Requested model name.
 * @param logger Logger used for debug output.
 */
async function checkOllamaStatus(model: string, logger: Logger): Promise<ProviderStatus> {
  try {
    const baseUrl = process.env.PEEKABOO_OLLAMA_BASE_URL || "http://localhost:11434";

    // Probe the tag listing; the abort signal bounds the wait to 3 seconds.
    const tagsResponse = await fetch(`${baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(3000),
    });

    if (!tagsResponse.ok) {
      return {
        available: false,
        error: `Ollama server returned ${tagsResponse.status}`,
        details: { serverReachable: false },
      };
    }

    const tagsData = await tagsResponse.json();
    const availableModels = tagsData.models?.map((m: { name: string }) => m.name) || [];

    const matchesRequested = (m: string) =>
      m === model || m.startsWith(`${model}:`) || model.startsWith(m.split(":")[0]);

    if (availableModels.some(matchesRequested)) {
      return {
        available: true,
        details: {
          serverReachable: true,
          modelAvailable: true,
          modelList: availableModels,
        },
      };
    }

    return {
      available: false,
      error: `Model '${model}' not found. Available models: ${availableModels.join(", ") || "none"}`,
      details: {
        serverReachable: true,
        modelAvailable: false,
        modelList: availableModels,
      },
    };
  } catch (error) {
    logger.debug({ error }, "Ollama not available");
    const errorMessage = error instanceof Error ? error.message : "Unknown error";

    // Fetch/timeout failures get a friendlier message; anything else is
    // reported verbatim. Either way the server counts as unreachable.
    const looksLikeConnectivity = errorMessage.includes("fetch") || errorMessage.includes("timeout");
    return {
      available: false,
      error: looksLikeConnectivity
        ? "Ollama server not reachable (not running or network issue)"
        : errorMessage,
      details: { serverReachable: false },
    };
  }
}
|
||||
|
||||
/**
 * Checks whether the OpenAI API is usable with the configured key.
 *
 * Requires `OPENAI_API_KEY`. With a key present, it lists the models using a
 * client configured with a 3000 ms timeout. A model that is missing from the
 * list is only logged at debug level — the provider is still reported as
 * available, with `modelAvailable` reflecting the lookup.
 *
 * Never throws: API failures are logged at debug level and classified by
 * substring match on the error message.
 *
 * @param model  Requested model id.
 * @param logger Logger used for debug output.
 */
async function checkOpenAIStatus(model: string, logger: Logger): Promise<ProviderStatus> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    return {
      available: false,
      error: "OpenAI API key not configured (OPENAI_API_KEY environment variable missing)",
      details: { apiKeyPresent: false },
    };
  }

  try {
    // Listing the models doubles as a validity check for the API key.
    const client = new OpenAI({ apiKey, timeout: 3000 });
    const listing = await client.models.list();
    const modelIds = listing.data.map((entry) => entry.id);
    const modelAvailable = modelIds.includes(model);

    if (!modelAvailable) {
      // Deliberately lenient: the listing might not include every usable model.
      logger.debug(
        { model, availableCount: modelIds.length },
        "Model not found in OpenAI models list, but this might be normal"
      );
    }

    return {
      available: true,
      details: {
        apiKeyPresent: true,
        serverReachable: true,
        modelAvailable,
        modelList: modelIds.slice(0, 10), // only the first 10, for brevity
      },
    };
  } catch (error) {
    logger.debug({ error }, "OpenAI API check failed");
    const errorMessage = error instanceof Error ? error.message : "Unknown error";

    const failure = (message: string, serverReachable: boolean): ProviderStatus => ({
      available: false,
      error: message,
      details: { apiKeyPresent: true, serverReachable },
    });

    if (errorMessage.includes("401") || errorMessage.includes("Unauthorized")) {
      return failure("Invalid OpenAI API key", true);
    }
    if (errorMessage.includes("network") || errorMessage.includes("fetch")) {
      return failure("Cannot reach OpenAI API (network issue)", false);
    }
    return failure(`OpenAI API error: ${errorMessage}`, false);
  }
}
|
||||
|
||||
/**
 * Reports Anthropic availability based solely on `ANTHROPIC_API_KEY`.
 *
 * No network request is made: with a key present the provider is reported
 * as available (the original comment notes that Anthropic is implemented in
 * the Swift CLI); without one it is reported as unavailable.
 *
 * @param _model Unused.
 */
function checkAnthropicStatus(_model: string): ProviderStatus {
  const keyPresent = Boolean(process.env.ANTHROPIC_API_KEY);

  if (keyPresent) {
    return {
      available: true,
      details: {
        apiKeyPresent: true,
        serverReachable: true,
        modelAvailable: true,
      },
    };
  }

  return {
    available: false,
    error: "Anthropic API key not configured (ANTHROPIC_API_KEY environment variable missing)",
    details: { apiKeyPresent: false },
  };
}
|
||||
|
||||
/**
 * Sends an image plus question to the selected provider and returns its
 * textual answer.
 *
 * Dispatch is on the lower-cased provider name: `ollama` and `openai` are
 * forwarded to their analyzers; `anthropic` and unknown names reject.
 *
 * @param provider    Provider/model pair to use.
 * @param _imagePath  Unused.
 * @param imageBase64 Base64-encoded image data.
 * @param question    Question to ask about the image.
 * @param logger      Logger forwarded to the analyzer.
 * @throws Error for `anthropic` ("not yet implemented") and for unsupported
 *         provider names.
 */
export async function analyzeImageWithProvider(
  provider: AIProvider,
  _imagePath: string,
  imageBase64: string,
  question: string,
  logger: Logger
): Promise<string> {
  const kind = provider.provider.toLowerCase();

  if (kind === "ollama") {
    return await analyzeWithOllama(provider.model, imageBase64, question, logger);
  }
  if (kind === "openai") {
    return await analyzeWithOpenAI(provider.model, imageBase64, question, logger);
  }
  if (kind === "anthropic") {
    throw new Error("Anthropic support not yet implemented");
  }
  throw new Error(`Unsupported AI provider: ${provider.provider}`);
}
|
||||
|
||||
/**
 * Asks an Ollama server to analyze an image.
 *
 * POSTs a non-streaming request to `<base>/api/generate`, where the base
 * address comes from `PEEKABOO_OLLAMA_BASE_URL` (default
 * `http://localhost:11434`). A blank question falls back to a generic
 * "describe the image" prompt.
 *
 * @param model       Model name sent in the request.
 * @param imageBase64 Base64-encoded image, sent as the single `images` entry.
 * @param question    Question to ask; blank means "describe".
 * @param logger      Logger for debug and error output.
 * @returns The `response` field of the reply, or a placeholder string when
 *          that field is empty.
 * @throws Error when the HTTP status is not OK; the message includes status
 *         and response body.
 */
async function analyzeWithOllama(
  model: string,
  imageBase64: string,
  question: string,
  logger: Logger
): Promise<string> {
  const baseUrl = process.env.PEEKABOO_OLLAMA_BASE_URL || "http://localhost:11434";
  logger.debug({ model, baseUrl }, "Analyzing image with Ollama");

  const prompt = question.trim() || "Please describe what you see in this image.";
  const payload = JSON.stringify({
    model,
    prompt,
    images: [imageBase64],
    stream: false,
  });

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });

  if (response.ok) {
    const result = await response.json();
    return result.response || "No response from Ollama";
  }

  const errorText = await response.text();
  logger.error({ status: response.status, error: errorText }, "Ollama API error");
  throw new Error(`Ollama API error: ${response.status} - ${errorText}`);
}
|
||||
|
||||
/**
 * Asks OpenAI's chat completions API to analyze an image.
 *
 * The image is sent inline as a `data:image/jpeg;base64,...` URL next to the
 * text prompt, with `max_tokens` set to 1000. A blank question falls back to
 * a generic "describe the image" prompt; an empty `model` falls back to
 * `gpt-4.1`.
 *
 * @param model       Model id to use.
 * @param imageBase64 Base64-encoded image data.
 * @param question    Question to ask; blank means "describe".
 * @param logger      Logger for debug output.
 * @returns The first choice's message content, or a placeholder string when
 *          there is none.
 * @throws Error when `OPENAI_API_KEY` is not set.
 */
async function analyzeWithOpenAI(
  model: string,
  imageBase64: string,
  question: string,
  logger: Logger
): Promise<string> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error("OpenAI API key not configured");
  }

  logger.debug({ model }, "Analyzing image with OpenAI");

  const client = new OpenAI({ apiKey });
  const prompt = question.trim() || "Please describe what you see in this image.";

  const completion = await client.chat.completions.create({
    model: model || "gpt-4.1",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          {
            type: "image_url",
            image_url: { url: `data:image/jpeg;base64,${imageBase64}` },
          },
        ],
      },
    ],
    max_tokens: 1000,
  });

  const answer = completion.choices[0]?.message?.content;
  return answer || "No response from OpenAI";
}
|
||||
|
||||
/**
 * Returns the built-in default model for a provider name.
 *
 * The lookup is case-insensitive. Names without a default yield the literal
 * string `"unknown"`.
 *
 * @param provider Provider name, any casing.
 */
export function getDefaultModelForProvider(provider: string): string {
  // A Map (rather than a plain object) keeps names like "constructor" from
  // resolving to inherited properties.
  const defaults = new Map<string, string>([
    ["ollama", "llava:latest"],
    ["openai", "gpt-4.1"],
    ["anthropic", "claude-3-sonnet-20240229"],
  ]);

  const found = defaults.get(provider.toLowerCase());
  return found === undefined ? "unknown" : found;
}
|
||||
|
||||
/**
 * Chooses which provider and model to use for a request.
 *
 * - With an explicit `providerConfig.type` (anything other than `"auto"`),
 *   that provider must be present in `configuredProviders` (matched
 *   case-insensitively) and currently available; otherwise the call rejects.
 * - In auto mode (the default), the configured providers are probed in order
 *   and the first available one wins; if none is available the result is
 *   `{ provider: null, model: "" }`.
 *
 * The model is the requested one, else the configured one, else the
 * provider's built-in default.
 *
 * @param providerConfig      Optional requested provider type and model.
 * @param configuredProviders Providers enabled in configuration, in priority order.
 * @param logger              Logger forwarded to availability checks.
 * @throws Error when an explicitly requested provider is not configured or
 *         not available.
 */
export async function determineProviderAndModel(
  providerConfig: { type?: string; model?: string } | undefined,
  configuredProviders: AIProvider[],
  logger: Logger
): Promise<{ provider: string | null; model: string }> {
  const requestedType = providerConfig?.type || "auto";
  const requestedModel = providerConfig?.model;

  if (requestedType === "auto") {
    // Probe sequentially so that list order acts as the priority.
    for (const candidate of configuredProviders) {
      if (!(await isProviderAvailable(candidate, logger))) {
        continue;
      }
      return {
        provider: candidate.provider,
        model: requestedModel || candidate.model || getDefaultModelForProvider(candidate.provider),
      };
    }
    return { provider: null, model: "" };
  }

  const match = configuredProviders.find(
    (p) => p.provider.toLowerCase() === requestedType.toLowerCase()
  );
  if (!match) {
    throw new Error(
      `Provider '${requestedType}' is not enabled in server's PEEKABOO_AI_PROVIDERS configuration.`
    );
  }

  if (!(await isProviderAvailable(match, logger))) {
    throw new Error(`Provider '${requestedType}' is configured but not currently available.`);
  }

  return {
    provider: requestedType,
    model: requestedModel || match.model || getDefaultModelForProvider(requestedType),
  };
}
|
||||
@ -1,120 +0,0 @@
|
||||
import * as fs from "fs/promises";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import type { Logger } from "pino";
|
||||
|
||||
/**
 * Shape of the parsed Peekaboo config file. Every section and every field
 * is optional.
 */
interface PeekabooConfig {
  /** AI provider settings; `providers` is a raw provider-list string. */
  aiProviders?: { providers?: string };
  /** Agent settings. */
  agent?: {
    defaultModel?: string;
    maxTokens?: number;
    temperature?: number;
  };
  /** Logging settings. */
  logging?: { level?: string; path?: string };
  /** Default capture settings. */
  defaults?: {
    savePath?: string;
    imageFormat?: string;
    captureMode?: string;
    captureFocus?: string;
  };
}
|
||||
|
||||
/**
 * Flat string-to-string map of credentials, keyed by the name found on the
 * left of `=` in the credentials file.
 */
interface PeekabooCredentials {
  [name: string]: string;
}
|
||||
|
||||
/**
 * Removes `//` line comments and block comments from JSONC text while
 * leaving string literals untouched, so values such as
 * `"http://localhost:11434"` survive intact.
 */
function stripJsonComments(source: string): string {
  let result = "";
  let inString = false;
  let i = 0;

  while (i < source.length) {
    const ch = source[i];
    const next = source[i + 1];

    if (inString) {
      result += ch;
      if (ch === "\\" && i + 1 < source.length) {
        // Copy the escaped character verbatim so `\"` does not end the string.
        result += next;
        i += 2;
        continue;
      }
      if (ch === '"') {
        inString = false;
      }
      i += 1;
      continue;
    }

    if (ch === '"') {
      inString = true;
      result += ch;
      i += 1;
      continue;
    }

    if (ch === "/" && next === "/") {
      // Line comment: skip up to (not including) the newline.
      while (i < source.length && source[i] !== "\n") {
        i += 1;
      }
      continue;
    }

    if (ch === "/" && next === "*") {
      // Block comment: skip through the closing marker, or to the end of the
      // input when it is unterminated.
      const close = source.indexOf("*/", i + 2);
      i = close === -1 ? source.length : close + 2;
      continue;
    }

    result += ch;
    i += 1;
  }

  return result;
}

/**
 * Loads Peekaboo configuration from `~/.peekaboo/config.json`.
 *
 * The file may contain JSONC-style comments; they are stripped before
 * parsing. Comment markers inside string values are preserved (previously a
 * `//` inside a value, e.g. a URL, truncated the line and made the whole
 * file fail to parse).
 *
 * Never throws: a missing file is logged at debug level, any other failure
 * (unreadable file, invalid JSON) at warn level, and an empty config is
 * returned in both cases.
 *
 * @param logger Logger for debug/warn output.
 * @returns The parsed config, or `{}` when it could not be loaded.
 */
export async function loadPeekabooConfig(logger: Logger): Promise<PeekabooConfig> {
  const configPath = path.join(os.homedir(), ".peekaboo", "config.json");

  try {
    const configContent = await fs.readFile(configPath, "utf-8");
    const config = JSON.parse(stripJsonComments(configContent)) as PeekabooConfig;
    logger.debug({ configPath }, "Loaded Peekaboo config file");
    return config;
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code === "ENOENT") {
      logger.debug({ configPath }, "Peekaboo config file not found");
    } else {
      logger.warn({ error, configPath }, "Failed to load Peekaboo config file");
    }
    return {};
  }
}
|
||||
|
||||
/**
 * Loads credentials from `~/.peekaboo/credentials`.
 *
 * The file is read as `key=value` lines. Blank lines and lines starting with
 * `#` are ignored, as are lines without `=` or with nothing before it. Only
 * the first `=` separates key from value, so values may contain `=`. Keys
 * and values are trimmed.
 *
 * Never throws: a missing file is logged at debug level, any other failure
 * at warn level, and an empty map is returned in both cases.
 *
 * @param logger Logger for debug/warn output.
 */
export async function loadPeekabooCredentials(logger: Logger): Promise<PeekabooCredentials> {
  const credentialsPath = path.join(os.homedir(), ".peekaboo", "credentials");

  try {
    const fileText = await fs.readFile(credentialsPath, "utf-8");
    const credentials: PeekabooCredentials = {};

    for (const rawLine of fileText.split("\n")) {
      const entry = rawLine.trim();
      if (!entry || entry.startsWith("#")) {
        continue;
      }

      // The separator must exist and must not be the first character.
      const separatorAt = entry.indexOf("=");
      if (separatorAt <= 0) {
        continue;
      }

      const name = entry.slice(0, separatorAt).trim();
      credentials[name] = entry.slice(separatorAt + 1).trim();
    }

    logger.debug({ credentialsPath, count: Object.keys(credentials).length }, "Loaded Peekaboo credentials");
    return credentials;
  } catch (error) {
    const fileMissing = (error as NodeJS.ErrnoException).code === "ENOENT";
    if (fileMissing) {
      logger.debug({ credentialsPath }, "Peekaboo credentials file not found");
    } else {
      logger.warn({ error, credentialsPath }, "Failed to load Peekaboo credentials");
    }
    return {};
  }
}
|
||||
|
||||
/**
 * Resolves the AI provider list string.
 *
 * Lookup order:
 *   1. the `PEEKABOO_AI_PROVIDERS` environment variable, when non-empty;
 *   2. `aiProviders.providers` from the Peekaboo config file (logged at info
 *      level when used).
 *
 * @param logger Logger forwarded to the config loader.
 * @returns The provider list string, or `undefined` when neither source has one.
 */
export async function getAIProvidersConfig(logger: Logger): Promise<string | undefined> {
  if (process.env.PEEKABOO_AI_PROVIDERS) {
    return process.env.PEEKABOO_AI_PROVIDERS;
  }

  const fileConfig = await loadPeekabooConfig(logger);
  if (!fileConfig.aiProviders?.providers) {
    return undefined;
  }

  logger.info("Using AI providers from Peekaboo config file");
  return fileConfig.aiProviders.providers;
}
|
||||
|
||||
/**
 * Copies entries from the credentials file into `process.env`.
 *
 * A variable is only written when it is currently unset or empty, so values
 * already present in the environment take precedence. Each assignment is
 * logged at debug level with the key only.
 *
 * @param logger Logger forwarded to the credentials loader and used for debug output.
 */
export async function setupEnvironmentFromCredentials(logger: Logger): Promise<void> {
  const credentials = await loadPeekabooCredentials(logger);

  for (const key of Object.keys(credentials)) {
    if (process.env[key]) {
      continue; // already provided by the environment
    }
    process.env[key] = credentials[key];
    logger.debug({ key }, "Set environment variable from credentials");
  }
}
|
||||
@ -1,65 +0,0 @@
|
||||
import fs from "fs/promises";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import type { Logger } from "pino";
|
||||
import { analyzeImageWithProvider, parseAIProviders } from "./ai-providers.js";
|
||||
|
||||
/**
 * Runs image analysis against the configured providers, in order, and
 * returns the first successful answer.
 *
 * For each provider the image is written to a fresh temporary directory
 * (its path is handed to the provider call alongside the base64 data). The
 * directory is removed in a single `finally` step, so:
 *   - a cleanup failure can no longer discard an already successful analysis
 *     (previously cleanup also ran inside the `try`, and a failure there was
 *     treated as a provider failure);
 *   - the directory is also removed when writing the image file fails.
 *
 * @param base64Image           Base64-encoded image data.
 * @param question              Question to ask about the image.
 * @param logger                Logger for debug output.
 * @param availableProvidersEnv Raw provider list string (see `parseAIProviders`).
 * @returns `{ analysisText, modelUsed }` on success, otherwise `{ error }`
 *          when no providers are configured or all of them failed.
 */
export async function performAutomaticAnalysis(
  base64Image: string,
  question: string,
  logger: Logger,
  availableProvidersEnv: string
): Promise<{
  analysisText?: string;
  modelUsed?: string;
  error?: string;
}> {
  const providers = parseAIProviders(availableProvidersEnv);

  if (!providers.length) {
    return {
      error: "Analysis skipped: No AI providers configured",
    };
  }

  // Try each provider in order until one succeeds.
  for (const provider of providers) {
    const providerLabel = `${provider.provider}/${provider.model}`;
    let tempDir: string | undefined;

    try {
      logger.debug({ provider: providerLabel }, "Attempting analysis with provider");

      // The provider call receives a file path in addition to the base64 data.
      tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "peekaboo-analysis-"));
      const tempPath = path.join(tempDir, "image.png");
      await fs.writeFile(tempPath, Buffer.from(base64Image, "base64"));

      const analysisText = await analyzeImageWithProvider(provider, tempPath, base64Image, question, logger);

      return {
        analysisText,
        modelUsed: providerLabel,
      };
    } catch (error) {
      logger.debug({ error, provider: providerLabel }, "Provider failed, trying next");
      // Continue to next provider
    } finally {
      if (tempDir) {
        // Best-effort cleanup: errors here must not affect the result.
        await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
      }
    }
  }

  return {
    error: "Analysis failed: All configured AI providers failed or are unavailable",
  };
}
|
||||
@ -1,173 +0,0 @@
|
||||
import * as fs from "fs/promises";
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import type { Logger } from "pino";
|
||||
import type { ImageInput } from "../types/index.js";
|
||||
|
||||
/**
 * Outcome of resolving where a capture should be written.
 *
 * Both fields are always present but may be `undefined`.
 */
export interface ResolvedImagePath {
  /** Path the capture should be written to. */
  effectivePath: string | undefined;
  /** Temporary directory created for the capture, if one was created. */
  tempDirUsed: string | undefined;
}
|
||||
|
||||
/**
 * Decides where a capture should be saved.
 *
 * Resolution order:
 *   1. `input.path`, used as-is;
 *   2. a new temp directory when a question is present or the format is
 *      `"data"`;
 *   3. `PEEKABOO_DEFAULT_SAVE_PATH`, when set;
 *   4. a new temp directory as the final fallback.
 *
 * Temp files are named `capture.jpg` when the format is `"jpg"` and
 * `capture.png` otherwise (including for `"data"`).
 *
 * @param input  Capture request.
 * @param logger Logger for debug output when a temp path is created.
 * @returns The path to write to plus the temp directory created, if any.
 */
export async function resolveImagePath(input: ImageInput, logger: Logger): Promise<ResolvedImagePath> {
  if (input.path) {
    return { effectivePath: input.path, tempDirUsed: undefined };
  }

  // Creates the temp directory and builds the capture file path inside it.
  const createTempTarget = async (
    format: string | undefined,
    logMessage: string
  ): Promise<ResolvedImagePath> => {
    const directory = await fs.mkdtemp(path.join(os.tmpdir(), "peekaboo-img-"));
    const suffix = format === "jpg" ? ".jpg" : ".png";
    const filePath = path.join(directory, `capture${suffix}`);
    logger.debug({ tempPath: filePath }, logMessage);
    return { effectivePath: filePath, tempDirUsed: directory };
  };

  if (input.question || input.format === "data") {
    const fileFormat = input.format === "data" ? "png" : input.format || "png";
    return createTempTarget(fileFormat, "Created temporary file path for capture");
  }

  const envSavePath = process.env.PEEKABOO_DEFAULT_SAVE_PATH;
  if (envSavePath) {
    return { effectivePath: envSavePath, tempDirUsed: undefined };
  }

  // No path, no question, no 'data' format, no env var.
  return createTempTarget(input.format || "png", "Created fallback temporary file path for capture");
}
|
||||
|
||||
/**
 * Builds the argument list for the Swift CLI `image` command.
 *
 * `input.app_target` selects the capture mode:
 *   - omitted / empty           → `--mode screen` (all screens)
 *   - `screen:INDEX`            → `--mode screen --screen-index INDEX`
 *                                 (invalid index falls back to all screens)
 *   - `frontmost`               → `--mode frontmost`
 *   - `PID:12345`               → `--app PID:12345 --mode multi`
 *   - `App:WINDOW_TITLE:Title`  → `--app App --mode window --window-title Title`
 *   - `App:WINDOW_INDEX:N`      → `--app App --mode window --window-index N`
 *   - `App`                     → `--app App --mode multi`
 * Malformed targets are logged as warnings and degrade to app-name or
 * all-screens capture.
 *
 * A PID must consist of decimal digits only. Values such as `1.5`, `-3`,
 * `0x10` or `1e3` used to pass the numeric check and were forwarded to the
 * CLI; they are now treated as invalid and fall back to screen capture.
 *
 * `--path` is appended when `effectivePath` is set, followed by `--format`
 * (`swiftFormat` if given, otherwise the input format with `"data"` mapped
 * to `"png"`) and `--capture-focus` (default `"background"`).
 *
 * @param input         Capture request.
 * @param effectivePath Output path, if any.
 * @param swiftFormat   Optional format override passed to the CLI.
 * @param logger        Optional logger; a no-op logger is used when omitted.
 * @returns The CLI argument array, starting with `"image"`.
 */
export function buildSwiftCliArgs(
  input: ImageInput,
  effectivePath: string | undefined,
  swiftFormat?: string,
  logger?: Logger
): string[] {
  const args = ["image"];

  // Format validation is already handled by the schema preprocessor.
  const inputFormat = input.format || "png";
  const actualFormat = swiftFormat || (inputFormat === "data" ? "png" : inputFormat);

  // No-op logger when none is provided (backward compatibility).
  const log = logger || {
    warn: (_msg: unknown) => {},
    error: (_msg: unknown) => {},
    debug: (_msg: unknown) => {},
  };

  if (!input.app_target || input.app_target === "") {
    // Omitted/empty: all screens
    args.push("--mode", "screen");
  } else if (input.app_target.startsWith("screen:")) {
    // 'screen:INDEX': specific display
    const screenIndexStr = input.app_target.substring(7);
    const screenIndex = Number.parseInt(screenIndexStr, 10);
    if (Number.isNaN(screenIndex) || screenIndex < 0) {
      log.warn(
        { screenIndex: screenIndexStr },
        `Invalid screen index '${screenIndexStr}' in app_target, capturing all screens.`
      );
      args.push("--mode", "screen");
    } else {
      args.push("--mode", "screen", "--screen-index", screenIndex.toString());
    }
  } else if (input.app_target.toLowerCase() === "frontmost") {
    // 'frontmost': frontmost window of the frontmost app
    log.debug("Using frontmost mode - will attempt to capture frontmost window");
    args.push("--mode", "frontmost");
  } else if (input.app_target.includes(":")) {
    const parts = input.app_target.split(":");
    if (parts[0].toUpperCase() === "PID" && parts.length >= 2) {
      // 'PID:12345': target process by PID
      const pid = parts[1].trim();
      if (!/^\d+$/.test(pid)) {
        log.warn({ pid: parts[1] }, "Invalid PID value, must be a number");
        args.push("--mode", "screen");
      } else {
        log.debug({ pid }, "Targeting process by PID");
        args.push("--app", `PID:${pid}`);
        args.push("--mode", "multi");
      }
    } else if (parts.length >= 3) {
      // 'AppName:WINDOW_TITLE:Title' or 'AppName:WINDOW_INDEX:Index'
      const appName = parts[0].trim();
      const specifierType = parts[1].trim();
      const specifierValue = parts.slice(2).join(":"); // window titles may contain colons

      if (!appName) {
        log.warn({ app_target: input.app_target }, "Empty app name detected in app_target, treating as malformed");
        // Use the first non-empty part as the app name, if there is one.
        const nonEmptyParts = parts.filter((part) => part.trim());
        if (nonEmptyParts.length > 0) {
          args.push("--app", nonEmptyParts[0].trim());
          args.push("--mode", "multi");
        } else {
          log.warn("All parts of app_target are empty, defaulting to screen mode");
          args.push("--mode", "screen");
        }
      } else {
        args.push("--app", appName);
        args.push("--mode", "window");

        if (specifierType.toUpperCase() === "WINDOW_TITLE") {
          args.push("--window-title", specifierValue);
        } else if (specifierType.toUpperCase() === "WINDOW_INDEX") {
          args.push("--window-index", specifierValue);
        } else {
          log.warn({ specifierType }, "Unknown window specifier type, defaulting to main window");
        }
      }
    } else {
      // Malformed: treat as app name, unless it is empty or only colons.
      const cleanAppTarget = input.app_target.trim();
      if (!cleanAppTarget || cleanAppTarget === ":".repeat(cleanAppTarget.length)) {
        log.warn(
          { app_target: input.app_target },
          "Malformed app_target with only colons or empty, defaulting to screen mode"
        );
        args.push("--mode", "screen");
      } else {
        log.warn({ app_target: input.app_target }, "Malformed window specifier, treating as app name");
        // Drop trailing colons from the app name.
        const appName = cleanAppTarget.replace(/:+$/, "");
        args.push("--app", appName);
        args.push("--mode", "multi");
      }
    }
  } else {
    // 'AppName': all windows of that app
    args.push("--app", input.app_target.trim());
    args.push("--mode", "multi");
  }

  if (effectivePath) {
    args.push("--path", effectivePath);
  }

  args.push("--format", actualFormat);
  args.push("--capture-focus", input.capture_focus || "background");

  return args;
}
|
||||
@ -1,35 +0,0 @@
|
||||
import type { ImageCaptureData, ImageInput } from "../types/index.js";
|
||||
|
||||
/**
 * Builds the human-readable summary for an image capture.
 *
 * - No saved files: a fixed "nothing saved" message.
 * - One file: `Captured 1 image`, followed by the saved path unless a
 *   question was asked without an explicit `input.path`.
 * - Several files: `Captured N images` followed by a numbered list of paths,
 *   each with its `item_label` in parentheses when present.
 *
 * Two trailing branches of the original could never run (the zero, one and
 * many cases were already handled before them) and have been removed; the
 * produced text is unchanged.
 *
 * @param input    Capture request; only `path` is consulted.
 * @param data     Capture result; only `saved_files` is consulted.
 * @param question Optional analysis question.
 */
export function buildImageSummary(input: ImageInput, data: ImageCaptureData, question?: string): string {
  const savedFiles = data.saved_files;
  if (!savedFiles || savedFiles.length === 0) {
    return "Image capture completed but no files were saved or available for analysis.";
  }

  const imageCount = savedFiles.length;
  let summary = `Captured ${imageCount} image${imageCount > 1 ? "s" : ""}`;

  if (imageCount === 1) {
    // Show the path if there is no question, or a question with an explicit path.
    if (!question || input.path) {
      summary += `\nImage saved to: ${savedFiles[0].path}`;
    }
    return summary;
  }

  summary += `\n${imageCount} images saved:`;
  savedFiles.forEach((file, index) => {
    summary += `\n${index + 1}. ${file.path}`;
    if (file.item_label) {
      summary += ` (${file.item_label})`;
    }
  });

  return summary;
}
|
||||
@ -1,410 +0,0 @@
|
||||
/// <reference types="node" />
|
||||
|
||||
import { spawn } from "child_process";
|
||||
import { existsSync } from "fs";
|
||||
import fsPromises from "fs/promises";
|
||||
import path from "path";
|
||||
// import { fileURLToPath } from 'url'; // No longer needed here
|
||||
import type { Logger } from "pino";
|
||||
import type { SwiftCliResponse } from "../types/index.js";
|
||||
|
||||
// Marker stored in `resolvedCliPath` when no usable CLI path could be determined.
const INVALID_PATH_SENTINEL = "PEEKABOO_CLI_PATH_RESOLUTION_FAILED";

// Module-level cache of the Swift CLI path; `null` until it has been assigned.
let resolvedCliPath: string | null = null;
|
||||
|
||||
/**
 * Works out which Swift CLI binary path to use.
 *
 * Order of preference:
 *   1. `PEEKABOO_CLI_PATH`, but only when that path exists;
 *   2. `<packageRootDirForFallback>/peekaboo`, when a root directory is given
 *      (its existence is not checked here);
 *   3. `INVALID_PATH_SENTINEL`, signalling that resolution failed.
 *
 * @param packageRootDirForFallback Package root used when the env override is
 *        missing or points to a non-existent path.
 */
function determineSwiftCliPath(packageRootDirForFallback?: string): string {
  const overridePath = process.env.PEEKABOO_CLI_PATH;

  let overrideUsable = false;
  if (overridePath) {
    try {
      overrideUsable = existsSync(overridePath);
    } catch (_err) {
      // Treat a failing existence check like a missing file.
      overrideUsable = false;
    }
  }

  if (overridePath && overrideUsable) {
    return overridePath;
  }

  return packageRootDirForFallback
    ? path.resolve(packageRootDirForFallback, "peekaboo")
    : INVALID_PATH_SENTINEL;
}
|
||||
|
||||
/**
 * Resolves the peekaboo binary location and caches it in `resolvedCliPath`.
 *
 * An empty `packageRootDir` is intentionally not rejected here: determineSwiftCliPath()
 * then yields INVALID_PATH_SENTINEL unless PEEKABOO_CLI_PATH is valid, and the failure
 * is reported later by getInitializedSwiftCliPath().
 *
 * @param packageRootDir directory expected to contain the bundled `peekaboo` binary
 */
export function initializeSwiftCliPath(packageRootDir: string): void {
  const candidate = determineSwiftCliPath(packageRootDir);
  resolvedCliPath = candidate;

  // Nothing to verify when resolution itself failed.
  if (!candidate || candidate === INVALID_PATH_SENTINEL) {
    return;
  }

  // Warn early (for debugging) when the resolved binary is missing on disk.
  if (!existsSync(candidate)) {
    console.error(`[Peekaboo MCP] Warning: Binary not found at ${candidate}`);
  }
}
|
||||
|
||||
/**
 * Returns the cached CLI path, or throws when it is unusable.
 *
 * Two failure modes are reported (each is logged before throwing):
 *   - the path was never resolved, or resolution produced INVALID_PATH_SENTINEL;
 *   - the path was resolved but no file exists there.
 *
 * @param logger logger used to record the failure before the error is thrown
 * @throws Error describing which of the two checks failed
 */
function getInitializedSwiftCliPath(logger: Logger): string {
  const cliPath = resolvedCliPath;

  if (!cliPath || cliPath === INVALID_PATH_SENTINEL) {
    const problem = [
      "Peekaboo Swift CLI path is not properly initialized or resolution failed. ",
      `Resolved path: '${cliPath}'. Ensure PEEKABOO_CLI_PATH is valid or `,
      "initializeSwiftCliPath() was called with a correct package root directory at startup.",
    ].join("");
    logger.error(problem);
    // Throwing keeps callers from ever spawning an invalid path.
    throw new Error(problem);
  }

  if (existsSync(cliPath)) {
    return cliPath;
  }

  const problem = [
    `Peekaboo Swift CLI binary not found at expected path: ${cliPath}\n`,
    `The peekaboo binary should be located in the package root directory.\n`,
    `You can override this by setting the PEEKABOO_CLI_PATH environment variable.`,
  ].join("");
  logger.error({ binaryPath: cliPath }, problem);
  throw new Error(problem);
}
|
||||
|
||||
/**
 * Translates a Swift CLI exit code into a user-facing message and a stable error code.
 *
 * @param exitCode exit status reported by the CLI process
 * @param stderr   captured stderr; used only for exit codes without a dedicated message
 * @param _command CLI sub-command that was run (currently unused)
 * @param appTarget application name passed via `--app`, quoted in the "app not found" message
 * @returns the message/code pair for the exit code, or a generic execution error
 */
function mapExitCodeToErrorMessage(
  exitCode: number,
  stderr: string,
  _command: "image" | "list",
  appTarget?: string
): { message: string; code: string } {
  const trimmedStderr = stderr.trim();
  const genericMessage = trimmedStderr
    ? `Peekaboo CLI Error: ${trimmedStderr}`
    : `Swift CLI execution failed (exit code: ${exitCode})`;

  switch (exitCode) {
    case 1:
      return { message: "An unknown error occurred in the Swift CLI.", code: "SWIFT_CLI_UNKNOWN_ERROR" };
    case 7:
      return {
        message:
          "The specified application is running but has no capturable windows. Try setting 'capture_focus' to 'foreground' to un-hide application windows.",
        code: "SWIFT_CLI_NO_WINDOWS_FOUND",
      };
    case 10:
      return { message: "No displays available for capture.", code: "SWIFT_CLI_NO_DISPLAYS" };
    case 11:
      return {
        message:
          "Screen Recording permission is not granted. Please enable it in System Settings > Privacy & Security > Screen Recording.",
        code: "SWIFT_CLI_NO_SCREEN_RECORDING_PERMISSION",
      };
    case 12:
      return {
        message:
          "Accessibility permission is not granted. Please enable it in System Settings > Privacy & Security > Accessibility.",
        code: "SWIFT_CLI_NO_ACCESSIBILITY_PERMISSION",
      };
    case 13:
      return { message: "Invalid display ID provided for capture.", code: "SWIFT_CLI_INVALID_DISPLAY_ID" };
    case 14:
      return { message: "The screen capture could not be created.", code: "SWIFT_CLI_CAPTURE_CREATION_FAILED" };
    case 15:
      return { message: "The specified window was not found.", code: "SWIFT_CLI_WINDOW_NOT_FOUND" };
    case 16:
      return { message: "Failed to capture the specified window.", code: "SWIFT_CLI_WINDOW_CAPTURE_FAILED" };
    case 17:
      return {
        message:
          "Failed to write the capture to a file. This is often a file permissions issue. Please ensure the application has permissions to write to the destination directory.",
        code: "SWIFT_CLI_FILE_WRITE_ERROR",
      };
    case 18:
      // The only code whose message embeds call context (the requested application).
      return {
        message: `The specified application ('${appTarget || "unknown"}') is not running or could not be found.`,
        code: "SWIFT_CLI_APP_NOT_FOUND",
      };
    case 19:
      return { message: "The specified window index is invalid.", code: "SWIFT_CLI_INVALID_WINDOW_INDEX" };
    case 20:
      return { message: "Invalid argument provided to the Swift CLI.", code: "SWIFT_CLI_INVALID_ARGUMENT" };
    default:
      return { message: genericMessage, code: "SWIFT_CLI_EXECUTION_ERROR" };
  }
}
|
||||
|
||||
/**
 * Runs the Swift CLI with `--json-output` appended and resolves with its parsed response.
 *
 * The returned promise never rejects: every failure (path not initialised, spawn error,
 * timeout, empty output, unparsable JSON) is reported as `{ success: false, error }`.
 *
 * Timeout selection: `options.timeout` when it is a positive finite number, otherwise
 * PEEKABOO_CLI_TIMEOUT when it parses to a positive integer, otherwise 30 seconds.
 * On timeout the child receives SIGTERM and, if it has still not exited one second
 * later, SIGKILL.
 *
 * @param args    CLI arguments; `args[0]` is the sub-command, `--app <name>` is used for error text
 * @param logger  logger for debug/diagnostic output
 * @param options optional per-call timeout in milliseconds
 */
export async function executeSwiftCli(
  args: string[],
  logger: Logger,
  options: { timeout?: number } = {}
): Promise<SwiftCliResponse> {
  let cliPath: string;
  try {
    cliPath = getInitializedSwiftCliPath(logger);
  } catch (error) {
    // Error already logged by getInitializedSwiftCliPath
    return {
      success: false,
      error: {
        message: (error as Error).message,
        code: "SWIFT_CLI_PATH_INIT_ERROR",
        details: (error as Error).stack,
      },
    };
  }

  // Always add --json-output flag
  const fullArgs = [...args, "--json-output"];

  // A NaN or non-positive delay makes setTimeout fire almost immediately, which would
  // turn a mistyped PEEKABOO_CLI_TIMEOUT into "every call times out". Validate both sources.
  const isUsableTimeout = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value) && value > 0;
  const envTimeout = Number.parseInt(process.env.PEEKABOO_CLI_TIMEOUT || "", 10);
  const defaultTimeout = isUsableTimeout(envTimeout) ? envTimeout : 30000;
  const timeoutMs = isUsableTimeout(options.timeout) ? options.timeout : defaultTimeout;

  // Builds the exit-code based error used whenever the CLI produced no usable JSON.
  const fallbackError = (exitCode: number | null, stderrText: string): { message: string; code: string } => {
    const command = args[0] as "image" | "list";
    const appIndex = args.indexOf("--app");
    const appTarget = appIndex !== -1 && appIndex < args.length - 1 ? args[appIndex + 1] : undefined;
    return mapExitCodeToErrorMessage(exitCode || 1, stderrText, command, appTarget);
  };

  logger.debug({ command: cliPath, args: fullArgs, timeoutMs }, "Executing Swift CLI");

  return new Promise((resolve) => {
    // Named `child` so the Node global `process` is not shadowed inside this callback.
    const child = spawn(cliPath, fullArgs);

    let stdout = "";
    let stderr = "";
    let isResolved = false;
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

    // ChildProcess.kill(0) returns false instead of throwing for an exited child, so it
    // cannot serve as a liveness probe; exitCode/signalCode stay null while it runs.
    const childIsRunning = (): boolean => child.exitCode === null && child.signalCode === null;

    const timeoutId = setTimeout(() => {
      if (isResolved) {
        return;
      }
      isResolved = true;

      // Ask politely first.
      try {
        child.kill("SIGTERM");
      } catch (_err) {
        // Process might already be dead
      }

      // Give it a moment to terminate gracefully, then force kill if it is still around.
      forceKillTimer = setTimeout(() => {
        if (!childIsRunning()) {
          return;
        }
        try {
          child.kill("SIGKILL");
        } catch (_err) {
          // Process exited between the check and the signal
        }
      }, 1000);

      resolve({
        success: false,
        error: {
          message:
            `Swift CLI execution timed out after ${timeoutMs}ms. ` +
            "This may indicate a permission dialog is waiting for user input, or the process is stuck.",
          code: "SWIFT_CLI_TIMEOUT",
          details: `Command: ${cliPath} ${fullArgs.join(" ")}`,
        },
      });
    }, timeoutMs);

    const cleanup = () => {
      clearTimeout(timeoutId);
    };

    child.stdout.on("data", (data: Buffer | string) => {
      stdout += data.toString();
    });

    child.stderr.on("data", (data: Buffer | string) => {
      const stderrData = data.toString();
      stderr += stderrData;
      // Log stderr immediately as it comes in
      logger.warn({ swift_stderr: stderrData.trim() }, "[SwiftCLI-stderr]");
    });

    child.on("close", (exitCode: number | null) => {
      cleanup();
      // The child is gone: a pending SIGKILL escalation is no longer needed.
      if (forceKillTimer) {
        clearTimeout(forceKillTimer);
      }

      if (isResolved) {
        return; // Already resolved due to timeout
      }
      isResolved = true;

      logger.debug({ exitCode, stdout: stdout.slice(0, 200) }, "Swift CLI completed");

      const trimmedOutput = stdout.trim();

      // JSON on stdout is authoritative even for non-zero exit codes; only fall back to
      // exit-code mapping when there is nothing to parse.
      if (!trimmedOutput) {
        logger.error({ exitCode, stdout, stderr }, "Swift CLI execution failed with no output");
        const { message, code } = fallbackError(exitCode, stderr);
        resolve({
          success: false,
          error: {
            message,
            code,
            details: stderr.trim() || "No output received",
          },
        });
        return;
      }

      try {
        const response: SwiftCliResponse = JSON.parse(trimmedOutput);

        // Log debug messages from Swift CLI
        if (response.debug_logs && Array.isArray(response.debug_logs)) {
          response.debug_logs.forEach((entry) => {
            logger.debug({ backend: "swift", swift_log: entry });
          });
        }

        resolve(response);
      } catch (parseError) {
        logger.error(
          { parseError, stdout, exitCode },
          "Failed to parse Swift CLI JSON output, falling back to exit code mapping"
        );
        const { message, code } = fallbackError(exitCode, stderr);
        resolve({
          success: false,
          error: {
            message,
            code,
            details: `Failed to parse JSON response. Raw output: ${stdout.slice(0, 500)}`,
          },
        });
      }
    });

    child.on("error", (error: Error) => {
      cleanup();

      if (isResolved) {
        return; // Already resolved due to timeout
      }
      isResolved = true;

      logger.error({ error }, "Failed to spawn Swift CLI process");
      resolve({
        success: false,
        error: {
          message: `Failed to execute Swift CLI: ${error.message}`,
          code: "SWIFT_CLI_SPAWN_ERROR",
          details: error.toString(),
        },
      });
    });
  });
}
|
||||
|
||||
/**
 * Reads a file from disk and returns its contents base64-encoded.
 *
 * @param imagePath path of the file to read
 * @returns the file bytes as a base64 string; rejects if the file cannot be read
 */
export async function readImageAsBase64(imagePath: string): Promise<string> {
  const bytes = await fsPromises.readFile(imagePath);
  const encoded = bytes.toString("base64");
  return encoded;
}
|
||||
|
||||
/**
 * Runs the peekaboo binary with raw arguments and returns its raw output (no logger, no JSON parsing).
 *
 * The binary is PEEKABOO_CLI_PATH when set, otherwise `<packageRootDir>/peekaboo`.
 * The returned promise never rejects; failures come back as `{ success: false, error }`.
 *
 * Result shape on process exit:
 *   - non-zero exit and `expectSuccess !== false`: `{ success: false, error: stderr || stdout }`
 *   - otherwise: `{ success: exitCode === 0, data: stdout, error: stderr }`
 *
 * On timeout (default 15 s) the child receives SIGTERM and, if it has still not exited
 * one second later, SIGKILL.
 *
 * @param args           arguments passed to the binary unchanged
 * @param packageRootDir directory containing the bundled binary
 * @param options        `expectSuccess` (default true) and `timeout` in milliseconds
 */
export async function execPeekaboo(
  args: string[],
  packageRootDir: string,
  options: { expectSuccess?: boolean; timeout?: number } = {}
): Promise<{ success: boolean; data?: string; error?: string }> {
  const cliPath = process.env.PEEKABOO_CLI_PATH || path.resolve(packageRootDir, "peekaboo");
  const timeoutMs = options.timeout || 15000; // Default 15 seconds for simple commands

  return new Promise((resolve) => {
    // Named `child` so the Node global `process` is not shadowed inside this callback.
    const child = spawn(cliPath, args);
    let stdout = "";
    let stderr = "";
    let isResolved = false;
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

    // ChildProcess.kill(0) returns false instead of throwing for an exited child, so it
    // cannot serve as a liveness probe; exitCode/signalCode stay null while it runs.
    const childIsRunning = (): boolean => child.exitCode === null && child.signalCode === null;

    const timeoutId = setTimeout(() => {
      if (isResolved) {
        return;
      }
      isResolved = true;

      // Ask politely first.
      try {
        child.kill("SIGTERM");
      } catch (_err) {
        // Process might already be dead
      }

      // Give it a moment to terminate gracefully, then force kill if it is still around.
      forceKillTimer = setTimeout(() => {
        if (!childIsRunning()) {
          return;
        }
        try {
          child.kill("SIGKILL");
        } catch (_err) {
          // Process exited between the check and the signal
        }
      }, 1000);

      resolve({
        success: false,
        error: `Command timed out after ${timeoutMs}ms: ${cliPath} ${args.join(" ")}`,
      });
    }, timeoutMs);

    const cleanup = () => {
      clearTimeout(timeoutId);
    };

    child.stdout.on("data", (data) => {
      stdout += data.toString();
    });

    child.stderr.on("data", (data) => {
      stderr += data.toString();
    });

    child.on("close", (code) => {
      cleanup();
      // The child is gone: a pending SIGKILL escalation is no longer needed.
      if (forceKillTimer) {
        clearTimeout(forceKillTimer);
      }

      if (isResolved) {
        return; // Already resolved due to timeout
      }
      isResolved = true;

      const success = code === 0;
      if (options.expectSuccess !== false && !success) {
        resolve({ success: false, error: stderr || stdout });
      } else {
        resolve({ success, data: stdout, error: stderr });
      }
    });

    child.on("error", (err) => {
      cleanup();

      if (isResolved) {
        return; // Already resolved due to timeout
      }
      isResolved = true;

      resolve({ success: false, error: err.message });
    });
  });
}
|
||||
@ -1,14 +0,0 @@
|
||||
/**
 * Builds the one-line server status shown to clients, e.g.
 * "Peekaboo MCP 1.2.3 using openai/gpt-4o, ollama/llava".
 *
 * Providers are read from PEEKABOO_AI_PROVIDERS, split on commas or semicolons,
 * trimmed, and empty entries dropped. When the variable is unset, blank, or contains
 * only separators, the "None Configured" hint is used instead of an empty list.
 *
 * @param version server version to embed in the string
 * @returns the status line without leading or trailing whitespace
 */
export function generateServerStatusString(version: string): string {
  const aiProviders = process.env.PEEKABOO_AI_PROVIDERS;

  let providersText = "None Configured. Set PEEKABOO_AI_PROVIDERS ENV.";
  if (aiProviders?.trim()) {
    const providers = aiProviders
      .split(/[,;]/) // Support both comma and semicolon separators
      .map((p) => p.trim())
      .filter(Boolean);
    // A value such as " , ; " passes the trim() check above but names no provider;
    // keep the hint rather than emitting "... using" with nothing after it.
    if (providers.length > 0) {
      providersText = providers.join(", ");
    }
  }

  return `\n\nPeekaboo MCP ${version} using ${providersText}`.trim();
}
|
||||
@ -1,297 +0,0 @@
|
||||
import type { z } from "zod";
|
||||
|
||||
// View of a Zod schema that exposes the internal `_def` fields this module reads.
// Every field is optional because which ones exist depends on the concrete schema kind.
type ZodDefAny = z.ZodTypeAny & {
  // Description attached directly to the schema object.
  description?: string;
  _def?: {
    // Discriminator, e.g. "ZodObject", "ZodOptional".
    typeName?: string;
    description?: string;
    // Wrapped schema for optional/default wrappers.
    innerType?: z.ZodTypeAny;
    // Wrapped schema for effects wrappers.
    schema?: z.ZodTypeAny;
    // Factory producing the default value of a default wrapper.
    defaultValue?: () => unknown;
    // Validation checks (e.g. `{ kind: "int" }` on numbers).
    checks?: { kind: string; value?: unknown; message?: string }[];
    values?: readonly unknown[];
    type?: string;
  };
};
|
||||
|
||||
// Subset of JSON Schema keywords produced by this module.
interface JSONSchema {
  // Core keywords
  $ref?: string;
  type?: string | string[];
  description?: string;
  default?: unknown;
  enum?: unknown[];
  const?: unknown;
  format?: string;

  // Object keywords
  properties?: Record<string, JSONSchema>;
  required?: string[];
  additionalProperties?: boolean | JSONSchema;

  // Array keywords
  items?: JSONSchema;
  minItems?: number;
  maxItems?: number;

  // Composition keywords
  anyOf?: JSONSchema[];
  allOf?: JSONSchema[];
  oneOf?: JSONSchema[];
  not?: JSONSchema;

  // Numeric keywords
  minimum?: number;
  maximum?: number;

  // String keywords
  minLength?: number;
  maxLength?: number;
  pattern?: string;
}
|
||||
|
||||
/**
 * Peels ZodOptional / ZodDefault / ZodEffects wrappers off a schema.
 *
 * While walking from the outermost wrapper inwards it collects:
 *   - description: the first non-empty description found (outer wrappers win);
 *   - hasDefault:  true when any ZodDefault wrapper was encountered;
 *   - defaultValue: the value produced by the outermost ZodDefault wrapper.
 *
 * @param field schema to unwrap
 * @returns the innermost (non-wrapper) schema plus the collected metadata
 */
function unwrapZodSchema(field: z.ZodTypeAny): {
  coreSchema: z.ZodTypeAny;
  description: string | undefined;
  hasDefault: boolean;
  defaultValue?: unknown;
} {
  let current: z.ZodTypeAny = field;
  let description: string | undefined;
  let hasDefault = false;
  let defaultValue: unknown;
  let defaultCaptured = false;

  for (;;) {
    const node = current as ZodDefAny;
    const ownDescription = node._def?.description || node.description;
    description = description || ownDescription;

    // typeName is the reliable discriminator for the wrapper kind.
    const kind = node._def?.typeName;

    if (kind === "ZodOptional") {
      current = node._def?.innerType as z.ZodTypeAny;
      continue;
    }

    if (kind === "ZodDefault") {
      hasDefault = true;
      const produced = node._def?.defaultValue?.();
      // Only the outermost default is reported; inner ones are evaluated but ignored.
      if (!defaultCaptured) {
        defaultValue = produced;
        defaultCaptured = true;
      }
      current = node._def?.innerType as z.ZodTypeAny;
      continue;
    }

    if (kind === "ZodEffects") {
      current = node._def?.schema as z.ZodTypeAny;
      continue;
    }

    // Not a wrapper: this is the core schema.
    return { coreSchema: current, description, hasDefault, defaultValue };
  }
}
|
||||
|
||||
/**
 * Converts a Zod schema into a JSON Schema object.
 *
 * Supported core types: object, array, string, number (integer when an `int` check is
 * present), boolean, enum, union (emitted as `oneOf`) and literal (emitted as `const`).
 * Any other type falls back to `{ type: "string" }`.
 *
 * Optional / default / effects wrappers are removed via unwrapZodSchema(); the
 * description and default value collected there are attached to the result for every
 * branch, including the fallback.
 *
 * For objects, a property is listed in `required` unless it is optional or has a
 * default, also when that optional/default wrapper sits inside an effects wrapper.
 *
 * @param schema Zod schema to convert
 * @returns the equivalent JSON Schema
 */
export function zodToJsonSchema(schema: z.ZodTypeAny): JSONSchema {
  const { coreSchema, description: rootDescription, hasDefault, defaultValue } = unwrapZodSchema(schema);

  // typeName is the reliable discriminator for the schema kind.
  const coreSchemaWithDef = coreSchema as ZodDefAny;
  const typeName = coreSchemaWithDef._def?.typeName;

  // Attaches the description/default gathered while unwrapping; shared by every branch.
  const annotate = (jsonSchema: JSONSchema): JSONSchema => {
    if (rootDescription) {
      jsonSchema.description = rootDescription;
    }
    if (hasDefault && defaultValue !== undefined) {
      jsonSchema.default = defaultValue;
    }
    return jsonSchema;
  };

  // True when the field may be omitted: an optional/default wrapper is present,
  // possibly beneath one or more effects wrappers (e.g. `.optional().refine(...)`).
  const mayBeOmitted = (field: z.ZodTypeAny): boolean => {
    let node = field as ZodDefAny | undefined;
    while (node) {
      const wrapper = node._def?.typeName;
      if (wrapper === "ZodOptional" || wrapper === "ZodDefault") {
        return true;
      }
      if (wrapper !== "ZodEffects") {
        return false;
      }
      node = node._def?.schema as ZodDefAny | undefined;
    }
    return false;
  };

  if (typeName === "ZodObject") {
    const shape = (coreSchema as ZodDefAny & { shape?: Record<string, z.ZodTypeAny> }).shape;
    const properties: Record<string, JSONSchema> = {};
    const required: string[] = [];

    for (const [key, value] of Object.entries(shape || {})) {
      const fieldSchema = value as z.ZodTypeAny;
      const unwrapped = unwrapZodSchema(fieldSchema);

      // Convert the bare type, then re-attach what the wrappers carried.
      const propertySchema = zodToJsonSchema(unwrapped.coreSchema);
      if (unwrapped.description && !propertySchema.description) {
        propertySchema.description = unwrapped.description;
      }
      if (unwrapped.hasDefault && unwrapped.defaultValue !== undefined) {
        propertySchema.default = unwrapped.defaultValue;
      }
      properties[key] = propertySchema;

      if (!mayBeOmitted(fieldSchema) && !unwrapped.hasDefault) {
        required.push(key);
      }
    }

    const jsonSchema: JSONSchema = { type: "object", properties };
    // Only add required array if it has elements
    if (required.length > 0) {
      jsonSchema.required = required;
    }
    return annotate(jsonSchema);
  }

  if (typeName === "ZodArray") {
    const jsonSchema: JSONSchema = {
      type: "array",
      items: zodToJsonSchema(coreSchema._def.type),
    };

    // Length constraints are stored as `{ value: number }` objects on the definition.
    const minLength = coreSchemaWithDef._def?.minLength;
    if (
      minLength &&
      typeof minLength === "object" &&
      "value" in minLength &&
      typeof minLength.value === "number" &&
      minLength.value > 0
    ) {
      jsonSchema.minItems = minLength.value;
    }

    const maxLength = coreSchemaWithDef._def?.maxLength;
    if (maxLength && typeof maxLength === "object" && "value" in maxLength && typeof maxLength.value === "number") {
      jsonSchema.maxItems = maxLength.value;
    }

    return annotate(jsonSchema);
  }

  if (typeName === "ZodString") {
    return annotate({ type: "string" });
  }

  if (typeName === "ZodNumber") {
    const checks = coreSchemaWithDef._def?.checks || [];
    const isInteger = checks.some((check: { kind: string }) => check.kind === "int");
    return annotate({ type: isInteger ? "integer" : "number" });
  }

  if (typeName === "ZodBoolean") {
    return annotate({ type: "boolean" });
  }

  if (typeName === "ZodEnum") {
    return annotate({
      type: "string",
      enum: coreSchema._def.values as unknown[],
    });
  }

  if (typeName === "ZodUnion") {
    return annotate({
      oneOf: coreSchema._def.options.map((option: z.ZodTypeAny) => zodToJsonSchema(option)),
    });
  }

  if (typeName === "ZodLiteral") {
    const value = coreSchema._def.value;
    const jsonSchema: JSONSchema = {};
    // Primitive literals also get a matching `type`; anything else is `const` only.
    if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
      jsonSchema.type = typeof value;
    }
    jsonSchema.const = value;
    return annotate(jsonSchema);
  }

  // Unknown types are described as strings, but keep their description/default.
  return annotate({ type: "string" });
}
|
||||
@ -1,20 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Node",
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
||||
}
|
||||
@ -1,64 +0,0 @@
|
||||
import { defineConfig } from "vitest/config";
|
||||
|
||||
/**
 * Reports whether Swift-dependent tests can run on this machine.
 * The Swift binary is expected on macOS only; other platforms (e.g. Linux) skip those tests.
 */
const isSwiftBinaryAvailable = (): boolean => process.platform === "darwin";
|
||||
|
||||
// E2E tests drive the real Swift binary, so they run only on macOS and never in CI.
// The same glob is added to `include` when they run and to `exclude` when they do not.
const e2eTestGlobs = ["../peekaboo-cli/tests/e2e/**/*.test.ts"];
const runE2ETests = isSwiftBinaryAvailable() && !process.env.CI;

export default defineConfig({
  test: {
    globals: true,
    environment: "node",
    // Run tests sequentially to avoid OS-level conflicts
    pool: "forks",
    poolOptions: {
      forks: {
        singleFork: true,
      },
    },
    include: [
      "../tests/unit/**/*.test.ts",
      // Include all integration tests
      "../tests/integration/**/*.test.ts",
      ...(runE2ETests ? e2eTestGlobs : []),
    ],
    exclude: [
      "**/node_modules/**",
      "**/dist/**",
      ...(runE2ETests ? [] : e2eTestGlobs),
    ],
    // Set reasonable timeouts to prevent hanging
    testTimeout: 60000, // 60 seconds for individual tests
    hookTimeout: 30000, // 30 seconds for setup/teardown hooks
    coverage: {
      provider: "v8",
      reporter: ["text", "lcov", "html"],
      reportsDirectory: "./coverage",
      include: ["src/**/*.ts"],
      exclude: [
        "src/**/*.d.ts",
        "src/index.ts", // Assuming this is the main entry point
      ],
    },
    // Global setup for platform-specific test skipping
    setupFiles: ["../tests/setup.ts"],
    // alias: {
    //   '^(\.{1,2}/.*)\.js$': '$1',
    // },
  },
  // resolve: {
  //   alias: [
  //     { find: /^(\..*)\.js$/, replacement: '$1' },
  //   ],
  // },
});
|
||||
@ -1,6 +1,8 @@
|
||||
# Peekaboo Swift MCP Server Migration Plan
|
||||
# Peekaboo Swift MCP Server Implementation
|
||||
|
||||
This document outlines the comprehensive plan to migrate Peekaboo from a TypeScript-based MCP server to a pure Swift implementation with a minimal Node.js restart wrapper for npm distribution.
|
||||
> **✅ UPDATE (2025-01-31)**: Migration complete! Peekaboo now runs as a pure Swift MCP server. The TypeScript server has been removed.
|
||||
|
||||
This document describes the Swift MCP server implementation in Peekaboo, which provides all automation tools through a native Swift server using the official MCP SDK (v0.9.0).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
@ -15,55 +17,37 @@ This document outlines the comprehensive plan to migrate Peekaboo from a TypeScr
|
||||
|
||||
## Executive Summary
|
||||
|
||||
### Goals
|
||||
- Eliminate TypeScript/Node.js runtime dependency for core functionality
|
||||
- Improve performance by ~10x through direct API calls
|
||||
- Maintain npm distribution compatibility with restart wrapper
|
||||
- Enable Peekaboo to act as both MCP server and client
|
||||
- Preserve all existing functionality and user experience
|
||||
|
||||
### Timeline
|
||||
- **Total Duration**: 10-15 days
|
||||
- **MVP (Basic tools)**: 5-7 days
|
||||
- **Full parity**: 10-12 days
|
||||
- **Testing & Polish**: 2-3 days
|
||||
### Achievements
|
||||
- ✅ Eliminated TypeScript/Node.js runtime dependency
|
||||
- ✅ ~10x performance improvement through direct API calls
|
||||
- ✅ All 22 MCP tools implemented in Swift
|
||||
- ✅ Type-safe implementation with Swift 6
|
||||
- ✅ Direct PeekabooCore API integration
|
||||
|
||||
### Key Benefits
|
||||
- Single binary deployment (except npm wrapper)
|
||||
- Single binary deployment
|
||||
- Type-safe Swift implementation throughout
|
||||
- Direct PeekabooCore API access
|
||||
- Direct PeekabooCore API access (no subprocess spawning)
|
||||
- Reduced latency and memory usage
|
||||
- Unified codebase in Swift
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Current Architecture
|
||||
### Current Architecture (Implemented)
|
||||
```
|
||||
┌─────────────┐ ┌──────────────┐ ┌─────────────┐
|
||||
│ MCP Client │────▶│ TypeScript │────▶│ Swift CLI │
|
||||
│ (Claude) │stdio│ Server │spawn│ (Binary) │
|
||||
└─────────────┘ └──────────────┘ └─────────────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────┐ ┌─────────────┐
|
||||
│ Zod Schemas │ │PeekabooCore │
|
||||
│ JSON Schema │ │ APIs │
|
||||
└──────────────┘ └─────────────┘
|
||||
┌─────────────┐ ┌──────────────┐
|
||||
│ MCP Client │────▶│ Swift MCP │
|
||||
│ (Claude) │stdio│ Server │
|
||||
└─────────────┘ └──────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐
|
||||
│PeekabooCore │
|
||||
│Direct APIs │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
### Target Architecture
|
||||
```
|
||||
┌─────────────┐ ┌──────────────┐ ┌─────────────┐
|
||||
│ MCP Client │────▶│Node Wrapper │────▶│ Swift MCP │
|
||||
│ (Claude) │stdio│ (Restart) │stdio│ Server │
|
||||
└─────────────┘ └──────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ ▼
|
||||
┌──────────────┐ ┌─────────────┐
|
||||
│Health Check │ │PeekabooCore │
|
||||
│Auto-restart │ │Direct APIs │
|
||||
└──────────────┘ └─────────────┘
|
||||
```
|
||||
The Swift MCP server directly integrates with PeekabooCore, eliminating the need for TypeScript middleware and subprocess spawning.
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
@ -80,8 +64,7 @@ targets: [
|
||||
.executableTarget(
|
||||
name: "peekaboo",
|
||||
dependencies: [
|
||||
.product(name: "MCPServer", package: "swift-sdk"),
|
||||
.product(name: "MCPClient", package: "swift-sdk"),
|
||||
.product(name: "MCP", package: "swift-sdk"),
|
||||
"PeekabooCore",
|
||||
"AXorcist"
|
||||
]
|
||||
@ -124,12 +107,11 @@ struct Serve: AsyncParsableCommand {
|
||||
```swift
|
||||
// Core/PeekabooCore/Sources/PeekabooCore/MCP/PeekabooMCPServer.swift
|
||||
import Foundation
|
||||
import MCPServer
|
||||
import MCP
|
||||
import os.log
|
||||
|
||||
@MainActor
|
||||
public class PeekabooMCPServer {
|
||||
private let server: MCPServer
|
||||
public actor PeekabooMCPServer {
|
||||
private let server: Server
|
||||
private let toolRegistry: MCPToolRegistry
|
||||
private let logger: Logger
|
||||
|
||||
@ -137,13 +119,14 @@ public class PeekabooMCPServer {
|
||||
self.logger = Logger(subsystem: "boo.peekaboo.mcp", category: "server")
|
||||
self.toolRegistry = MCPToolRegistry()
|
||||
|
||||
self.server = try MCPServer(
|
||||
info: ServerInfo(
|
||||
name: "peekaboo-mcp",
|
||||
version: Version.current.string
|
||||
),
|
||||
capabilities: ServerCapabilities(
|
||||
tools: ToolsCapability()
|
||||
// Initialize the official MCP Server
|
||||
self.server = Server(
|
||||
name: "peekaboo-mcp",
|
||||
version: Version.current.string,
|
||||
capabilities: Server.Capabilities(
|
||||
tools: .init(listChanged: true),
|
||||
resources: .init(subscribe: false, listChanged: false),
|
||||
prompts: .init(listChanged: false)
|
||||
)
|
||||
)
|
||||
|
||||
@ -152,16 +135,39 @@ public class PeekabooMCPServer {
|
||||
}
|
||||
|
||||
private func setupHandlers() {
|
||||
server.setRequestHandler(ListToolsRequest.self) { [weak self] _ in
|
||||
guard let self = self else { return ListToolsResponse(tools: []) }
|
||||
return ListToolsResponse(tools: self.toolRegistry.allTools())
|
||||
// Tool list handler
|
||||
server.withMethodHandler(ListTools.self) { [weak self] _ in
|
||||
guard let self = self else { return ListTools.Response(tools: []) }
|
||||
|
||||
let tools = await self.toolRegistry.allTools().map { tool in
|
||||
Tool(
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
inputSchema: tool.inputSchema
|
||||
)
|
||||
}
|
||||
|
||||
return ListTools.Response(tools: tools)
|
||||
}
|
||||
|
||||
server.setRequestHandler(CallToolRequest.self) { [weak self] request in
|
||||
guard let self = self else {
|
||||
throw MCPError.serverError("Server deallocated")
|
||||
// Tool call handler
|
||||
server.withMethodHandler(CallTool.self) { [weak self] request in
|
||||
guard let self = self else {
|
||||
throw ServerError(code: ErrorCode.internalError, message: "Server deallocated")
|
||||
}
|
||||
return try await self.handleToolCall(request)
|
||||
|
||||
guard let tool = await self.toolRegistry.tool(named: request.name) else {
|
||||
throw ServerError(code: ErrorCode.invalidParams, message: "Tool '\(request.name)' not found")
|
||||
}
|
||||
|
||||
let arguments = ToolArguments(raw: request.arguments ?? [:])
|
||||
let response = try await tool.execute(arguments: arguments)
|
||||
|
||||
return CallTool.Response(
|
||||
content: response.content,
|
||||
isError: response.isError,
|
||||
meta: response.meta
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,22 +177,22 @@ public class PeekabooMCPServer {
|
||||
"version": "\(Version.current.string)"
|
||||
])
|
||||
|
||||
let serverTransport: any Transport
|
||||
|
||||
switch transport {
|
||||
case .stdio:
|
||||
let transport = StdioServerTransport()
|
||||
try await server.connect(transport)
|
||||
try await server.run()
|
||||
serverTransport = StdioTransport(logger: logger)
|
||||
|
||||
case .http:
|
||||
let transport = HTTPServerTransport(port: port)
|
||||
try await server.connect(transport)
|
||||
try await server.run()
|
||||
// Note: HTTP transport would need custom implementation
|
||||
// as the SDK only provides HTTPClientTransport
|
||||
throw MCPError.notImplemented("HTTP server transport not yet implemented")
|
||||
|
||||
case .sse:
|
||||
let transport = SSEServerTransport(port: port)
|
||||
try await server.connect(transport)
|
||||
try await server.run()
|
||||
throw MCPError.notImplemented("SSE server transport not yet implemented")
|
||||
}
|
||||
|
||||
try await server.start(transport: serverTransport)
|
||||
}
|
||||
}
|
||||
```
|
||||
@ -197,12 +203,12 @@ public class PeekabooMCPServer {
|
||||
```swift
|
||||
// Core/PeekabooCore/Sources/PeekabooCore/MCP/MCPTool.swift
|
||||
import Foundation
|
||||
import MCPServer
|
||||
import MCP
|
||||
|
||||
public protocol MCPTool {
|
||||
var name: String { get }
|
||||
var description: String { get }
|
||||
var inputSchema: JSONSchema { get }
|
||||
var inputSchema: Value { get }
|
||||
|
||||
func execute(arguments: ToolArguments) async throws -> ToolResponse
|
||||
}
|
||||
@ -210,11 +216,43 @@ public protocol MCPTool {
|
||||
public struct ToolArguments {
|
||||
private let raw: [String: Any]
|
||||
|
||||
public init(raw: [String: Any]) {
|
||||
self.raw = raw
|
||||
}
|
||||
|
||||
public func decode<T: Decodable>(_ type: T.Type) throws -> T {
|
||||
let data = try JSONSerialization.data(withJSONObject: raw)
|
||||
return try JSONDecoder().decode(type, from: data)
|
||||
}
|
||||
}
|
||||
|
||||
public struct ToolResponse {
|
||||
public let content: [Content]
|
||||
public let isError: Bool
|
||||
public let meta: [String: Any]?
|
||||
|
||||
public init(content: [Content], isError: Bool = false, meta: [String: Any]? = nil) {
|
||||
self.content = content
|
||||
self.isError = isError
|
||||
self.meta = meta
|
||||
}
|
||||
|
||||
public static func text(_ text: String, meta: [String: Any]? = nil) -> ToolResponse {
|
||||
ToolResponse(
|
||||
content: [.text(text)],
|
||||
isError: false,
|
||||
meta: meta
|
||||
)
|
||||
}
|
||||
|
||||
public static func error(_ message: String) -> ToolResponse {
|
||||
ToolResponse(
|
||||
content: [.text(message)],
|
||||
isError: true,
|
||||
meta: nil
|
||||
)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### 2.2 Image Tool Implementation
|
||||
@ -237,23 +275,25 @@ public struct ImageTool: MCPTool {
|
||||
"""
|
||||
}
|
||||
|
||||
public var inputSchema: JSONSchema {
|
||||
.object(
|
||||
public var inputSchema: Value {
|
||||
SchemaBuilder.object(
|
||||
properties: [
|
||||
"path": .string(description: "Optional. Base absolute path for saving the image."),
|
||||
"format": .enum(
|
||||
["png", "jpg", "data"],
|
||||
description: "Optional. Output format."
|
||||
"path": SchemaBuilder.string(
|
||||
description: "Optional. Base absolute path for saving the image."
|
||||
),
|
||||
"app_target": .string(
|
||||
"format": SchemaBuilder.string(
|
||||
description: "Optional. Output format.",
|
||||
enum: ["png", "jpg", "data"]
|
||||
),
|
||||
"app_target": SchemaBuilder.string(
|
||||
description: "Optional. Specifies the capture target."
|
||||
),
|
||||
"question": .string(
|
||||
"question": SchemaBuilder.string(
|
||||
description: "Optional. If provided, the captured image will be analyzed."
|
||||
),
|
||||
"capture_focus": .enum(
|
||||
["background", "auto", "foreground"],
|
||||
"capture_focus": SchemaBuilder.string(
|
||||
description: "Optional. Focus behavior.",
|
||||
enum: ["background", "auto", "foreground"],
|
||||
default: "auto"
|
||||
)
|
||||
],
|
||||
@ -289,16 +329,15 @@ public struct ImageTool: MCPTool {
|
||||
// Return capture result
|
||||
if input.format == "data" {
|
||||
let imageData = try Data(contentsOf: URL(fileURLWithPath: result.savedFiles.first!.path))
|
||||
return .data(
|
||||
imageData.base64EncodedString(),
|
||||
mimeType: "image/png",
|
||||
metadata: ["savedFiles": result.savedFiles.map { $0.path }]
|
||||
return ToolResponse(
|
||||
content: [.image(data: imageData, mimeType: "image/png")],
|
||||
meta: ["savedFiles": result.savedFiles.map { $0.path }]
|
||||
)
|
||||
}
|
||||
|
||||
return .text(
|
||||
return ToolResponse.text(
|
||||
buildImageSummary(result),
|
||||
metadata: ["savedFiles": result.savedFiles.map { $0.path }]
|
||||
meta: ["savedFiles": result.savedFiles.map { $0.path }]
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -368,58 +407,75 @@ public class MCPToolRegistry {
|
||||
|
||||
### Phase 3: Schema Generation (Days 6-7)
|
||||
|
||||
#### 3.1 Codable to JSON Schema
|
||||
#### 3.1 JSON Schema with MCP Value Type
|
||||
```swift
|
||||
// Core/PeekabooCore/Sources/PeekabooCore/MCP/Schema/JSONSchemaGenerator.swift
|
||||
// Core/PeekabooCore/Sources/PeekabooCore/MCP/Schema/SchemaBuilder.swift
|
||||
import Foundation
|
||||
import MCP
|
||||
|
||||
public enum JSONSchema {
|
||||
case object(properties: [String: JSONSchema], required: [String] = [])
|
||||
case array(items: JSONSchema)
|
||||
case string(description: String? = nil)
|
||||
case number(description: String? = nil)
|
||||
case integer(description: String? = nil)
|
||||
case boolean(description: String? = nil)
|
||||
case `enum`([String], description: String? = nil, default: String? = nil)
|
||||
|
||||
public func encode() -> [String: Any] {
|
||||
switch self {
|
||||
case .object(let properties, let required):
|
||||
var schema: [String: Any] = ["type": "object"]
|
||||
schema["properties"] = properties.mapValues { $0.encode() }
|
||||
if !required.isEmpty {
|
||||
schema["required"] = required
|
||||
}
|
||||
return schema
|
||||
|
||||
case .array(let items):
|
||||
return [
|
||||
"type": "array",
|
||||
"items": items.encode()
|
||||
]
|
||||
|
||||
case .string(let description):
|
||||
var schema: [String: Any] = ["type": "string"]
|
||||
if let desc = description {
|
||||
schema["description"] = desc
|
||||
}
|
||||
return schema
|
||||
|
||||
case .enum(let values, let description, let defaultValue):
|
||||
var schema: [String: Any] = [
|
||||
"type": "string",
|
||||
"enum": values
|
||||
]
|
||||
if let desc = description {
|
||||
schema["description"] = desc
|
||||
}
|
||||
if let def = defaultValue {
|
||||
schema["default"] = def
|
||||
}
|
||||
return schema
|
||||
|
||||
// ... other cases
|
||||
public struct SchemaBuilder {
|
||||
/// Build a JSON Schema using MCP's Value type
|
||||
public static func object(
|
||||
properties: [String: Value],
|
||||
required: [String] = [],
|
||||
description: String? = nil
|
||||
) -> Value {
|
||||
var schema: [String: Value] = [
|
||||
"type": .string("object"),
|
||||
"properties": .object(properties)
|
||||
]
|
||||
|
||||
if !required.isEmpty {
|
||||
schema["required"] = .array(required.map { .string($0) })
|
||||
}
|
||||
|
||||
if let desc = description {
|
||||
schema["description"] = .string(desc)
|
||||
}
|
||||
|
||||
return .object(schema)
|
||||
}
|
||||
|
||||
public static func string(
|
||||
description: String? = nil,
|
||||
enum values: [String]? = nil,
|
||||
default: String? = nil
|
||||
) -> Value {
|
||||
var schema: [String: Value] = ["type": .string("string")]
|
||||
|
||||
if let desc = description {
|
||||
schema["description"] = .string(desc)
|
||||
}
|
||||
|
||||
if let values = values {
|
||||
schema["enum"] = .array(values.map { .string($0) })
|
||||
}
|
||||
|
||||
if let defaultValue = `default` {
|
||||
schema["default"] = .string(defaultValue)
|
||||
}
|
||||
|
||||
return .object(schema)
|
||||
}
|
||||
|
||||
public static func boolean(description: String? = nil) -> Value {
|
||||
var schema: [String: Value] = ["type": .string("boolean")]
|
||||
|
||||
if let desc = description {
|
||||
schema["description"] = .string(desc)
|
||||
}
|
||||
|
||||
return .object(schema)
|
||||
}
|
||||
|
||||
public static func number(description: String? = nil) -> Value {
|
||||
var schema: [String: Value] = ["type": .string("number")]
|
||||
|
||||
if let desc = description {
|
||||
schema["description"] = .string(desc)
|
||||
}
|
||||
|
||||
return .object(schema)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
40
package.json
40
package.json
@ -4,54 +4,24 @@
|
||||
"private": true,
|
||||
"description": "Peekaboo - Lightning-fast macOS Screenshots & GUI Automation",
|
||||
"comments": [
|
||||
"This package.json is for convenience scripts only.",
|
||||
"All dependencies should be in Server/package.json.",
|
||||
"DO NOT run 'npm install' in the root directory.",
|
||||
"Run 'npm install' from the Server/ directory instead."
|
||||
"This package.json is for build scripts only.",
|
||||
"The TypeScript server has been removed - all MCP functionality is now in Swift."
|
||||
],
|
||||
"scripts": {
|
||||
"build": "cd Server && npm run build",
|
||||
"build:swift": "./scripts/build-swift-arm.sh",
|
||||
"build:swift:all": "./scripts/build-swift-universal.sh",
|
||||
"build:all": "npm run build:swift && npm run build",
|
||||
"start": "cd Server && npm start",
|
||||
"dev": "cd Server && npm run dev",
|
||||
"clean": "cd Server && npm run clean",
|
||||
"test": "cd Server && npm test",
|
||||
"test:safe": "cd Server && npm run test:safe",
|
||||
"test:full": "cd Server && npm run test:full",
|
||||
"test:watch": "cd Server && npm run test:watch",
|
||||
"test:watch:full": "cd Server && npm run test:watch:full",
|
||||
"test:coverage": "cd Server && npm run test:coverage",
|
||||
"test:coverage:full": "cd Server && npm run test:coverage:full",
|
||||
"test:unit": "cd Server && npm run test:unit",
|
||||
"test:unit:full": "cd Server && npm run test:unit:full",
|
||||
"test:typescript": "cd Server && npm run test:typescript",
|
||||
"test:typescript:watch": "cd Server && npm run test:typescript:watch",
|
||||
"build": "npm run build:swift",
|
||||
"test:swift": "cd Apps/CLI && swift test --parallel --skip \"LocalIntegrationTests|ScreenshotValidationTests|ApplicationFinderTests|WindowManagerTests\"",
|
||||
"test:integration": "npm run build && npm run test:swift && cd Server && npm run test:integration",
|
||||
"test:integration:full": "npm run build && npm run test:swift && cd Server && npm run test:integration:full",
|
||||
"test:all": "npm run test:integration:full",
|
||||
"lint": "cd Server && npm run lint",
|
||||
"lint:fix": "cd Server && npm run lint:fix",
|
||||
"format": "cd Server && npm run format",
|
||||
"format:check": "cd Server && npm run format:check",
|
||||
"typecheck": "cd Server && npm run typecheck",
|
||||
"check": "cd Server && npm run check",
|
||||
"check:fix": "cd Server && npm run check:fix",
|
||||
"test": "npm run test:swift",
|
||||
"lint:swift": "cd Apps/CLI && swiftlint",
|
||||
"format:swift": "cd Apps/CLI && swiftformat .",
|
||||
"prepare-release": "node scripts/prepare-release.js",
|
||||
"inspector": "cd Server && npm run inspector",
|
||||
"poltergeist:start": "./scripts/poltergeist-wrapper.sh start",
|
||||
"poltergeist:haunt": "./scripts/poltergeist-wrapper.sh haunt",
|
||||
"poltergeist:stop": "./scripts/poltergeist-wrapper.sh stop",
|
||||
"poltergeist:rest": "./scripts/poltergeist-wrapper.sh rest",
|
||||
"poltergeist:status": "./scripts/poltergeist-wrapper.sh status",
|
||||
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs",
|
||||
"mcp:build": "cd Server && npm run build:all",
|
||||
"mcp:publish": "cd Server && npm publish",
|
||||
"mcp:publish:beta": "cd Server && npm publish --tag beta"
|
||||
"poltergeist:logs": "./scripts/poltergeist-wrapper.sh logs"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
@ -17,8 +17,8 @@ echo "🧹 Cleaning previous build artifacts..."
|
||||
rm -rf "$SWIFT_PROJECT_PATH/.build"
|
||||
rm -f "$FINAL_BINARY_PATH.tmp"
|
||||
|
||||
echo "📦 Reading version from package.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/Server/package.json').version")
|
||||
echo "📦 Reading version from version.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/version.json').version")
|
||||
echo "Version: $VERSION"
|
||||
|
||||
echo "💉 Injecting version into Swift code..."
|
||||
|
||||
@ -17,8 +17,8 @@ if [[ "$CLEAN_BUILD" == "true" ]]; then
|
||||
(cd "$SWIFT_PROJECT_PATH" && swift package reset 2>/dev/null || true)
|
||||
fi
|
||||
|
||||
echo "📦 Reading version from package.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/Server/package.json').version" 2>/dev/null || echo "3.0.0-dev")
|
||||
echo "📦 Reading version from version.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/version.json').version" 2>/dev/null || echo "3.0.0-dev")
|
||||
|
||||
echo "💉 Injecting version into Swift code..."
|
||||
VERSION_SWIFT_PATH="$SWIFT_PROJECT_PATH/Sources/peekaboo/Version.swift"
|
||||
|
||||
@ -20,8 +20,8 @@ echo "🧹 Cleaning previous build artifacts..."
|
||||
rm -rf "$SWIFT_PROJECT_PATH/.build"
|
||||
rm -f "$ARM64_BINARY_TEMP" "$X86_64_BINARY_TEMP" "$FINAL_BINARY_PATH.tmp"
|
||||
|
||||
echo "📦 Reading version from package.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/Server/package.json').version")
|
||||
echo "📦 Reading version from version.json..."
|
||||
VERSION=$(node -p "require('$PROJECT_ROOT/version.json').version")
|
||||
echo "Version: $VERSION"
|
||||
|
||||
echo "💉 Injecting version into Swift code..."
|
||||
|
||||
@ -1,229 +1,36 @@
|
||||
#!/bin/bash
|
||||
# Smart CLI Wrapper for Peekaboo
|
||||
# Automatically waits for Poltergeist rebuilds to complete before running
|
||||
# Smart CLI Wrapper for Peekaboo - Now Powered by pgrun
|
||||
# This wrapper uses Poltergeist's pgrun for superior build management and diagnostics
|
||||
|
||||
# Get the directory of this script
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
BINARY_PATH="$PROJECT_ROOT/peekaboo"
|
||||
BUILD_LOCK="/tmp/peekaboo-cli-build.lock"
|
||||
BUILD_STATUS="/tmp/peekaboo-cli-build-status.json"
|
||||
RECOVERY_SIGNAL="/tmp/peekaboo-cli-build-recovery"
|
||||
MAX_WAIT=300 # Maximum seconds to wait for build (5 minutes)
|
||||
DEBUG="${PEEKABOO_WAIT_DEBUG:-false}"
|
||||
|
||||
# Debug logging
|
||||
debug_log() {
|
||||
if [ "$DEBUG" = "true" ]; then
|
||||
echo "[peekaboo-wait] $1" >&2
|
||||
fi
|
||||
}
|
||||
# Path to pgrun
|
||||
PGRUN_PATH="/Users/steipete/Projects/poltergeist/dist/pgrun.js"
|
||||
|
||||
# Function to check if binary is newer than all Swift sources
|
||||
is_binary_fresh() {
|
||||
if [ ! -f "$BINARY_PATH" ]; then
|
||||
debug_log "Binary not found at $BINARY_PATH"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Get binary modification time
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
BINARY_TIME=$(stat -f "%m" "$BINARY_PATH" 2>/dev/null)
|
||||
else
|
||||
BINARY_TIME=$(stat -c "%Y" "$BINARY_PATH" 2>/dev/null)
|
||||
fi
|
||||
|
||||
debug_log "Binary modification time: $BINARY_TIME"
|
||||
|
||||
# Find newest source file modification time
|
||||
NEWEST_SOURCE=0
|
||||
NEWEST_FILE=""
|
||||
while IFS= read -r -d '' file; do
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
FILE_TIME=$(stat -f "%m" "$file" 2>/dev/null)
|
||||
else
|
||||
FILE_TIME=$(stat -c "%Y" "$file" 2>/dev/null)
|
||||
fi
|
||||
if [ "$FILE_TIME" -gt "$NEWEST_SOURCE" ]; then
|
||||
NEWEST_SOURCE=$FILE_TIME
|
||||
NEWEST_FILE="$file"
|
||||
fi
|
||||
done < <(find "$PROJECT_ROOT/Core/PeekabooCore/Sources" "$PROJECT_ROOT/Core/AXorcist/Sources" "$PROJECT_ROOT/Apps/CLI/Sources" -name "*.swift" -type f -print0 2>/dev/null)
|
||||
|
||||
debug_log "Newest source file: $NEWEST_FILE (time: $NEWEST_SOURCE)"
|
||||
|
||||
# Binary is fresh if it's newer than all source files
|
||||
if [ "$BINARY_TIME" -ge "$NEWEST_SOURCE" ]; then
|
||||
debug_log "Binary is fresh"
|
||||
return 0
|
||||
else
|
||||
debug_log "Binary is stale (older than source files)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to check if a build is running
|
||||
is_build_running() {
|
||||
if [ -f "$BUILD_LOCK" ]; then
|
||||
PID=$(cat "$BUILD_LOCK" 2>/dev/null)
|
||||
if [ -n "$PID" ] && ps -p "$PID" > /dev/null 2>&1; then
|
||||
return 0
|
||||
else
|
||||
# Stale lock file
|
||||
debug_log "Removing stale build lock (PID $PID not running)"
|
||||
rm -f "$BUILD_LOCK"
|
||||
fi
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
|
||||
# Function to check build status from status file
|
||||
check_build_status_file() {
|
||||
if [ ! -f "$BUILD_STATUS" ]; then
|
||||
debug_log "No build status file found"
|
||||
return 2 # Unknown status
|
||||
fi
|
||||
|
||||
# Read status file
|
||||
local status=$(grep '"status"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)
|
||||
local timestamp=$(grep '"timestamp"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)
|
||||
local error_summary=$(grep '"error_summary"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)
|
||||
|
||||
# Check age of status
|
||||
if [ -n "$timestamp" ]; then
|
||||
# Convert ISO timestamp to epoch
|
||||
local status_epoch=$(date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$timestamp" "+%s" 2>/dev/null || date -u -d "$timestamp" "+%s" 2>/dev/null || echo "0")
|
||||
local current_epoch=$(date +%s)
|
||||
local age=$((current_epoch - status_epoch))
|
||||
|
||||
# If status is older than 5 minutes, consider it stale
|
||||
if [ $age -gt 300 ]; then
|
||||
debug_log "Build status is stale (${age}s old)"
|
||||
return 2 # Unknown/stale status
|
||||
fi
|
||||
fi
|
||||
|
||||
case "$status" in
|
||||
"building")
|
||||
debug_log "Build status: currently building"
|
||||
return 3 # Building
|
||||
;;
|
||||
"success")
|
||||
debug_log "Build status: success"
|
||||
return 0 # Success
|
||||
;;
|
||||
"failed")
|
||||
debug_log "Build status: failed - $error_summary"
|
||||
echo "❌ POLTERGEIST BUILD FAILED" >&2
|
||||
echo "" >&2
|
||||
if [ -n "$error_summary" ]; then
|
||||
echo "Error: $error_summary" >&2
|
||||
else
|
||||
echo "Build failed. Check 'npm run poltergeist:logs' for details." >&2
|
||||
fi
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX: Run 'npm run build:swift' to see and fix the compilation errors." >&2
|
||||
echo " After fixing, the wrapper will automatically use the new binary." >&2
|
||||
echo "" >&2
|
||||
return 1 # Failed
|
||||
;;
|
||||
*)
|
||||
debug_log "Build status: unknown ($status)"
|
||||
return 2 # Unknown
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Main logic
|
||||
debug_log "Starting peekaboo-wait wrapper"
|
||||
debug_log "Binary path: $BINARY_PATH"
|
||||
debug_log "Build lock: $BUILD_LOCK"
|
||||
|
||||
# First, check if binary is already fresh
|
||||
if is_binary_fresh; then
|
||||
debug_log "Binary is fresh, executing immediately"
|
||||
exec "$BINARY_PATH" "$@"
|
||||
fi
|
||||
|
||||
# Binary is stale, check build status first
|
||||
debug_log "Binary is stale, checking build status"
|
||||
|
||||
# Check if there's a recent build failure
|
||||
check_build_status_file
|
||||
status_result=$?
|
||||
|
||||
if [ $status_result -eq 1 ]; then
|
||||
# Build failed - exit with special code to trigger manual rebuild
|
||||
exit 42 # Special exit code for build failure
|
||||
fi
|
||||
|
||||
# Check for ongoing build
|
||||
if ! is_build_running; then
|
||||
# No build running, but binary is stale
|
||||
if [ $status_result -eq 0 ]; then
|
||||
# Status says success but binary is stale - might be a race condition
|
||||
debug_log "Status shows success but binary is stale, proceeding anyway"
|
||||
else
|
||||
# Unknown status or stale - Poltergeist should pick it up
|
||||
echo "⏳ Binary is stale. Waiting for Poltergeist to detect changes and rebuild..." >&2
|
||||
echo " If this takes too long, check: npm run poltergeist:status" >&2
|
||||
|
||||
# Give Poltergeist a moment to detect the stale binary
|
||||
sleep 2
|
||||
fi
|
||||
fi
|
||||
|
||||
wait_count=0
|
||||
while is_build_running && [ $wait_count -lt $MAX_WAIT ]; do
|
||||
if [ $wait_count -eq 0 ]; then
|
||||
echo "🔨 Poltergeist is rebuilding the Swift CLI..." >&2
|
||||
fi
|
||||
sleep 1
|
||||
((wait_count++))
|
||||
|
||||
# Show progress with more helpful messages
|
||||
if [ $((wait_count % 10)) -eq 0 ] && [ $wait_count -gt 0 ]; then
|
||||
remaining=$((MAX_WAIT - wait_count))
|
||||
echo " Still building... (${wait_count}s elapsed, max ${remaining}s remaining)" >&2
|
||||
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $wait_count -ge $MAX_WAIT ]; then
|
||||
echo "⚠️ Build timeout reached (${MAX_WAIT}s / 5 minutes)." >&2
|
||||
echo " Check build status with: npm run poltergeist:status" >&2
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Final checks after waiting
|
||||
debug_log "Performing final checks after wait"
|
||||
|
||||
# Check build status file again
|
||||
check_build_status_file
|
||||
final_status=$?
|
||||
|
||||
if [ $final_status -eq 1 ]; then
|
||||
# Build failed - exit with special code
|
||||
exit 42 # Special exit code for build failure
|
||||
fi
|
||||
|
||||
# Final freshness check
|
||||
if is_binary_fresh; then
|
||||
debug_log "Binary is now fresh after waiting"
|
||||
else
|
||||
debug_log "Binary might still be stale, but proceeding"
|
||||
# If the binary exists but is stale, Poltergeist should pick it up
|
||||
# We'll run it anyway to avoid blocking
|
||||
fi
|
||||
|
||||
# Execute the binary if it exists
|
||||
if [ -f "$BINARY_PATH" ]; then
|
||||
debug_log "Executing: $BINARY_PATH $*"
|
||||
exec "$BINARY_PATH" "$@"
|
||||
else
|
||||
echo "❌ Binary not found at: $BINARY_PATH" >&2
|
||||
echo " This usually means the build failed." >&2
|
||||
echo " Check: npm run poltergeist:logs" >&2
|
||||
# Check if pgrun is available
|
||||
if [ ! -f "$PGRUN_PATH" ]; then
|
||||
echo "❌ pgrun not found at: $PGRUN_PATH" >&2
|
||||
echo " This wrapper requires Poltergeist to be available." >&2
|
||||
echo "🔧 Please check that Poltergeist is installed and built." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Map debug environment variable to pgrun verbose flag
|
||||
PGRUN_ARGS=()
|
||||
if [ "${PEEKABOO_WAIT_DEBUG:-false}" = "true" ]; then
|
||||
PGRUN_ARGS+=("--verbose")
|
||||
fi
|
||||
|
||||
# Change to project directory to ensure correct context
|
||||
cd "$PROJECT_ROOT"
|
||||
|
||||
# Create a symlink to the peekaboo binary for pgrun to find
|
||||
# This works around the mismatch between target name (peekaboo-cli) and binary name (peekaboo)
|
||||
if [ ! -e "$PROJECT_ROOT/peekaboo-cli" ] && [ -e "$PROJECT_ROOT/peekaboo" ]; then
|
||||
ln -sf peekaboo "$PROJECT_ROOT/peekaboo-cli"
|
||||
fi
|
||||
|
||||
# Execute pgrun with peekaboo-cli target and all arguments
|
||||
exec node "$PGRUN_PATH" peekaboo-cli "${PGRUN_ARGS[@]}" "$@"
|
||||
340
scripts/peekaboo-wait.sh.original
Executable file
340
scripts/peekaboo-wait.sh.original
Executable file
@ -0,0 +1,340 @@
|
||||
#!/bin/bash
|
||||
# Smart CLI Wrapper for Peekaboo
|
||||
# Automatically waits for Poltergeist rebuilds to complete before running
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
BINARY_PATH="$PROJECT_ROOT/peekaboo"
|
||||
BUILD_LOCK="/tmp/peekaboo-cli-build.lock"
|
||||
BUILD_STATUS="/tmp/peekaboo-cli-build-status.json"
|
||||
RECOVERY_SIGNAL="/tmp/peekaboo-cli-build-recovery"
|
||||
MAX_WAIT=300 # Maximum seconds to wait for build (5 minutes)
|
||||
DEBUG="${PEEKABOO_WAIT_DEBUG:-false}"
|
||||
|
||||
# Poltergeist pgrun integration
|
||||
PGRUN_PATH="/Users/steipete/Projects/poltergeist/dist/pgrun.js"
|
||||
|
||||
# Debug logging
|
||||
debug_log() {
|
||||
if [ "$DEBUG" = "true" ]; then
|
||||
echo "[peekaboo-wait] $1" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
# Run pgrun health check and provide specific diagnostics
|
||||
run_pgrun_health_check() {
|
||||
debug_log "Running pgrun health check..."
|
||||
|
||||
# Check if pgrun is available
|
||||
if [ ! -f "$PGRUN_PATH" ]; then
|
||||
debug_log "pgrun not found at $PGRUN_PATH"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Run pgrun with --verbose --no-wait to get immediate status
|
||||
local pgrun_output
|
||||
local pgrun_exit_code
|
||||
|
||||
pgrun_output=$(node "$PGRUN_PATH" peekaboo-cli --verbose --no-wait --timeout 1000 2>&1)
|
||||
pgrun_exit_code=$?
|
||||
|
||||
debug_log "pgrun exit code: $pgrun_exit_code"
|
||||
debug_log "pgrun output: $pgrun_output"
|
||||
|
||||
# Parse pgrun output and provide specific guidance
|
||||
if [ $pgrun_exit_code -eq 0 ]; then
|
||||
# pgrun succeeded - binary should be fresh
|
||||
debug_log "pgrun health check passed"
|
||||
return 0
|
||||
else
|
||||
# pgrun failed - analyze the output for specific issues
|
||||
echo "🔍 Poltergeist Health Check Results:" >&2
|
||||
echo "" >&2
|
||||
|
||||
if echo "$pgrun_output" | grep -q "No poltergeist.config.json found"; then
|
||||
echo "❌ Poltergeist configuration not found" >&2
|
||||
echo " This usually means Poltergeist is not set up for this project." >&2
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX:" >&2
|
||||
echo " 1. Check if Poltergeist is running: npm run poltergeist:status" >&2
|
||||
echo " 2. Start Poltergeist: npm run poltergeist:haunt" >&2
|
||||
echo "" >&2
|
||||
|
||||
elif echo "$pgrun_output" | grep -q "Target.*not found"; then
|
||||
echo "❌ Target 'peekaboo-cli' not found in Poltergeist config" >&2
|
||||
echo " The Poltergeist configuration doesn't include the CLI target." >&2
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX:" >&2
|
||||
echo " 1. Check Poltergeist config: cat poltergeist.config.json" >&2
|
||||
echo " 2. Restart Poltergeist: npm run poltergeist:haunt" >&2
|
||||
echo "" >&2
|
||||
|
||||
elif echo "$pgrun_output" | grep -q "Build in progress"; then
|
||||
echo "⏳ Build currently in progress" >&2
|
||||
echo " Poltergeist is actively rebuilding the CLI." >&2
|
||||
echo "" >&2
|
||||
return 2 # Special code for "building"
|
||||
|
||||
elif echo "$pgrun_output" | grep -q "Last build failed"; then
|
||||
echo "❌ Last Poltergeist build failed" >&2
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX:" >&2
|
||||
echo " 1. Check build logs: npm run poltergeist:logs" >&2
|
||||
echo " 2. Manual build: npm run build:swift" >&2
|
||||
echo " 3. Restart Poltergeist: npm run poltergeist:haunt" >&2
|
||||
echo "" >&2
|
||||
|
||||
elif echo "$pgrun_output" | grep -q "Binary not found"; then
|
||||
echo "❌ CLI binary not found" >&2
|
||||
echo " The expected binary doesn't exist at the configured path." >&2
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX:" >&2
|
||||
echo " 1. Manual build: npm run build:swift" >&2
|
||||
echo " 2. Check Poltergeist status: npm run poltergeist:status" >&2
|
||||
echo "" >&2
|
||||
|
||||
else
|
||||
# Generic pgrun failure
|
||||
echo "❌ Poltergeist health check failed" >&2
|
||||
echo "" >&2
|
||||
echo "Raw pgrun output:" >&2
|
||||
echo "$pgrun_output" >&2
|
||||
echo "" >&2
|
||||
echo "🔧 TO FIX:" >&2
|
||||
echo " 1. Check Poltergeist status: npm run poltergeist:status" >&2
|
||||
echo " 2. Check build logs: npm run poltergeist:logs" >&2
|
||||
echo " 3. Manual build: npm run build:swift" >&2
|
||||
echo "" >&2
|
||||
fi
|
||||
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to check if binary is newer than all Swift sources
|
||||
is_binary_fresh() {
|
||||
if [ ! -f "$BINARY_PATH" ]; then
|
||||
debug_log "Binary not found at $BINARY_PATH"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Get binary modification time
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
BINARY_TIME=$(stat -f "%m" "$BINARY_PATH" 2>/dev/null)
|
||||
else
|
||||
BINARY_TIME=$(stat -c "%Y" "$BINARY_PATH" 2>/dev/null)
|
||||
fi
|
||||
|
||||
debug_log "Binary modification time: $BINARY_TIME"
|
||||
|
||||
# Find newest source file modification time
|
||||
NEWEST_SOURCE=0
|
||||
NEWEST_FILE=""
|
||||
while IFS= read -r -d '' file; do
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
FILE_TIME=$(stat -f "%m" "$file" 2>/dev/null)
|
||||
else
|
||||
FILE_TIME=$(stat -c "%Y" "$file" 2>/dev/null)
|
||||
fi
|
||||
if [ "$FILE_TIME" -gt "$NEWEST_SOURCE" ]; then
|
||||
NEWEST_SOURCE=$FILE_TIME
|
||||
NEWEST_FILE="$file"
|
||||
fi
|
||||
done < <(find "$PROJECT_ROOT/Core/PeekabooCore/Sources" "$PROJECT_ROOT/Core/AXorcist/Sources" "$PROJECT_ROOT/Apps/CLI/Sources" -name "*.swift" -type f -print0 2>/dev/null)
|
||||
|
||||
debug_log "Newest source file: $NEWEST_FILE (time: $NEWEST_SOURCE)"
|
||||
|
||||
# Binary is fresh if it's newer than all source files
|
||||
if [ "$BINARY_TIME" -ge "$NEWEST_SOURCE" ]; then
|
||||
debug_log "Binary is fresh"
|
||||
return 0
|
||||
else
|
||||
debug_log "Binary is stale (older than source files)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to check if a build is running
|
||||
is_build_running() {
|
||||
if [ -f "$BUILD_LOCK" ]; then
|
||||
PID=$(cat "$BUILD_LOCK" 2>/dev/null)
|
||||
if [ -n "$PID" ] && ps -p "$PID" > /dev/null 2>&1; then
|
||||
return 0
|
||||
else
|
||||
# Stale lock file
|
||||
debug_log "Removing stale build lock (PID $PID not running)"
|
||||
rm -f "$BUILD_LOCK"
|
||||
fi
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
|
||||
# Function to check build status from status file
|
||||
# Inspect the JSON build status file at $BUILD_STATUS and report the outcome
# of the most recent build.
#
# Globals read:
#   BUILD_STATUS - path to the status file.
# Output:
#   On a failed build, prints a human-readable explanation to stderr.
# Returns:
#   0 - last build succeeded
#   1 - last build failed (message printed to stderr)
#   2 - unknown: file missing, timestamp unparseable, status older than
#       5 minutes, or an unrecognised status value
#   3 - a build is currently in progress
check_build_status_file() {
    # Status entries older than this many seconds are treated as stale.
    local max_age_seconds=300

    if [ ! -f "$BUILD_STATUS" ]; then
        debug_log "No build status file found"
        return 2 # Unknown status
    fi

    # Lightweight JSON extraction: for the line containing each key, take the
    # 4th double-quote-delimited field (i.e. the string value).
    local status=$(grep '"status"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)
    local timestamp=$(grep '"timestamp"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)
    local error_summary=$(grep '"error_summary"' "$BUILD_STATUS" 2>/dev/null | cut -d'"' -f4)

    # Check age of status
    if [ -n "$timestamp" ]; then
        # The BSD `date -j -f` format below has no slot for fractional
        # seconds, so "2025-01-01T12:00:00.123Z" would fail to parse there.
        # Strip the fraction for that parser; GNU `date -d` gets the
        # original string, which it understands as-is.
        local normalized="$timestamp"
        case "$normalized" in
            *.*Z) normalized="${normalized%%.*}Z" ;;
        esac

        # Convert ISO timestamp to epoch (BSD/macOS syntax first, then GNU);
        # "0" marks a timestamp neither parser could handle.
        local status_epoch=$(date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$normalized" "+%s" 2>/dev/null || date -u -d "$timestamp" "+%s" 2>/dev/null || echo "0")

        if [ "$status_epoch" -eq 0 ]; then
            # Previously this surfaced as a status that was decades "old";
            # the outcome (unknown) is unchanged, only the log is accurate.
            debug_log "Build status timestamp could not be parsed ($timestamp)"
            return 2 # Unknown status
        fi

        local current_epoch=$(date +%s)
        local age=$((current_epoch - status_epoch))

        # If status is older than 5 minutes, consider it stale
        if [ "$age" -gt "$max_age_seconds" ]; then
            debug_log "Build status is stale (${age}s old)"
            return 2 # Unknown/stale status
        fi
    fi

    case "$status" in
        "building")
            debug_log "Build status: currently building"
            return 3 # Building
            ;;
        "success")
            debug_log "Build status: success"
            return 0 # Success
            ;;
        "failed")
            debug_log "Build status: failed - $error_summary"
            echo "❌ POLTERGEIST BUILD FAILED" >&2
            echo "" >&2
            if [ -n "$error_summary" ]; then
                echo "Error: $error_summary" >&2
            else
                echo "Build failed. Check 'npm run poltergeist:logs' for details." >&2
            fi
            echo "" >&2
            echo "🔧 TO FIX: Run 'npm run build:swift' to see and fix the compilation errors." >&2
            echo " After fixing, the wrapper will automatically use the new binary." >&2
            echo "" >&2
            return 1 # Failed
            ;;
        *)
            debug_log "Build status: unknown ($status)"
            return 2 # Unknown
            ;;
    esac
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Entry point: fast path, then an initial look at the recorded build status.
# ---------------------------------------------------------------------------
debug_log "Starting peekaboo-wait wrapper"
debug_log "Binary path: $BINARY_PATH"
debug_log "Build lock: $BUILD_LOCK"

# Fast path: an up-to-date binary replaces this wrapper process right away,
# forwarding every command-line argument untouched.
is_binary_fresh && {
    debug_log "Binary is fresh, executing immediately"
    exec "$BINARY_PATH" "$@"
}

# Reaching this point means the binary was not considered fresh.
debug_log "Binary is stale, checking build status"

# Capture the status code; later steps consult status_result as well.
check_build_status_file
status_result=$?

# A recorded build failure (code 1) aborts with the dedicated exit code 42
# so that callers can recognise it and trigger a manual rebuild.
case "$status_result" in
    1) exit 42 ;;
esac
|
||||
|
||||
# Decide what to do when the binary is stale. Three situations:
#   * a build is already running        -> nothing to do here, the wait loop
#                                          that follows handles it
#   * no build, status file says success -> likely a race; carry on
#   * no build, status unknown/stale     -> ask pgrun for diagnostics
if is_build_running; then
    : # build in progress; fall through to the wait loop
elif [ "$status_result" -eq 0 ]; then
    # Status says success but binary is stale - might be a race condition
    debug_log "Status shows success but binary is stale, proceeding anyway"
else
    debug_log "Binary is stale and no build running, running pgrun health check"

    run_pgrun_health_check
    pgrun_result=$?

    case "$pgrun_result" in
        0)
            # pgrun reports a healthy state
            debug_log "pgrun health check passed, proceeding with execution"
            ;;
        2)
            # pgrun saw a build in progress; the wait loop will pick it up
            debug_log "pgrun detected build in progress, will wait"
            ;;
        *)
            # Any other code: pgrun has already printed its diagnostics
            debug_log "pgrun health check failed, diagnostics printed"
            printf '%s\n' \
                "💡 The wrapper detected issues with the build system." \
                " Please follow the steps above to resolve the problem." \
                "" \
                " If issues persist, try a manual build: npm run build:swift" >&2
            exit 1
            ;;
    esac

    # Give Poltergeist a moment to detect the stale binary
    sleep 2
fi
|
||||
|
||||
# Poll once per second while a build is running, for at most MAX_WAIT
# seconds. wait_count ends up holding the number of seconds waited.
wait_count=0
while is_build_running && [ "$wait_count" -lt "$MAX_WAIT" ]; do
    # Announce the wait once, on the first iteration only.
    if [ "$wait_count" -eq 0 ]; then
        echo "🔨 Poltergeist is rebuilding the Swift CLI..." >&2
    fi

    sleep 1
    # Plain arithmetic assignment: unlike ((wait_count++)), this never yields
    # a non-zero command status (the post-increment form does when the old
    # value is 0), so it is safe regardless of shell options.
    wait_count=$((wait_count + 1))

    # Progress line every 10 seconds. wait_count is always >= 1 here, so no
    # separate "greater than zero" check is needed.
    if [ $((wait_count % 10)) -eq 0 ]; then
        remaining=$((MAX_WAIT - wait_count))
        echo " Still building... (${wait_count}s elapsed, max ${remaining}s remaining)" >&2
    fi
done

if [ "$wait_count" -ge "$MAX_WAIT" ]; then
    # Minutes are derived from MAX_WAIT (whole minutes, truncated) instead of
    # a hard-coded "5", so the message stays correct if the limit changes.
    echo "⚠️ Build timeout reached (${MAX_WAIT}s / $((MAX_WAIT / 60)) minutes)." >&2
    echo " Check build status with: npm run poltergeist:status" >&2
fi
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Post-wait: re-read the build status, note freshness, then hand over to the
# real binary (or explain why that is impossible).
# ---------------------------------------------------------------------------
debug_log "Performing final checks after wait"

# Re-check the status file; a recorded failure ends with the dedicated
# build-failure exit code.
check_build_status_file
final_status=$?
case "$final_status" in
    1) exit 42 ;;
esac

# Freshness is only logged here; a binary that is still stale is run anyway
# so the caller is not blocked.
freshness_note="Binary might still be stale, but proceeding"
if is_binary_fresh; then
    freshness_note="Binary is now fresh after waiting"
fi
debug_log "$freshness_note"

# Guard: without a binary on disk there is nothing to execute.
if [ ! -f "$BINARY_PATH" ]; then
    printf '%s\n' \
        "❌ Binary not found at: $BINARY_PATH" \
        " This usually means the build failed." \
        " Check: npm run poltergeist:logs" >&2
    exit 1
fi

# Replace the wrapper process with the binary, forwarding all arguments.
debug_log "Executing: $BINARY_PATH $*"
exec "$BINARY_PATH" "$@"
|
||||
3
version.json
Normal file
3
version.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"version": "3.0.0"
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user